<a href="https://colab.research.google.com/github/pedrobslima/glasses-detection/blob/main/model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Imports

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torchvision.transforms as transforms
import os
# from tutorial:
import torchvision
from torch.utils.mobile_optimizer import optimize_for_mobile

### Versão tutorial:

In [None]:
# From tutorial:
model = torchvision.models.mobilenet_v2(pretrained=True)
model.eval()
example = torch.rand(1, 3, 224, 224)
traced_script_module = torch.jit.trace(model, example)
traced_script_module_optimized = optimize_for_mobile(traced_script_module)
traced_script_module_optimized._save_for_life_interpreter("model.ptl")

### Minha adaptação:

In [2]:
string_path = lambda nmtp, idx: (f'glasses-dataset/{nmtp}/{nmtp}-oculos ({idx}).png', f'{nmtp.upper()} ÓCULOS')  # glasses-dataset\com\com-oculos (418).png

In [3]:
class ImgDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe
        self.transform = transforms.Compose([
          transforms.Resize([224, 224]), # tvz dps mudar as dimensões desse resize
          transforms.ToTensor(),
        ])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image_path = self.data.iloc[idx].loc['image_path']
        image_class = self.data.iloc[idx].loc['class']

        image = Image.open(image_path)#.convert('L')
        #mask = Image.open(mask_path).convert('L')
        
        #image.load()

        return self.transform(image), image_class

In [4]:
#img = [string_path('com', i) for i in range(438)]
imgs = list(map(string_path, ['com']*438, range(438))) 
#imgs = imgs + [string_path('sem', i) for i in range(224)] # tvz arranjar um jeito de flipar as imagens dps, principalmente se as sem óculos, já q são a minoria
imgs = imgs + list(map(string_path, ['sem']*224, range(224))) # tvz arranjar um jeito de flipar as imagens dps, principalmente se as sem óculos, já q são a minoria

In [5]:
df = pd.DataFrame(imgs, columns=['image_path', 'class'])
df['class'] = df['class'].apply(lambda x: np.float32(1) if "COM ÓCULOS" else np.float32(0))

df_train, df_test = train_test_split(df, test_size=0.25, shuffle=True)

In [6]:
train_dataset = ImgDataset(df_train)
test_dataset = ImgDataset(df_test)

In [8]:
type(train_dataset[0][1])

numpy.float32

In [9]:
BATCH_SIZE = 4
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [10]:
#(width, height)
width = []
height = []

#com-oculos
for i in range(438):
    with Image.open(f'glasses-dataset\com\com-oculos ({i}).png') as im:
        temp = im.size
    width.append(temp[0])
    height.append(temp[1])

#sem-oculos
for i in range(224):
    with Image.open(f'glasses-dataset\sem\sem-oculos ({i}).png') as im:
        temp = im.size
    width.append(temp[0])
    height.append(temp[1])

In [12]:
print("Min width:", min(width), "\t\t\tMin height:", min(height))
print("Avg width", sum(width)/len(width), "\tAvg height", sum(height)/len(height))

Min width: 532 			Min height: 576
Avg width 1055.7099697885196 	Avg height 1724.309667673716


In [11]:
# Para executar com a GPU:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [12]:
mobnet = torchvision.models.mobilenet_v2(pretrained=False) # uso uma pré-treinada ou não?



In [215]:
mobnet

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [216]:
mobnet.classifier

Sequential(
  (0): Dropout(p=0.2, inplace=False)
  (1): Linear(in_features=1280, out_features=1000, bias=True)
)

In [13]:
classf = nn.Sequential(
    nn.Dropout(0.2),
    nn.Linear(in_features=1280, out_features=1, bias=True)
)

In [14]:
mobnet.classifier = classf

In [15]:
mobnet = mobnet.to(DEVICE)

In [16]:
def train_net(model, tloader, vloader, num_epochs, optimizer, lossFunc):
  train_losses = []
  val_losses = []
  for e in range(num_epochs):
    train_loss = 0.0 # total loss during single epoch training
    val_loss = 0.0
    model.train()
    for imgs, labels in tloader:
      imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)

      pred = model(imgs).squeeze() # predictions based on batch imgs
      loss = lossFunc(pred, labels) 
      optimizer.zero_grad() # clears x.grad for every parameter x in the optimizer.
                            # It’s important to call this before loss.backward(), otherwise you’ll accumulate the gradients from multiple passes.

      loss.backward() # computes dloss/dx for every parameter x which has requires_grad=True. These are accumulated into x.grad for every parameter x
      optimizer.step() # updates the value of x using the gradient x.grad

      l = loss.item()
      train_loss += l # value of loss?
      print(f'Epoch [{e + 1}/{num_epochs}], Step [{i + 1}/{len(tloader)}], Loss: {l:.4f} ')
    model.eval()
    with torch.no_grad():
      for imgs, labels in vloader:
        imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
        pred = model(imgs).squeeze()
        l = lossFunc(pred, labels).item()
        val_loss += l
        #val_dice += (2 * (pred * labels).sum()) / (2 * (pred * labels).sum()+ ((pred*labels)<1).sum())

      avg_train_loss = train_loss / len(tloader)
      avg_val_loss = val_loss / len(vloader)
      #avg_train_dice = train_dice / len(train_loader)
      #avg_val_dice = val_dice / len(vloader)
      print(f'[EVAL TIME] Epoch [{e + 1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Eval Loss: {avg_val_loss:.4f}')
    train_losses.append(avg_train_loss)
    val_losses.append(avg_val_loss)
  return train_losses, val_losses
     

In [37]:
for imgs, labels in test_dataloader:
    print(type(imgs), type(labels))
    print(labels)
    imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)

    pred = mobnet(imgs).squeeze() # predictions based on batch imgs
    print(pred)
    break

<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor([1., 1., 1., 1.], dtype=torch.float64)
tensor([0.2165, 0.3740, 0.2173, 0.1820], device='cuda:0',
       grad_fn=<SqueezeBackward0>)


In [42]:
labels.float()

tensor([1., 1., 1., 1.], device='cuda:0')

In [17]:
lossFunc = nn.BCELoss()
optm = optim.Adam(mobnet.parameters(), lr=0.001, weight_decay = 1e-6)
train_loss, val_loss = train_net(mobnet, train_dataloader, test_dataloader, 10, optm, lossFunc)

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
