In [0]:
from torchvision import models, transforms
import torch
import torch.nn as nn
import torch.optim as optim

vgg = models.vgg16(pretrained=True)
device = torch.device("cuda")
vgg = vgg.to(device)

In [0]:
# deshabilito gradiente de todos los parametros de la red
# esto es porque el entrenamiento solo se realizaráá para
# la capa fully conected que se agrega al final
for param in vgg.parameters():
  param.requires_grad = False

In [0]:
# 3. extraccióón de la úúltima capa de la red
# VGG fue construida usando nn.Sequential, por lo que la "úúltima capa"
# en realidad son muchas capas juntas
# por ello se llaman 2 veces el méétodo children
last_sequential_layer = list(vgg.children())[-1]
*list_of_layers, last_layer = list(last_sequential_layer.children())
in_features = last_layer.in_features

In [96]:
list_of_layers

[Linear(in_features=25088, out_features=4096, bias=True),
 ReLU(inplace=True),
 Dropout(p=0.5, inplace=False),
 Linear(in_features=4096, out_features=4096, bias=True),
 ReLU(inplace=True),
 Dropout(p=0.5, inplace=False)]

In [97]:
list_of_layers +  [nn.Linear(in_features,6)]

[Linear(in_features=25088, out_features=4096, bias=True),
 ReLU(inplace=True),
 Dropout(p=0.5, inplace=False),
 Linear(in_features=4096, out_features=4096, bias=True),
 ReLU(inplace=True),
 Dropout(p=0.5, inplace=False),
 Linear(in_features=4096, out_features=6, bias=True)]

In [98]:
in_features

4096

In [99]:
vgg.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [0]:
# se mantienen las úúltimas capas que iban en nn.Sequential
# menos la úúltima donde esta se reemplaza por la que se quiere entrenar
vgg.fc = nn.Linear(in_features,6)
vgg.fc.requires_grad = True
vgg.classifier = nn.Sequential(*(list_of_layers + [vgg.fc]))

In [0]:
# entrenamiento
# en general las redes pre-entrenadas fueron entrenadas con datasets que teníían cierta
# media y desviacióón estandar, es recomendable usar estas en el pipeline
# de preprocesamiento

In [102]:
from google.colab import drive
drive.mount("/gdrive")

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [103]:
# visualization (splitted dataset)
!ls "/gdrive/My Drive/dl-pytorch/datasets/64x64_SIGNS"

test_signs  train_signs  val_signs


In [0]:
import sys
sys.path.append("/gdrive/My Drive/dl-pytorch/")

In [0]:
# implementar un dataset hereda de otra clase que viene en pytorch
# hay que implementar 3 métodos
# 1. init: set de atributos del dataset (rutas hacia las imáágenes)
# 2. length: cuantas imáágenes tiene el dataset?
# 3. get_item: acceder a las listas
# # pipeline de procesamiento (transform)
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader

class SIGNSDataset(Dataset):
  def __init__(self,base_dir, split="train", transform=None):
    path = os.path.join(base_dir, "{}_signs".format(split))
    files = os.listdir(path)

    self.filenames = [os.path.join(path,f) for f in files if f.endswith(".jpg")]
    # labels
    self.targets = [int(f[0]) for f in files]
    self.transform = transform

  # retorna la cantidad de imáágenes
  def __len__(self):
    return len(self.filenames)

  # retorna la imagen y el target
  def __getitem__(self,idx):  
    # carga la imagen
    image = Image.open(self.filenames[idx])
    # si se da algún pipeline, se aplica
    if self.transform:
      image = self.transform(image)
    return image,self.targets[idx]


In [0]:
# pipeline de pre-procesamiento
transform = transforms.Compose(
    [
     transforms.RandomHorizontalFlip(),
     transforms.ToTensor(),
     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]
)

In [0]:
trainset = SIGNSDataset("/gdrive/My Drive/dl-pytorch/datasets/64x64_SIGNS",split="train",transform= transform)
trainloader = DataLoader(trainset,batch_size=32)

In [0]:
valset = SIGNSDataset("/gdrive/My Drive/dl-pytorch/datasets/64x64_SIGNS",split="val",transform= transform)
valloader = DataLoader(valset,batch_size=32)

In [0]:
testset = SIGNSDataset("/gdrive/My Drive/dl-pytorch/datasets/64x64_SIGNS",split="test",transform= transform)
testloader = DataLoader(testset,batch_size=32)

In [0]:
dataloaders = {'train':trainloader,
              'val':valloader,
              'test':testloader}

In [111]:
vgg.parameters()

<generator object Module.parameters at 0x7f4e70666308>

In [0]:
loss_fn = nn.NLLLoss()
optimizer = optim.SGD(vgg.parameters(), lr=1e-3, momentum = 0.9)
vgg = vgg.to(device)

In [0]:
# tip: clase para logeo de las méétricas en cada iteración
class RunningMetric():
  def __init__(self):
    self.Suma = 0
    self.N = 0

  def update(self, val, size):
    self.Suma += val
    self.N += size

  def __call__(self):
    return self.Suma/float(self.N)

In [0]:
def train_and_evaluate(model, optimizer, loss_fn, dataloaders, device, num_epoch = 10, lr = 0.001):
  for g in optimizer.param_groups:
    g["lr"] = lr # permite modificar le learning rate

  

  for epoch in range(num_epoch):
    print("Epoch {}/{}".format(epoch+1, num_epoch))
    print("-"*10)

    for phase in ["train","val"]:
      # dataloader = dataloaders[phase]
      if phase == "train":
        model.train()
      else:
        model.eval()

      # dataloader = dataloaders[phase]

      running_loss = RunningMetric()
      running_acc = RunningMetric()

      # running_loss = 0.0
      # running_corrects = 0


      for inputs, targets in dataloaders[phase]:
        #primero: input y target a gpu
        inputs, targets = inputs.to(device), targets.to(device)
        # para el nuevo batch se lleva a cero los gradientes
        optimizer.zero_grad()
        # en las fases de test no necesitamos actualizar gradientes
        with torch.set_grad_enabled(phase == "train"):
          # outputs = net(inputs)
          
          # _, preds = torch.max(outputs,1) # target predicho
          outputs = vgg(inputs)
          _, preds = torch.max(outputs,1)

          loss = loss_fn(outputs, targets) # calcula la pérdida
          if phase == "train":
            loss.backward() # gradients with backpropagation
            optimizer.step() # actualiza los parametros 

        batch_size = inputs.size()[0]
        running_loss.update(loss.item()*batch_size, batch_size)
        running_acc.update(torch.sum(preds == targets).float(), batch_size)
        # running_loss += loss.item() * inputs.size(0)
        # running_corrects += torch.sum(preds == targets.data)
      
      
      # epoch_loss = running_loss / len(dataloaders[phase])
      # epoch_acc = running_corrects.double() / len(dataloaders[phase])
      # print('{} Loss: {:.4f} Acc: {:.4f}'.format(
      #     phase, epoch_loss, epoch_acc))
      print("Phase: {} Loss: {:.4f} Acc: {:.4f}".format(phase,running_loss(),running_acc()))
  
  return model


In [115]:
train_and_evaluate(vgg, optimizer, loss_fn, dataloaders, device, num_epoch = 100)

Epoch 1/100
----------
Phase: train Loss: -31.7996 Acc: 0.4850
Phase: val Loss: -81.2977 Acc: 0.6435
Epoch 2/100
----------
Phase: train Loss: -143.8801 Acc: 0.6238
Phase: val Loss: -198.4808 Acc: 0.6435
Epoch 3/100
----------
Phase: train Loss: -267.3941 Acc: 0.6539
Phase: val Loss: -320.7415 Acc: 0.6481
Epoch 4/100
----------
Phase: train Loss: -389.5348 Acc: 0.6366
Phase: val Loss: -444.2230 Acc: 0.6343
Epoch 5/100
----------
Phase: train Loss: -514.5981 Acc: 0.6377
Phase: val Loss: -561.9692 Acc: 0.6435
Epoch 6/100
----------
Phase: train Loss: -638.0148 Acc: 0.6227
Phase: val Loss: -677.4919 Acc: 0.6481
Epoch 7/100
----------
Phase: train Loss: -764.4066 Acc: 0.6238
Phase: val Loss: -805.2958 Acc: 0.6528
Epoch 8/100
----------
Phase: train Loss: -884.5601 Acc: 0.6250
Phase: val Loss: -925.2132 Acc: 0.6435
Epoch 9/100
----------
Phase: train Loss: -1013.3043 Acc: 0.6458
Phase: val Loss: -1043.9694 Acc: 0.6435
Epoch 10/100
----------
Phase: train Loss: -1141.5556 Acc: 0.6262
Phase: 

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1