In [1]:
import fastai as fai
import torch
import torch.nn as nn

import fastai.vision.all as fv
from torchvision.transforms import ToTensor, ToPILImage

In [2]:

def load_data(folder, img_size, batch_size):
    """Build fastai ``DataLoaders`` for an image-classification folder.

    Items are the image files found under ``folder``. Each image is labelled
    with the name of its parent directory, and the train/validation split is
    decided by the grandparent directory (``GrandparentSplitter`` with its
    default settings).

    Args:
        folder: Root directory handed to ``DataBlock.dataloaders``.
        img_size: Size passed to ``fv.Resize`` (applied to every item).
        batch_size: Batch size, forwarded as ``bs``.

    Returns:
        The ``DataLoaders`` produced by ``DataBlock.dataloaders``.
    """
    # Batch-level augmentation: vertical flips allowed, any rotation angle,
    # plus lighting, zoom and warp perturbations.
    tfms = fv.aug_transforms(
        flip_vert=True, max_rotate=360, max_lighting=0.3, max_zoom=1.2, max_warp=0.2
    )

    block = fv.DataBlock(
        blocks=(fv.ImageBlock, fv.CategoryBlock),
        get_items=fv.get_image_files,
        # Same labelling as `lambda x: x.parent.name`, but a named function
        # can be pickled (needed for export / multi-process loading).
        get_y=fv.parent_label,
        splitter=fv.GrandparentSplitter(),
        item_tfms=fv.Resize(img_size),
        batch_tfms=tfms,
    )
    return block.dataloaders(folder, bs=batch_size)

In [3]:
# Build the DataLoaders from the "flowers" folder (224px images, batches of 128).
# .cpu() is applied here; the loaders are moved to the GPU later with data.cuda().
data = load_data("flowers", img_size=224, batch_size=128).cpu()

In [4]:
# Flatten connects the convolutional layers to the linear (dense) ones.
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one.

    An input of shape ``(N, d1, d2, ...)`` comes out as ``(N, d1*d2*...)``.
    """

    def forward(self, x):
        """Return ``x`` reshaped to 2-D, keeping dimension 0 (the batch) intact."""
        # Not `x.squeeze()`: with a batch of size 1 it would drop the batch dimension too.
        # Not `x.view(...)`: view raises on non-contiguous input, whereas reshape
        # returns a view when it can and copies only when it has to.
        return x.reshape(x.shape[0], -1)

In [5]:
# Small CNN classifier assembled stage by stage. The resulting nn.Sequential holds
# the same layers, in the same order, as a single flat listing would.

# BatchNorm on the raw 3-channel input, then the first stride-2 convolution
# (this one has no BatchNorm after it).
stem = [
    nn.BatchNorm2d(3),
    nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2),
]

# Three stages with the same layout: stride-2 conv -> BatchNorm -> ReLU -> 2x2 max-pool.
stages = []
for n_in, n_out in [(32, 64), (64, 128), (128, 256)]:
    stages += [
        nn.Conv2d(n_in, n_out, kernel_size=3, stride=2, padding=1),
        nn.BatchNorm2d(n_out),
        nn.ReLU(),
        nn.MaxPool2d(2),
    ]

# Last convolution, then global average pooling and flattening to (batch, 512).
neck = [
    nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    Flatten(),
]

# Dense classifier: 512 -> 256 -> one output per class (data.c).
head = [
    nn.BatchNorm1d(512),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.BatchNorm1d(256),
    nn.Linear(256, data.c),
]

model = nn.Sequential(*stem, *stages, *neck, *head)

In [6]:
# Grab a single batch: x holds the images, y the integer class labels.
x,y = data.one_batch()

In [7]:
x.shape # the batch of images

torch.Size([128, 3, 224, 224])

In [8]:
# Labels of the batch and their shape: one class index per image.
y,y.shape

(TensorCategory([ 47,  78,  97,  34,  96,  23,  49,  15,  97,  69,  97,   6,  29,   6,
          73,  82,  73,  56,  44,  96,  51,  31,  35,   3,  66,   2,  63,  44,
          85,  79,  21,  95,  46,  82,   1, 101,  59,  53,  62,  25,  69,  20,
          98,  31,  69,  69,  11,  84,   2,  39,  38,  26,  82,   3,  90,  62,
          31,  77,  35,  89,   7,  30,  22,  98,  93,  94,  79,  64,  97,  63,
          82,  22,  89,   8,  96,  64,  43,   9,  50,   3,  74,  86,  29,  46,
          66,  90,  92,  30,  69,   3,  68,  43,  36,  42,  58,  69,  46,  96,
          49,  97,  97,  91,  61,  31,  66,  75,  25,  98,  18,   0,  66,   9,
          62,  81,  74,  45,  52,  98,  75,  86, 101,  67,  89,  60,  70,  19,
          26,  35]),
 torch.Size([128]))

In [9]:
# Raw model outputs for the batch and their shape (one row per image, one column per class).
# Note: written this way, the forward pass is evaluated twice.
model(x),model(x).shape

(TensorImage([[ 0.6981,  0.2981,  0.7111,  ...,  2.1236, -0.4089,  0.3520],
         [ 0.3114,  0.5317,  0.4605,  ...,  1.5059,  0.6413,  0.8027],
         [-1.4082,  0.0556, -0.7916,  ...,  0.1958, -0.0925, -0.8400],
         ...,
         [ 0.2047, -0.0108, -0.0568,  ..., -0.1234,  0.4563, -0.4029],
         [-1.1821,  0.2389, -0.4801,  ..., -0.9712,  0.4503,  0.4711],
         [-0.4847,  0.7709, -0.2336,  ..., -0.0716,  0.1796,  0.4752]],
        grad_fn=<AliasBackward>),
 torch.Size([128, 102]))

## Sigmoide

In [10]:
# Sigmoid squashes every output independently into (0, 1); rows are not forced to sum to 1.
torch.sigmoid(model(x))

TensorImage([[0.6678, 0.5740, 0.6707,  ..., 0.8932, 0.3992, 0.5871],
        [0.5772, 0.6299, 0.6131,  ..., 0.8184, 0.6550, 0.6905],
        [0.1965, 0.5139, 0.3118,  ..., 0.5488, 0.4769, 0.3015],
        ...,
        [0.5510, 0.4973, 0.4858,  ..., 0.4692, 0.6121, 0.4006],
        [0.2347, 0.5594, 0.3822,  ..., 0.2746, 0.6107, 0.6156],
        [0.3811, 0.6837, 0.4419,  ..., 0.4821, 0.5448, 0.6166]],
       grad_fn=<AliasBackward>)

## Softmax

In [11]:
torch.softmax(model(x),dim=1) # dim=1: normalise across each row

TensorImage([[0.0176, 0.0118, 0.0178,  ..., 0.0731, 0.0058, 0.0124],
        [0.0103, 0.0129, 0.0120,  ..., 0.0341, 0.0144, 0.0169],
        [0.0020, 0.0088, 0.0038,  ..., 0.0101, 0.0076, 0.0036],
        ...,
        [0.0099, 0.0080, 0.0076,  ..., 0.0071, 0.0128, 0.0054],
        [0.0026, 0.0106, 0.0052,  ..., 0.0032, 0.0131, 0.0134],
        [0.0054, 0.0188, 0.0069,  ..., 0.0081, 0.0104, 0.0140]],
       grad_fn=<AliasBackward>)

In [12]:
# Check: after softmax over dim=1, every row sums to 1.
torch.softmax(model(x),dim=1).sum(dim=1)

TensorImage([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0

In [13]:
# Softmax on a small random 5x3 tensor, which is easier to inspect by eye.
torch.softmax(torch.randn(5,3),dim=1)

tensor([[0.3331, 0.1335, 0.5334],
        [0.4596, 0.3072, 0.2332],
        [0.2866, 0.1999, 0.5135],
        [0.5496, 0.2319, 0.2185],
        [0.7035, 0.0870, 0.2096]])

In [14]:
# Sum of the last row printed by the previous cell; the result is not exactly 1
# only because the printout rounds to 4 decimals.
0.7035+ 0.0870+ 0.2096

1.0001

## MSE loss

In [15]:
# Predictions (raw scores) for the batch, kept for the loss examples that follow.
yp = model(x)

In [16]:
# One row per image, one column per class.
yp.shape

torch.Size([128, 102])

In [17]:
# Targets: a single integer class index per image, so y is 1-D while yp is 2-D.
y, y.shape

(TensorCategory([ 47,  78,  97,  34,  96,  23,  49,  15,  97,  69,  97,   6,  29,   6,
          73,  82,  73,  56,  44,  96,  51,  31,  35,   3,  66,   2,  63,  44,
          85,  79,  21,  95,  46,  82,   1, 101,  59,  53,  62,  25,  69,  20,
          98,  31,  69,  69,  11,  84,   2,  39,  38,  26,  82,   3,  90,  62,
          31,  77,  35,  89,   7,  30,  22,  98,  93,  94,  79,  64,  97,  63,
          82,  22,  89,   8,  96,  64,  43,   9,  50,   3,  74,  86,  29,  46,
          66,  90,  92,  30,  69,   3,  68,  43,  36,  42,  58,  69,  46,  96,
          49,  97,  97,  91,  61,  31,  66,  75,  25,  98,  18,   0,  66,   9,
          62,  81,  74,  45,  52,  98,  75,  86, 101,  67,  89,  60,  70,  19,
          26,  35]),
 torch.Size([128]))

In [18]:
# bs = number of rows of yp (batch size), c = number of columns (classes).
bs,c=yp.shape

In [19]:
z = torch.zeros_like(yp) # tensor of zeros with the same size as yp
z[torch.arange(bs),y] = 1 # put a 1 in each row at the column given by its label

In [21]:
# One-hot row of the first sample: a single 1, at index y[0].
z[0]

TensorImage([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [23]:
import torch.nn.functional as F

In [27]:
def distrancia_ecuclideana(yp, y):
    """Mean squared error between softmax probabilities and one-hot targets.

    Args:
        yp: Raw scores, 2-D, one row per sample and one column per class.
        y: Integer class index for each row of ``yp``.

    Returns:
        The scalar ``F.mse_loss`` between ``softmax(yp, dim=1)`` and the
        one-hot encoding of ``y``.
    """
    n_rows, _ = yp.shape  # the unpacking also enforces that yp is 2-D

    # One-hot targets: a single 1 per row, at the column of the true class.
    one_hot = torch.zeros_like(yp)
    row_idx = torch.arange(n_rows, device=one_hot.device)
    one_hot[row_idx, y] = 1

    probs = torch.softmax(yp, dim=1)
    return F.mse_loss(probs, one_hot)

In [25]:
# Move the DataLoaders to the GPU before training.
data.cuda()

<fastai.data.core.DataLoaders at 0x7fa5bd0ea0a0>

In [28]:
# Learner that trains the model with the custom softmax + MSE loss and reports accuracy.
learn = fv.Learner(data,model,loss_func=distrancia_ecuclideana,metrics=fv.accuracy)

In [29]:
# Train for 3 epochs with fastai's one-cycle schedule.
learn.fit_one_cycle(3)

epoch,train_loss,valid_loss,accuracy,time
0,0.009536,0.009566,0.118343,00:16
1,0.009159,0.008762,0.220907,00:15
2,0.008747,0.00839,0.270217,00:15


## Cross Entropy

In [30]:
# Predictions and labels from the earlier cells (yp was computed before any training).
yp, y

(TensorImage([[ 0.6981,  0.2981,  0.7111,  ...,  2.1236, -0.4089,  0.3520],
         [ 0.3114,  0.5317,  0.4605,  ...,  1.5059,  0.6413,  0.8027],
         [-1.4082,  0.0556, -0.7916,  ...,  0.1958, -0.0925, -0.8400],
         ...,
         [ 0.2047, -0.0108, -0.0568,  ..., -0.1234,  0.4563, -0.4029],
         [-1.1821,  0.2389, -0.4801,  ..., -0.9712,  0.4503,  0.4711],
         [-0.4847,  0.7709, -0.2336,  ..., -0.0716,  0.1796,  0.4752]],
        grad_fn=<AliasBackward>),
 TensorCategory([ 47,  78,  97,  34,  96,  23,  49,  15,  97,  69,  97,   6,  29,   6,
          73,  82,  73,  56,  44,  96,  51,  31,  35,   3,  66,   2,  63,  44,
          85,  79,  21,  95,  46,  82,   1, 101,  59,  53,  62,  25,  69,  20,
          98,  31,  69,  69,  11,  84,   2,  39,  38,  26,  82,   3,  90,  62,
          31,  77,  35,  89,   7,  30,  22,  98,  93,  94,  79,  64,  97,  63,
          82,  22,  89,   8,  96,  64,  43,   9,  50,   3,  74,  86,  29,  46,
          66,  90,  92,  30,  69,   3,

In [31]:
# Cross-entropy takes the raw scores and the integer labels directly; the softmax is
# applied internally. For reference, a uniform guess over 102 classes gives ln(102) ~ 4.62.
F.cross_entropy(yp,y)

TensorImage(4.7532, grad_fn=<AliasBackward>)

In [32]:
# Fresh, randomly initialised copy of the same architecture, so the cross-entropy run
# does not start from the weights already trained with the MSE-based loss.
layers = [
    # BatchNorm directly on the 3-channel input
    nn.BatchNorm2d(3),
    # stride-2 convolutions, each followed by ReLU and a 2x2 max-pool
    nn.Conv2d(3, 32, 3, stride=2, padding=1), nn.ReLU(), nn.MaxPool2d(2),
    nn.Conv2d(32, 64, 3, stride=2, padding=1), nn.BatchNorm2d(64), nn.ReLU(), nn.MaxPool2d(2),
    nn.Conv2d(64, 128, 3, stride=2, padding=1), nn.BatchNorm2d(128), nn.ReLU(), nn.MaxPool2d(2),
    nn.Conv2d(128, 256, 3, stride=2, padding=1), nn.BatchNorm2d(256), nn.ReLU(), nn.MaxPool2d(2),
    nn.Conv2d(256, 512, 3, stride=2, padding=1), nn.ReLU(),
    # global average pooling, then flatten to (batch, 512)
    nn.AdaptiveAvgPool2d(1), Flatten(),
    # dense classifier with one output per class
    nn.BatchNorm1d(512), nn.Linear(512, 256), nn.ReLU(),
    nn.BatchNorm1d(256), nn.Linear(256, data.c),
]
model = nn.Sequential(*layers)

In [33]:
# Same Learner setup as in the MSE experiment, but with cross-entropy as the loss.
learn = fv.Learner(data,model, loss_func=F.cross_entropy,metrics=fv.accuracy)

In [35]:
# Train for 3 epochs, as in the MSE run, so the accuracies can be compared.
learn.fit_one_cycle(3)

epoch,train_loss,valid_loss,accuracy,time
0,3.803591,3.284229,0.211045,00:16
1,3.116828,2.768371,0.311637,00:15
2,2.696129,2.421287,0.38856,00:15


# Dropout

In [36]:
drop = nn.Dropout(p=0.5)# randomly kills half of the activations (each one is zeroed with probability 0.5)

In [37]:
# Small random input to see dropout in action.
# Note: this rebinds x, which held the image batch until now.
x = torch.rand(1,10)

In [38]:
# Show the input values before dropout.
x

tensor([[0.1536, 0.4293, 0.9672, 0.8392, 0.0232, 0.2964, 0.5813, 0.6330, 0.1087,
         0.1348]])

In [39]:
# Apply dropout: some entries become 0 and the surviving ones are scaled up.
drop(x)

tensor([[0.3073, 0.8587, 1.9344, 1.6784, 0.0465, 0.5929, 0.0000, 0.0000, 0.2174,
         0.2696]])

In [40]:
drop(x)/x # in this case the scaling factor is 2, i.e. 1/(1-p) with p=0.5

tensor([[0., 2., 0., 0., 2., 2., 2., 0., 0., 2.]])

In [41]:
# Inspect the head fastai builds by default; the printout shows where it inserts Dropout layers.
fv.create_head(10,20,ps=0.2)

Sequential(
  (0): AdaptiveConcatPool2d(
    (ap): AdaptiveAvgPool2d(output_size=1)
    (mp): AdaptiveMaxPool2d(output_size=1)
  )
  (1): Flatten(full=False)
  (2): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): Dropout(p=0.1, inplace=False)
  (4): Linear(in_features=10, out_features=512, bias=False)
  (5): ReLU(inplace=True)
  (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (7): Dropout(p=0.2, inplace=False)
  (8): Linear(in_features=512, out_features=20, bias=False)
)

## Rant: Normalización en el pipeline 
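Disclaimer: lo siguiente es lo que **NO** tienes que hacer. Por defecto, `cnn_learner` modifica `data`: a partir de ese momento los batches salen normalizados.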

In [42]:
# Largest pixel value in a batch before any Learner is built on top of `data`.
data.one_batch()[0].max()

TensorImage(1., device='cuda:0')

In [43]:
learn = fv.cnn_learner(data, fv.resnet18) #fastai modifies data (compare a batch before and after this call)

In [45]:
# After cnn_learner the batches come out normalised: values are no longer confined to [0, 1].
x,y = data.one_batch(); x

TensorImage([[[[ 6.8496e-01,  5.5976e-01,  4.8874e-01,  ..., -1.1879e+00,
           -1.1040e+00, -9.6079e-01],
          [ 5.1974e-01,  4.7323e-01,  4.0884e-01,  ..., -1.1359e+00,
           -1.2606e+00, -1.2288e+00],
          [ 4.1374e-01,  3.8843e-01,  3.4905e-01,  ..., -1.1664e+00,
           -1.2057e+00, -1.1853e+00],
          ...,
          [ 9.2473e-01,  9.3705e-01,  8.8014e-01,  ...,  6.7229e-01,
            6.4242e-01,  5.3628e-01],
          [ 9.0665e-01,  8.9364e-01,  8.6092e-01,  ...,  6.6271e-01,
            6.5749e-01,  5.5991e-01],
          [ 8.1143e-01,  8.0738e-01,  7.9074e-01,  ...,  6.1540e-01,
            6.2962e-01,  6.0171e-01]],

         [[ 1.4642e+00,  1.4095e+00,  1.3567e+00,  ..., -8.8660e-01,
           -7.2954e-01, -5.3610e-01],
          [ 1.3675e+00,  1.3188e+00,  1.2719e+00,  ..., -9.8283e-01,
           -1.0049e+00, -9.1443e-01],
          [ 1.2636e+00,  1.2331e+00,  1.2031e+00,  ..., -1.0795e+00,
           -1.0837e+00, -1.0061e+00],
          ...,


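Lo que sí hay que hacer: volver a cargar los datos y pasar `normalize=False` a `cnn_learner`, para que no modifique `data`.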

In [46]:
# Fresh DataLoaders (128px images, batches of 64), replacing the ones cnn_learner modified.
data = load_data("flowers/", 128,64)

In [47]:
# With normalize=False the batch printed in the next cell keeps its original value range.
learn = fv.cnn_learner(data, fv.resnet18, normalize=False)

In [48]:
# Check: this time the batch still holds the unnormalised pixel values.
x,y = data.one_batch(); x

TensorImage([[[[5.8905e-01, 4.7535e-01, 3.3563e-01,  ..., 2.4043e-01,
           4.0860e-01, 3.7871e-01],
          [5.9370e-01, 5.3798e-01, 4.4467e-01,  ..., 3.5732e-01,
           4.3652e-01, 4.4750e-01],
          [5.4872e-01, 5.4318e-01, 4.7088e-01,  ..., 4.1778e-01,
           4.5799e-01, 4.2388e-01],
          ...,
          [4.5593e-01, 5.3773e-01, 5.2584e-01,  ..., 4.0209e-01,
           4.3608e-01, 4.8121e-01],
          [4.2766e-01, 4.3637e-01, 4.0105e-01,  ..., 3.6656e-01,
           3.8448e-01, 4.2904e-01],
          [4.2246e-01, 4.0106e-01, 3.5437e-01,  ..., 4.1231e-01,
           3.8813e-01, 4.0922e-01]],

         [[5.5812e-01, 5.1150e-01, 4.4174e-01,  ..., 3.9598e-01,
           5.4713e-01, 5.1951e-01],
          [5.6958e-01, 5.5412e-01, 4.9710e-01,  ..., 4.8608e-01,
           5.6207e-01, 5.6930e-01],
          [5.7169e-01, 5.6273e-01, 5.0705e-01,  ..., 5.4162e-01,
           5.7195e-01, 5.3730e-01],
          ...,
          [6.1911e-01, 6.8550e-01, 6.7578e-01,  ..., 4

Normalizando y desnormalizando

In [49]:
fv.imagenet_stats # the statistics the resnets expect their inputs to be normalised with

([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

In [50]:
class Normalizer(nn.Module):
    """Per-channel input normalisation as a layer: ``(x - mean) / std``.

    Placing this as the first layer of a model keeps the normalisation with the
    model rather than in the data pipeline.

    Args:
        mean: One value per channel. Defaults to the ImageNet means.
        std: One value per channel. Defaults to the ImageNet standard deviations.
    """

    def __init__(self, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        super().__init__()
        # Shape (1, C, 1, 1) so the statistics broadcast over batch, height and width.
        # Registered as non-persistent buffers: they follow the module through
        # .to()/.cuda() (no host-to-device copy on every forward) while staying out of
        # state_dict, so saved weights look the same as with plain attributes.
        self.register_buffer(
            "mean", torch.tensor(mean, dtype=torch.float32)[None, :, None, None], persistent=False
        )
        self.register_buffer(
            "std", torch.tensor(std, dtype=torch.float32)[None, :, None, None], persistent=False
        )

    def forward(self, x):
        """Normalise ``x`` (expected as ``(N, C, H, W)``) channel by channel."""
        # .to(x) matches the device and dtype of x; it is a no-op when they already agree.
        m = self.mean.to(x)
        s = self.std.to(x)
        return (x - m) / s

In [51]:
# Instance using the default (ImageNet) statistics.
norm = Normalizer()

In [52]:
# Quick smoke test on a random input of shape (1, 3, 128, 128).
norm(torch.randn(1,3,128,128))

tensor([[[[ -0.0172,  -1.7057,  -2.5804,  ...,  -1.1515,  -1.2079,   6.6562],
          [ -0.8360,  -5.5545,   0.4247,  ..., -10.4694,  -7.0180,  -6.8103],
          [ -2.3168,  -0.9566,  -0.3995,  ...,   4.5261,  -1.8944,  -3.7446],
          ...,
          [ -3.9944,  -7.7687,   2.2836,  ...,   0.6829,  -1.6392,  -1.0009],
          [  1.2091,  -6.4376,  -3.4287,  ...,  -0.5127,   0.4010,  -0.1861],
          [ -6.3290,   6.6867,  -2.6729,  ..., -11.4198,  -8.1186,  -9.0158]],

         [[ -9.3681,  -1.3645,   1.4513,  ...,  -1.2663,  -5.8293,   1.8533],
          [  2.7708,  -7.9702,  -6.9271,  ...,  -8.7777,  -1.0926,  -2.9316],
          [ -2.9826,  -7.5074,   4.3988,  ...,  -0.0720,  -8.3963,   2.1120],
          ...,
          [ -6.6603,  -2.7246,  -5.6108,  ...,  -5.4349,  -4.7074,   3.3870],
          [ -1.5184,  -6.9386,  -7.3731,  ...,  -2.6532,  -4.3538,  -7.6216],
          [ -9.6578,  -7.6979,   1.5624,  ...,  -6.3831,   7.4009,  -5.9192]],

         [[ -2.3664,  -5.0943,

In [54]:
# Build the learner without touching `data`, then put the normalisation inside the model,
# as its first layer, instead of in the data pipeline.
learn = fv.cnn_learner(data,fv.resnet18, normalize=False)
learn.model = nn.Sequential(Normalizer(),learn.model) 