In [78]:
!nvidia-smi

Tue Apr 13 12:16:29 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.67       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   73C    P0    72W / 149W |   8236MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [79]:
! pip install fastai



In [80]:
from fastai.vision.all import *
import gc

In [81]:
gc.collect()
path = untar_data(URLs.IMAGENETTE)

In [82]:
dblock = DataBlock(blocks=(ImageBlock,CategoryBlock), 
          get_items = get_image_files,
          get_y = parent_label,
          item_tfms = Resize(460),
          batch_tfms = aug_transforms(min_scale=0.75,size=224))

In [83]:
dls = dblock.dataloaders(path, bs=64)

### Details

loss is cross entropy since we have multiple classes in imagenette. 

applies log_sigmoid to scale 0 to 1 and then nll_loss to pick the loss for the category that we gave in y.



In [84]:
loss = nn.CrossEntropyLoss()
model = xresnet50()
learn = Learner(dls, model = model, loss_func=loss, metrics=accuracy)

In [85]:
learn.fit_one_cycle(n_epoch = 4, lr_max = 3e-3)

epoch,train_loss,valid_loss,accuracy,time
0,1.613505,1.446077,0.513443,05:08
1,1.150217,1.877411,0.506721,05:07
2,0.830718,0.748242,0.759522,05:07
3,0.654599,0.585856,0.817401,05:07


### Normalization

aim is to have 0 mean and 1 std.

In [86]:
x,y = dls.one_batch()

In [87]:
x.shape

torch.Size([64, 3, 224, 224])

In [88]:
x.mean(dim=[0,2,3])

TensorImage([0.4548, 0.4634, 0.4383], device='cuda:0')

In [89]:
x.std(dim = [0,2,3])

TensorImage([0.2783, 0.2801, 0.3014], device='cuda:0')

apply imagenet stats as batch norm for dataloaders

In [90]:
dblock = DataBlock(blocks=(ImageBlock,CategoryBlock), 
          get_items = get_image_files,
          get_y = parent_label,
          item_tfms = Resize(460),
          batch_tfms = [*aug_transforms(min_scale=0.75,size=224),Normalize.from_stats(*imagenet_stats)] )


In [91]:
dls = dblock.dataloaders(path, bs=64)

Check std and mean

In [92]:
x,y = dls.one_batch()

In [93]:
x.mean(dim=[0,2,3]), x.std(dim = [0,2,3])

(TensorImage([-0.1548, -0.0213,  0.0086], device='cuda:0'),
 TensorImage([1.2543, 1.2347, 1.3218], device='cuda:0'))

In [94]:
loss = nn.CrossEntropyLoss()
model = xresnet50()
learn = Learner(dls, model = model, loss_func=loss, metrics=accuracy)

In [95]:
learn.fit_one_cycle(n_epoch = 4, lr_max = 3e-3)

epoch,train_loss,valid_loss,accuracy,time
0,1.676333,2.126866,0.379014,05:08
1,1.202324,1.204685,0.607916,05:07
2,0.900115,0.911839,0.715086,05:08
3,0.687966,0.624592,0.81068,05:07


### Progressive resizing
change the size of the image from small to big image file size like we did for discriminative learning rates.

In [96]:
def get_dls(path, size=224, bs = 64):
  dblock = DataBlock(blocks=(ImageBlock,CategoryBlock), 
          get_items = get_image_files,
          get_y = parent_label,
          item_tfms = Resize(460),
          batch_tfms = [*aug_transforms(min_scale=0.75,size=size),Normalize.from_stats(*imagenet_stats)] )
  dls = dblock.dataloaders(path, bs=bs)
  return dls

In [97]:
dls = get_dls(path, size = 128, bs = 128)

In [98]:
loss = CrossEntropyLossFlat()
model = xresnet50()
learn = Learner(dls, model = model, loss_func=loss, metrics=accuracy)

In [99]:
learn.fit_one_cycle(n_epoch = 4, lr_max = 3e-3)

epoch,train_loss,valid_loss,accuracy,time
0,1.850843,3.161836,0.34354,02:54
1,1.268619,1.632863,0.522031,02:52
2,0.929939,0.80619,0.749066,02:52
3,0.728064,0.651689,0.799851,02:53


Now update the dls with larger images.

In [100]:
learn.dls = get_dls(path, size = 224, bs = 64)
learn.fine_tune(epochs = 5, base_lr = 1e-3)

epoch,train_loss,valid_loss,accuracy,time
0,0.860408,1.197723,0.612024,05:08


epoch,train_loss,valid_loss,accuracy,time
0,0.657759,0.731116,0.774832,05:09
1,0.676394,0.651125,0.794623,05:08
2,0.566554,0.639279,0.78118,05:08
3,0.473452,0.454418,0.859597,05:08
4,0.435567,0.422796,0.866692,05:08


### Use mixup

In [124]:
loss = CrossEntropyLossFlat()
model = xresnet50()
dls = get_dls(path,size = 64, bs =128)
learn = Learner(dls, model = model, loss_func=loss, metrics=accuracy, cbs = MixUp)
learn.fit_one_cycle(n_epoch = 5, lr_max = 3e-3)

epoch,train_loss,valid_loss,accuracy,time
0,2.251003,1.884158,0.427931,02:34
1,1.765774,1.464274,0.520164,02:33
2,1.521738,1.013355,0.681105,02:33
3,1.349404,0.902084,0.715086,02:33
4,1.243419,0.796368,0.752427,02:34


In [125]:
learn.dls = get_dls(path, size = 128, bs = 64)
learn.fine_tune(epochs = 10, base_lr = 1e-3)

epoch,train_loss,valid_loss,accuracy,time
0,1.384305,1.162283,0.619492,05:09


epoch,train_loss,valid_loss,accuracy,time
0,1.200563,0.767448,0.76587,05:09
1,1.188071,0.993022,0.678118,05:09
2,1.176783,0.904067,0.715459,05:09
3,1.156271,0.689699,0.797237,05:08
4,1.104602,0.795535,0.75168,05:09
5,1.064727,0.656453,0.798357,05:08
6,1.013233,0.56132,0.826363,05:09
7,0.995163,0.487473,0.855489,05:09
8,0.971695,0.47054,0.864824,05:10
9,0.945726,0.463779,0.864451,05:09


### Test Time Augmentation and mixup

In [134]:
loss = CrossEntropyLossFlat()
model = xresnet50()
dls = get_dls(path,size = 64, bs =128)
learn = Learner(dls, model = model, loss_func=loss, metrics=accuracy, cbs = MixUp)
learn.fit_one_cycle(n_epoch = 2, lr_max = 3e-3)

epoch,train_loss,valid_loss,accuracy,time
0,2.110919,1.620425,0.486931,02:33
1,1.583353,1.068782,0.667662,02:33


(tensor([[2.7655e-02, 1.5979e-01, 1.8580e-01,  ..., 8.3680e-07, 1.2127e-07,
          2.3799e-07],
         [4.2141e-03, 2.7513e-03, 3.3834e-03,  ..., 5.0672e-10, 1.4942e-11,
          8.3662e-11],
         [3.6196e-02, 5.2357e-02, 2.4328e-01,  ..., 5.2097e-08, 1.5099e-08,
          3.2253e-08],
         ...,
         [6.5071e-04, 1.5002e-03, 8.0386e-01,  ..., 4.2782e-10, 2.1710e-10,
          3.4612e-10],
         [3.0215e-02, 2.1025e-02, 8.9948e-03,  ..., 3.9672e-07, 7.2147e-08,
          1.7754e-07],
         [2.8242e-01, 5.9730e-02, 1.9130e-02,  ..., 1.8190e-08, 3.3619e-09,
          8.7642e-09]]), TensorCategory([2, 9, 2,  ..., 2, 9, 3]))

In [136]:
accuracy(*learn.tta())

TensorBase(0.6871)

Improve by 2%.

### Label Smoothing and mixup

In [138]:
loss = LabelSmoothingCrossEntropy() # updated loss
model = xresnet50()
dls = get_dls(path,size = 64, bs =128)
learn = Learner(dls, model = model, loss_func=loss, metrics=accuracy, cbs = MixUp)
learn.fit_one_cycle(n_epoch = 5, lr_max = 3e-3)

epoch,train_loss,valid_loss,accuracy,time
0,3.055904,3.891928,0.308439,02:32
1,2.619092,2.340232,0.557879,02:33
2,2.398292,2.141437,0.600075,02:34
3,2.24642,1.849154,0.710232,02:34
4,2.150051,1.765273,0.753174,02:34


In [139]:
learn.fit_one_cycle(n_epoch = 10, lr_max = 3e-3)

epoch,train_loss,valid_loss,accuracy,time
0,2.12156,2.128823,0.603809,02:33
1,2.246579,1.976292,0.681852,02:31
2,2.263475,2.193108,0.587752,02:30
3,2.179992,1.811961,0.71994,02:30
4,2.129698,1.79986,0.724421,02:31
5,2.068104,1.750461,0.742345,02:31
6,2.017889,1.661491,0.778939,02:32
7,1.966701,1.553827,0.820388,02:32
8,1.924457,1.524308,0.82935,02:31
9,1.905021,1.514208,0.836072,02:30


Usually for these methods, more epochs are needed to see good accuracy.