In [50]:
# Setup cell adapted from the Kaggle Python 3 notebook template
# (kaggle/python docker image: https://github.com/kaggle/docker-python).
# General-purpose numerical and data-frame packages:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# The Kaggle template refers to "../input/", but this notebook keeps its data in
# the local "./cifar10" directory; list that directory as a quick sanity check.

import os
print(os.listdir("./cifar10"))

# Any results you write to the current directory are saved as output.

['.DS_Store', 'test', 'labels.txt', 'train']


In [51]:
# List the installed packages and their versions.
!pip list

Package             Version    
------------------- -----------
appnope             0.1.0      
bleach              2.0.0      
Bottleneck          1.2.1      
certifi             2018.4.16  
chardet             3.0.4      
cycler              0.10.0     
cymem               2.0.2      
cytoolz             0.9.0.1    
dataclasses         0.6        
decorator           4.0.11     
dill                0.2.8.2    
entrypoints         0.2.2      
fastai              1.0.28     
fastprogress        0.1.15     
html5lib            0.999999999
idna                2.7        
ipykernel           4.6.1      
ipython             6.0.0      
ipython-genutils    0.2.0      
ipywidgets          6.0.0      
jedi                0.10.2     
Jinja2              2.9.6      
jsonschema          2.6.0      
jupyter             1.0.0      
jupyter-client      5.0.1      
jupyter-console     5.1.0      
jupyter-core        4.3.0      
jupyterthemes       0.19.1     
kiwisolver          1.0.1      
lesscpy 




## *Speed tricks:*
- Use Pillow-SIMD instead of PIL. OpenCV is slow.
- Use half / mixed precision and double your batch size to achieve more than 30-40% speedup in training and also to lower model size for on-device inference
- [CV] Go for distributed training IF AND ONLY IF you have a very large dataset - large both in size and in number of images. Be aware that the same hyperparameters might not be optimal for both single-machine and distributed training.
- http://www.fast.ai/2018/07/02/adam-weight-decay/

## Datasets for experiments and exercises:
Datasets:

- Cifar: http://files.fast.ai/data/
- Indian Snacks: https://github.com/NavinManaswi/IndianSnacks
- To create your own dataset: https://github.com/hardikvasa/google-images-download

# References
## 7. PyTorch Tips:
- Always remember to do `.eval()` before inference.
- Always remember to zero your gradients using `.zero_grad()` in your training loop. Gradients accumulate (sum) by default.
- For complete reproducibility on GPU, disable cuDNN: `torch.backends.cudnn.enabled = False`
- `num_workers` and `pin_memory` enable fast data-loading from disk and transfer between RAM & GPU.

In [8]:
# Extract the CIFAR-10 archive (a .tgz tarball, not a zip) into the current directory
! tar -xf ./cifar10.tgz

In [52]:
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torchvision import models

In [10]:
# Show the PyTorch version, torch.version.cuda, and whether CUDA is currently available.
torch.__version__, torch.version.cuda, torch.cuda.is_available()

('0.3.0.post4', None, False)

# Linear Regression

In [11]:
class LinearRegression(nn.Module):
    """Single affine layer mapping 2 input features to 1 output value."""

    def __init__(self):
        super(LinearRegression, self).__init__()
        # fc1 owns the learnable 1x2 weight matrix and the scalar bias.
        self.fc1 = nn.Linear(in_features=2, out_features=1)

    def forward(self, features):
        """Return the layer's output for a batch of 2-feature rows."""
        prediction = self.fc1(features)
        return prediction


# Fresh model; its parameters start from a random initialisation.
m = LinearRegression()

In [12]:
# Seed NumPy so the synthetic dataset is identical on every run.
np.random.seed(42)

# 10 samples with 2 uniform random features; the target is the noise-free
# combination y = x0 + 1.65 * x1.
data = np.random.rand(10, 2)
y = data[:, 0] + data[:, 1] * 1.65

# Wrap both arrays as float tensors; the targets become a (10, 1) column so
# they line up with the model's output.
y = Variable(torch.FloatTensor(y).view(-1, 1))
data = Variable(torch.FloatTensor(data), requires_grad=True)

In [17]:
# Full-batch gradient descent on the mean-squared error.
epochs = 500

opt = optim.SGD(m.parameters(), lr=0.1)
crit = torch.nn.MSELoss()

for t in range(epochs):
    y_pred = m(data)
    loss = crit(y_pred, y)
    if t % 25 == 0:
        # Report the loss every 25 epochs.
        print(t, loss.data)

    # Gradients accumulate across backward() calls, so clear them first.
    opt.zero_grad()
    loss.backward()
    opt.step()

0 
1.00000e-09 *
  6.5013
[torch.FloatTensor of size 1]

25 
1.00000e-09 *
  4.2585
[torch.FloatTensor of size 1]

50 
1.00000e-09 *
  2.7970
[torch.FloatTensor of size 1]

75 
1.00000e-09 *
  1.8319
[torch.FloatTensor of size 1]

100 
1.00000e-09 *
  1.2044
[torch.FloatTensor of size 1]

125 
1.00000e-10 *
  7.9219
[torch.FloatTensor of size 1]

150 
1.00000e-10 *
  5.2059
[torch.FloatTensor of size 1]

175 
1.00000e-10 *
  3.4120
[torch.FloatTensor of size 1]

200 
1.00000e-10 *
  2.2424
[torch.FloatTensor of size 1]

225 
1.00000e-10 *
  1.4785
[torch.FloatTensor of size 1]

250 
1.00000e-11 *
  9.7508
[torch.FloatTensor of size 1]

275 
1.00000e-11 *
  6.4065
[torch.FloatTensor of size 1]

300 
1.00000e-11 *
  4.3797
[torch.FloatTensor of size 1]

325 
1.00000e-11 *
  2.9293
[torch.FloatTensor of size 1]

350 
1.00000e-11 *
  1.9039
[torch.FloatTensor of size 1]

375 
1.00000e-11 *
  1.1566
[torch.FloatTensor of size 1]

400 
1.00000e-12 *
  7.8129
[torch.FloatTensor of size 1]

42

In [28]:
# Inspect the learned parameters; in the recorded run the weights are about
# [1.0, 1.65] with a near-zero bias, matching the formula used to generate y.
list(m.parameters())

[Parameter containing:
  1.0000  1.6500
 [torch.FloatTensor of size 1x2], Parameter containing:
 1.00000e-06 *
   3.8803
 [torch.FloatTensor of size 1]]

### Predictions

In [29]:
# Two 2-feature rows to run through the trained model.
t = torch.FloatTensor([[1, 2], [3,4]])
test = Variable(t)
test

Variable containing:
 1  2
 3  4
[torch.FloatTensor of size 2x2]

In [30]:
# Put the model in evaluation mode before inference. BatchNorm and Dropout
# layers behave differently during training and inference, so this is critical
# for models that contain them (this model has only a Linear layer).
m.eval()

LinearRegression(
  (fc1): Linear(in_features=2, out_features=1)
)

In [31]:
# Predict for the two test rows.
m(test)

Variable containing:
 4.3000
 9.6000
[torch.FloatTensor of size 2x1]

### Save the weights
This PyTorch model can be reused for scoring predictions during test time. For this, the weights of the model can be saved to disk.

In [32]:
# state_dict() returns an ordered mapping from parameter names to their tensors.
m.state_dict()

OrderedDict([('fc1.weight', 
               1.0000  1.6500
              [torch.FloatTensor of size 1x2]), ('fc1.bias', 
              1.00000e-06 *
                3.8803
              [torch.FloatTensor of size 1])])

In [33]:
# Write only the parameters (the state_dict) to linear.pth.
torch.save(m.state_dict(), 'linear.pth')

In [34]:
# Drop the trained model so the following cells can rebuild it from the saved file.
del m

In [35]:
# Re-create the model from its class; the displayed parameters are new random values.
m = LinearRegression()
m.state_dict() # Random weights are assigned on initialisation.

OrderedDict([('fc1.weight', 
               0.1714  0.5793
              [torch.FloatTensor of size 1x2]), ('fc1.bias', 
              -0.2821
              [torch.FloatTensor of size 1])])

In [36]:
# Read the saved parameters from linear.pth and copy them into the new model.
m.load_state_dict(torch.load('linear.pth'))

In [37]:
# Confirm the restored parameters (compare with the values shown before saving).
m.state_dict()

OrderedDict([('fc1.weight', 
               1.0000  1.6500
              [torch.FloatTensor of size 1x2]), ('fc1.bias', 
              1.00000e-06 *
                3.8803
              [torch.FloatTensor of size 1])])

# Logistic Regression

In [38]:
class LogisticRegression(nn.Module):
    """Linear layer followed by a sigmoid: 2 features in, 1 value in (0, 1) out."""

    def __init__(self):
        super(LogisticRegression, self).__init__()
        self.fc1 = nn.Linear(in_features=2, out_features=1)

    def forward(self, features):
        """Apply the linear layer, then squash its score with a sigmoid."""
        score = self.fc1(features)
        return torch.sigmoid(score)


# Fresh model; its parameters start from a random initialisation.
m = LogisticRegression()

In [39]:
# Same seeded 10x2 feature matrix as in the regression example.
np.random.seed(42)
data = np.random.rand(10, 2)

# Build binary labels: linear score -> sigmoid -> min-max rescale to [0, 1]
# -> threshold at 0.5 (np.digitize yields 0 below the bin edge, 1 otherwise).
y = data[:, 0] + data[:, 1] * 1.65
y = 1. / (np.exp(-y) + 1)
y_lo, y_hi = min(y), max(y)
y = np.digitize((y - y_lo) / (y_hi - y_lo), [0.5])

# Wrap as float tensors; labels become a (10, 1) column to match the model output.
y = Variable(torch.FloatTensor(y)).view(-1, 1)
data = Variable(torch.FloatTensor(data), requires_grad=True)

In [42]:
# Full-batch gradient descent on the binary cross-entropy.
epochs = 5000
opt = optim.SGD(m.parameters(), lr=0.5)
crit = torch.nn.BCELoss()

for t in range(epochs):
    y_pred = m(data)
    loss = crit(y_pred, y)
    if t % 100 == 0:
        # Report the loss every 100 epochs.
        print(t, loss.data)

    # Gradients accumulate across backward() calls, so clear them first.
    opt.zero_grad()
    loss.backward()
    opt.step()

0 
 1.0035
[torch.FloatTensor of size 1]

100 
 0.3540
[torch.FloatTensor of size 1]

200 
 0.2668
[torch.FloatTensor of size 1]

300 
 0.2213
[torch.FloatTensor of size 1]

400 
 0.1935
[torch.FloatTensor of size 1]

500 
 0.1745
[torch.FloatTensor of size 1]

600 
 0.1606
[torch.FloatTensor of size 1]

700 
 0.1499
[torch.FloatTensor of size 1]

800 
 0.1414
[torch.FloatTensor of size 1]

900 
 0.1344
[torch.FloatTensor of size 1]

1000 
 0.1284
[torch.FloatTensor of size 1]

1100 
 0.1233
[torch.FloatTensor of size 1]

1200 
 0.1189
[torch.FloatTensor of size 1]

1300 
 0.1149
[torch.FloatTensor of size 1]

1400 
 0.1114
[torch.FloatTensor of size 1]

1500 
 0.1082
[torch.FloatTensor of size 1]

1600 
 0.1054
[torch.FloatTensor of size 1]

1700 
 0.1027
[torch.FloatTensor of size 1]

1800 
 0.1003
[torch.FloatTensor of size 1]

1900 
1.00000e-02 *
  9.8001
[torch.FloatTensor of size 1]

2000 
1.00000e-02 *
  9.5891
[torch.FloatTensor of size 1]

2100 
1.00000e-02 *
  9.3918
[torch.F

In [43]:
# Inspect the fitted weights and bias.
list(m.parameters())

[Parameter containing:
  10.9111  17.4337
 [torch.FloatTensor of size 1x2], Parameter containing:
 -10.7229
 [torch.FloatTensor of size 1]]

### Predictions

In [44]:
# Two 2-feature rows to classify with the trained model.
t = torch.FloatTensor([[0.14, 0.14], [0.24,0.67]])
test = Variable(t)
test

Variable containing:
 0.1400  0.1400
 0.2400  0.6700
[torch.FloatTensor of size 2x2]

In [45]:
# Switch to evaluation mode before predicting.
m.eval()

LogisticRegression(
  (fc1): Linear(in_features=2, out_features=1)
)

In [46]:
# Sigmoid outputs (values in (0, 1)) for the two test rows.
m(test)

Variable containing:
 0.0012
 0.9728
[torch.FloatTensor of size 2x1]

# DataLoader

In [None]:
# List the contents of the dataset directory.
! ls cifar10/
# Disabled clean-up command, kept for reference:
# ! rm cifar10/labels.txt

In [None]:
# List the entries under the training split.
!ls cifar10/train

In [None]:
# Read the whole label file, then split it into one entry per line.
with open("cifar10/labels.txt", "r") as f:
    raw_text = f.read()
labels = raw_text.strip().split('\n')

In [None]:
# Show the parsed label list.
labels

In [None]:
# Shared preprocessing: image -> tensor, then per-channel (x - 0.5) / 0.5.
data_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Loading options common to both loaders.
loader_kwargs = dict(num_workers=2, pin_memory=True)

# One sub-folder per class under each split directory.
train_dataset = datasets.ImageFolder('cifar10/train', transform=data_transforms)
test_dataset = datasets.ImageFolder('cifar10/test', transform=data_transforms)

# Training batches are reshuffled; test batches keep their order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, **loader_kwargs)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Pull a single batch from the training loader for inspection.
dataiter = iter(train_loader)
# Use the built-in next(): the Python-2 style `.next()` method is not available
# on the DataLoader iterators of current PyTorch releases, while next() works
# on every version.
images, classes = next(dataiter)
# Convert the image batch to a NumPy array for matplotlib.
images = images.numpy()

In [None]:
# Class indices returned alongside the sampled image batch.
classes

In [None]:
# Show up to 20 images of the sampled batch in a 2-row grid, titled with their labels.
n_show = min(20, len(images))             # never index past the end of the batch
n_rows = 2
n_cols = (n_show + n_rows - 1) // n_rows  # integer grid width: add_subplot rejects floats such as 20/2

fig = plt.figure(figsize=(25, 4))
for idx in range(n_show):
    ax = fig.add_subplot(n_rows, n_cols, idx + 1, xticks=[], yticks=[])
    # Undo the Normalize((0.5, ...), (0.5, ...)) step of data_transforms so the
    # pixel values are back in [0, 1], the range imshow expects for float images.
    # (Assumes the batch came from the loader built with that transform.)
    img = np.clip(images[idx] * 0.5 + 0.5, 0.0, 1.0)
    # Reorder axes from (channels, height, width) to (height, width, channels).
    ax.imshow(np.transpose(img, (1, 2, 0)))
    # Convert the 0-dim tensor to a plain int before indexing the label list.
    ax.set_title(labels[int(classes[idx])])

In [None]:
# A rudimentary network
class simplenet(nn.Module):
    """Small CNN: two conv + max-pool stages followed by two linear layers (10 outputs).

    The flatten step assumes 128 x 8 x 8 features per image, i.e. 32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        # 3x3 convolutions with padding=1 keep height/width; only pooling halves them.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)     # 3x32x32  -> 64x32x32
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)   # 64x16x16 -> 128x16x16
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(128 * 8 * 8, 500)
        self.fc2 = nn.Linear(500, 10)
        self.dropout = nn.Dropout(p=0.25)

    def forward(self, x):
        """Return one row of 10 unnormalised class scores per input image."""
        stage1 = self.pool(F.relu(self.conv1(x)))
        stage2 = self.pool(F.relu(self.conv2(stage1)))

        # Flatten, then apply dropout before and after the hidden linear layer.
        flat = self.dropout(stage2.view(-1, 128 * 8 * 8))
        hidden = self.dropout(F.relu(self.fc1(flat)))
        return self.fc2(hidden)
            

In [None]:
# Build the network on the GPU when CUDA is available, otherwise on the CPU
# (an unconditional .cuda() raises on machines without CUDA).
m = simplenet().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

In [None]:
# Train the CNN with mini-batch SGD, reporting the mean loss and accuracy per epoch.
epochs = 3
crit = nn.CrossEntropyLoss()
opt = optim.SGD(m.parameters(), lr=0.1)

# Send each batch to whichever device the model lives on instead of assuming CUDA.
device = next(m.parameters()).device
n_train = len(train_loader.dataset)

for t in range(epochs):

    train_loss = 0.0
    match = 0
    m.train()  # enable dropout for training
    for data, y in train_loader:
        data, y = data.to(device), y.to(device)
        opt.zero_grad()
        y_pred = m(data)
        loss = crit(y_pred, y)

        # Predicted class = index of the largest score; count the correct ones.
        _, pred = torch.max(y_pred, 1)
        match += (pred == y).sum().item()

        loss.backward()
        opt.step()
        # loss is the batch mean, so weight it by the batch size before summing.
        train_loss += loss.item() * data.size(0)
    train_loss = train_loss / n_train
    train_accuracy = match / n_train

    print(t, train_loss, train_accuracy)

In [None]:
# Write the trained network's parameters (its state_dict) to model_cifar.pth.
torch.save(m.state_dict(), 'model_cifar.pth')

In [None]:
# Drop the trained model so the following cells can rebuild it from the saved file.
del m

In [None]:
# Re-create the network (new random parameters) on the GPU when CUDA is
# available, otherwise on the CPU; an unconditional .cuda() raises without CUDA.
m = simplenet().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

In [None]:
# Read the saved parameters from model_cifar.pth and copy them into the new network.
m.load_state_dict(torch.load('model_cifar.pth'))

In [None]:
# Display the restored parameters.
m.state_dict()

In [None]:
# Measure classification accuracy on the test split.
match = 0
# Send each batch to whichever device the model lives on instead of assuming CUDA.
device = next(m.parameters()).device
m.eval()  # evaluation mode: dropout is disabled
# No gradients are needed for inference; skipping graph construction saves
# memory and time.
with torch.no_grad():
    for data, y in test_loader:
        data, y = data.to(device), y.to(device)
        y_pred = m(data)

        # Predicted class = index of the largest score; count the correct ones.
        _, pred = torch.max(y_pred, 1)
        match += (pred == y).sum().item()

test_accuracy = match / len(test_loader.dataset)

In [None]:
# Fraction of test images classified correctly.
test_accuracy

In [None]:
# TODO (priority tag after each item):
# - add validation (p0)
# - comment the code (p3)
# - transfer learning with ResNet50 (p1)
# - list all the pre-trained models (p2)


# Transfer learning

In [53]:
# torchvision's pretrained models expect inputs normalised with the ImageNet
# channel statistics. The backbone used below is pretrained and frozen, so feed
# it those statistics rather than CIFAR-10's. One shared Normalize keeps the
# train and test pipelines in agreement.
imagenet_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224, 0.225])

# Training: random crop rescaled to the 224x224 input size used by the model.
train_transforms = transforms.Compose([transforms.RandomResizedCrop(224),
                                       transforms.ToTensor(),
                                       imagenet_normalize])

# Testing: deterministic resize followed by a central 224x224 crop.
test_transforms = transforms.Compose([transforms.Resize(255),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      imagenet_normalize])

train_dataset = datasets.ImageFolder('cifar10/train', transform=train_transforms)
train_loader = DataLoader(train_dataset, batch_size=32, num_workers=2, pin_memory=True, shuffle=True)

test_dataset = datasets.ImageFolder('cifar10/test', transform=test_transforms)
test_loader = DataLoader(test_dataset, batch_size=64, num_workers=2, pin_memory=True, shuffle=False)

In [54]:
# Load ResNet-18 with pretrained weights.
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: module 'torch.nn.init' has no attribute 'kaiming_normal_'",
# and the version cell reported torch 0.3.0.post4. Presumably the installed
# torchvision needs a newer torch; confirm and upgrade before relying on the
# cells that follow.
m = models.resnet18(pretrained=True)

AttributeError: module 'torch.nn.init' has no attribute 'kaiming_normal_'

In [55]:
# Display the model. NOTE(review): the recorded output shows LogisticRegression,
# i.e. `m` still held the earlier model because the resnet18 load had failed.
m

LogisticRegression(
  (fc1): Linear(in_features=2, out_features=1)
)

In [None]:
# Inspect the model's `fc` attribute (replaced by a new head in a later cell).
m.fc

In [None]:
from collections import OrderedDict

# Freeze every existing parameter so that only the new head is trained.
for param in m.parameters():
    param.requires_grad = False

# Read the head's input width from the layer being replaced instead of
# hard-coding 512, so the cell also works for backbones with a different feature
# size. If this cell is re-run, m.fc is already the Sequential built here, so
# look inside it for the Linear layer.
current_head = m.fc[0] if isinstance(m.fc, nn.Sequential) else m.fc

# New 10-way classifier; freshly created modules require gradients by default.
fc = nn.Sequential(OrderedDict([
                          ('output', nn.Linear(current_head.in_features, 10))
                          ]))
m.fc = fc

In [None]:
# Move the model to the GPU when CUDA is available, otherwise keep it on the
# CPU (an unconditional .cuda() raises on machines without CUDA).
m = m.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

In [None]:
# Summarise each parameter (name, shape, trainable?) instead of printing the
# raw weight values; after the freezing step only the new `fc` head should
# report requires_grad=True.
for name, p in m.named_parameters():
    print(name, tuple(p.shape), p.requires_grad)

In [None]:
# Train only the replaced `fc` head, reporting the mean loss and accuracy per epoch.
epochs = 3
crit = nn.CrossEntropyLoss()
opt = optim.SGD(m.fc.parameters(), lr=0.1)  # optimise the head's parameters only

# Send each batch to whichever device the model lives on instead of assuming CUDA.
device = next(m.parameters()).device
n_train = len(train_loader.dataset)

for t in range(epochs):

    train_loss = 0.0
    match = 0
    # Select training mode explicitly, as the CNN training loop does, so an
    # earlier m.eval() call cannot leak into training.
    m.train()
    for data, y in train_loader:
        data, y = data.to(device), y.to(device)
        opt.zero_grad()
        y_pred = m(data)
        loss = crit(y_pred, y)

        # Predicted class = index of the largest score; count the correct ones.
        _, pred = torch.max(y_pred, 1)
        match += (pred == y).sum().item()

        loss.backward()
        opt.step()
        # loss is the batch mean, so weight it by the batch size before summing.
        train_loss += loss.item() * data.size(0)
    train_loss = train_loss / n_train
    train_accuracy = match / n_train

    print(t, train_loss, train_accuracy)

Reference - torchvision pre-trained models: https://pytorch.org/docs/stable/torchvision/models.html