In [14]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [15]:
import torch.nn as nn
import torch.nn.functional as F

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

from matplotlib import pyplot as plt
import copy

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")


In [16]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)


testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)


Files already downloaded and verified
Files already downloaded and verified


In [17]:
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=10, shuffle=True)
# testloader = torch.utils.data.DataLoader(testset, batch_size=10, shuffle=False)


classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# foreground_classes = {'plane', 'car', 'bird'}

# background_classes = {'cat', 'deer', 'dog', 'frog', 'horse','ship', 'truck'}

# fg1,fg2,fg3 = 0,1,2

In [18]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256,shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=256,shuffle=False)

In [19]:
class CNN(nn.Module):
  def __init__(self):
    super(CNN, self).__init__()
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=0)
    self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=0)
    self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=0)
    self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=0)
    self.conv5 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=0)
    self.conv6 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
    self.batch_norm1 = nn.BatchNorm2d(32, track_running_stats = False)
    self.batch_norm2 = nn.BatchNorm2d(128, track_running_stats = False)
    self.dropout1 = nn.Dropout2d(p=0.05)
    self.dropout2 = nn.Dropout2d(p=0.1)
    self.fc1 = nn.Linear(128,64)
    self.fc2 = nn.Linear(64, 32)
    self.fc3 = nn.Linear(32, 10)


  def forward(self, x):
    x = self.conv1(x)
    x = F.relu(self.batch_norm1(x))

    x = (F.relu(self.conv2(x)))
    x = self.pool(x)
    
    x = self.conv3(x)
    x = F.relu(self.batch_norm2(x))

    x = (F.relu(self.conv4(x)))
    x = self.pool(x)
    x = self.dropout1(x)

    x = self.conv5(x)
    x = F.relu(self.batch_norm2(x))

    x = (F.relu(self.conv6(x)))
    x = self.pool(x)

    x = x.view(x.size(0), -1)

    x = self.dropout2(x)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.dropout2(x)
    x = self.fc3(x)
    return x

In [20]:
cnn_net = CNN()#.double()
cnn_net = cnn_net.to("cuda")

In [21]:
print(cnn_net)

CNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv5): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv6): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (batch_norm1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
  (batch_norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
  (dropout1): Dropout2d(p=0.05, inplace=False)
  (dropout2): Dropout2d(p=0.1, inplace=False)
  (fc1): Linear(in_features=128, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=10, bias=True)
)


In [22]:
for i,j in cnn_net.state_dict().items():
  print(i)

conv1.weight
conv1.bias
conv2.weight
conv2.bias
conv3.weight
conv3.bias
conv4.weight
conv4.bias
conv5.weight
conv5.bias
conv6.weight
conv6.bias
batch_norm1.weight
batch_norm1.bias
batch_norm2.weight
batch_norm2.bias
fc1.weight
fc1.bias
fc2.weight
fc2.bias
fc3.weight
fc3.bias


In [23]:
for i,j in cnn_net.state_dict().items():
  if i == 'batch_norm1.weight':
    print(i,j)

batch_norm1.weight tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       device='cuda:0')


In [24]:
cnn_net.load_state_dict(torch.load("/content/drive/My Drive/Research/train_begining_layers_vs_last_layers/"+"cnn_net_6layer"+".pt"))

<All keys matched successfully>

In [25]:
correct = 0
total = 0
with torch.no_grad():
    for data in trainloader:
        images, labels = data
        images, labels = images.to("cuda"), labels.to("cuda")
        outputs = cnn_net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the %d train images: %d %%' % (total,  100 * correct / total))
print(total,correct)

Accuracy of the network on the 50000 train images: 99 %
50000 49890


In [26]:
correct = 0
total = 0
out = []
pred = []
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to("cuda"),labels.to("cuda")
        out.append(labels.cpu().numpy())
        outputs= cnn_net(images)
        _, predicted = torch.max(outputs.data, 1)
        pred.append(predicted.cpu().numpy())
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % ( 100 * correct / total))
print(total,correct)

Accuracy of the network on the 10000 test images: 81 %
10000 8110


In [27]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to("cuda"),labels.to("cuda")
        outputs = cnn_net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of plane : 81 %
Accuracy of   car : 100 %
Accuracy of  bird : 92 %
Accuracy of   cat : 59 %
Accuracy of  deer : 76 %
Accuracy of   dog : 66 %
Accuracy of  frog : 72 %
Accuracy of horse : 83 %
Accuracy of  ship : 90 %
Accuracy of truck : 76 %


In [28]:
print(cnn_net.conv4.weight.data)
print(cnn_net.conv4.bias.data)


tensor([[[[ 0.0895, -0.0443,  0.0334],
          [ 0.0455,  0.0136,  0.0606],
          [-0.0395, -0.0712,  0.0144]],

         [[-0.0098, -0.0118, -0.0410],
          [-0.0450, -0.0729, -0.0400],
          [-0.0862,  0.0162, -0.0055]],

         [[ 0.0694,  0.0490, -0.0367],
          [ 0.0427,  0.0560, -0.0512],
          [-0.0559, -0.0446, -0.0295]],

         ...,

         [[ 0.0982,  0.0100,  0.0214],
          [ 0.0862, -0.0232, -0.0026],
          [ 0.0483, -0.0012,  0.0263]],

         [[-0.0258,  0.0180, -0.0480],
          [-0.1320, -0.0147, -0.0030],
          [ 0.0096, -0.0164, -0.0145]],

         [[ 0.0554,  0.0462,  0.0208],
          [-0.0803, -0.0194,  0.0474],
          [-0.0846, -0.0386, -0.0152]]],


        [[[-0.0350, -0.0063,  0.0325],
          [ 0.0450,  0.0508, -0.0103],
          [-0.0696, -0.0369, -0.0723]],

         [[ 0.0016, -0.0354, -0.0124],
          [-0.0080, -0.0798,  0.0282],
          [ 0.0278,  0.0849,  0.0449]],

         [[-0.0645, -0.0708, -0

In [29]:
cnn_net.conv4 = nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1)).to("cuda")

In [30]:
print(cnn_net.conv4.weight.data)
print(cnn_net.conv4.bias.data)

tensor([[[[-6.7584e-03, -1.2017e-02,  1.3115e-02],
          [ 1.5455e-02, -1.9145e-03,  2.2009e-02],
          [-1.7018e-03, -4.6725e-03,  2.6122e-02]],

         [[ 1.4732e-02, -2.8897e-02,  9.3259e-03],
          [ 5.3789e-04, -2.6002e-02,  8.8322e-03],
          [ 1.2006e-02, -8.7156e-03,  2.4196e-03]],

         [[-1.4328e-02, -2.7359e-02, -2.3409e-02],
          [ 1.8124e-02, -1.3683e-02,  5.8244e-03],
          [ 1.2888e-02,  1.0070e-02, -2.9296e-02]],

         ...,

         [[ 2.4738e-02,  9.2259e-03, -2.2305e-02],
          [-1.2584e-02, -1.3320e-02,  1.2075e-02],
          [-2.2443e-02,  1.9700e-02, -7.8706e-03]],

         [[ 9.7796e-03,  2.4775e-02,  2.0759e-02],
          [ 1.9261e-02, -7.8243e-03,  1.2176e-02],
          [-1.2169e-02,  1.9483e-02, -5.1132e-03]],

         [[-9.4540e-03,  2.1776e-02, -2.8768e-02],
          [-2.6362e-02, -1.0023e-02,  2.0384e-02],
          [-9.0128e-03,  1.9403e-02,  1.2902e-02]]],


        [[[ 8.2842e-03,  2.2607e-02,  1.4064e-02],
  

In [31]:
cnn_net.conv5 = nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1)).to("cuda")

In [32]:
cnn_net.conv6 = nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)).to("cuda")

In [33]:
print(cnn_net.fc3.weight.data)
print(cnn_net.fc3.bias.data)

tensor([[-0.4068, -0.4362,  0.0698, -0.3009, -0.2752, -0.3439,  0.5828,  0.0313,
         -0.4780,  0.3515, -0.0035,  0.0459,  0.4003, -0.3748,  0.3472,  0.2498,
          0.4995, -0.1166,  0.4188, -0.5902, -0.2113, -0.4722,  0.4010,  0.1269,
         -0.1493, -0.1191,  0.0255,  0.3614,  0.4943, -0.0487, -0.1117,  0.1074],
        [ 0.4860, -0.1692, -0.3556, -0.5147, -0.1048,  0.5005, -0.1668, -0.1168,
          0.1488,  0.3921, -0.3384,  0.1250,  0.4053,  0.1653,  0.3789, -0.5523,
         -0.3681, -0.1684,  0.4482,  0.4026,  0.0031, -0.4201, -0.6754,  0.3405,
         -0.5888, -0.2481,  0.3360, -0.4205, -0.1408,  0.2216,  0.1392,  0.1215],
        [-0.2789,  0.4528, -0.4783, -0.4807,  0.0971, -0.5562,  0.4739,  0.5400,
          0.5873, -0.5037, -0.5427,  0.0328, -0.3043,  0.2666,  0.3060, -0.3784,
          0.4711,  0.0903, -0.5264,  0.3474, -0.1667,  0.3545,  0.3225, -0.1508,
         -0.1958,  0.3797,  0.2279, -0.0640,  0.4855,  0.1493,  0.1738,  0.1140],
        [ 0.3289,  0.4522

In [34]:
cnn_net.fc1 = nn.Linear(128,64).to("cuda")
cnn_net.fc2 = nn.Linear(64, 32).to("cuda")
cnn_net.fc3 = nn.Linear(32, 10).to("cuda")

In [35]:
print(cnn_net.fc3.weight.data)
print(cnn_net.fc3.bias.data)

tensor([[-8.4360e-02,  7.0599e-02,  1.1115e-01,  1.6262e-01,  1.1837e-01,
         -5.5130e-02,  4.9191e-03,  6.2032e-03, -1.1298e-02, -1.4745e-01,
          2.7425e-02,  6.4352e-02,  1.5184e-01,  8.0667e-02,  1.2357e-01,
          1.6944e-01, -7.1308e-02,  4.1756e-02,  4.1072e-02, -6.2947e-02,
          3.9309e-02, -1.5526e-01, -4.2489e-02,  1.1244e-01, -1.0087e-01,
          8.8317e-02,  1.6033e-01, -5.3265e-02,  8.2476e-02,  1.6181e-01,
          1.4650e-01, -4.9903e-02],
        [ 1.3040e-01, -1.1805e-01,  1.2587e-01, -9.4154e-02, -3.3288e-02,
         -1.3413e-01, -1.3299e-01, -8.7083e-02,  5.7786e-02, -7.9633e-03,
          1.1491e-01,  1.4066e-01, -6.3572e-02, -8.0789e-02, -1.0760e-01,
         -9.6073e-02, -4.7574e-02, -5.8173e-02,  9.5207e-02,  5.3442e-05,
         -8.2671e-02, -1.5538e-01, -7.1725e-02,  3.9286e-02,  6.8651e-02,
          1.4928e-02,  6.0185e-02,  2.6514e-02,  5.7601e-02,  1.3290e-01,
         -1.4836e-03, -1.4410e-01],
        [-1.5814e-01,  1.4982e-01,  3.87

In [36]:
cnn_net.conv1.weight.requires_grad  = False
cnn_net.conv1.bias.requires_grad  = False
cnn_net.conv2.weight.requires_grad  = False
cnn_net.conv2.bias.requires_grad  = False
cnn_net.conv3.weight.requires_grad  = False
cnn_net.conv3.bias.requires_grad  = False

In [37]:
for param in cnn_net.parameters():
  print(param.requires_grad)

False
False
False
False
False
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


In [38]:
correct = 0
total = 0
with torch.no_grad():
    for data in trainloader:
        images, labels = data
        images, labels = images.to("cuda"), labels.to("cuda")
        outputs = cnn_net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the %d train images: %d %%' % (total,  100 * correct / total))
print(total,correct)

Accuracy of the network on the 50000 train images: 10 %
50000 5065


In [39]:
correct = 0
total = 0
out = []
pred = []
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to("cuda"),labels.to("cuda")
        out.append(labels.cpu().numpy())
        outputs= cnn_net(images)
        _, predicted = torch.max(outputs.data, 1)
        pred.append(predicted.cpu().numpy())
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % ( 100 * correct / total))
print(total,correct)

Accuracy of the network on the 10000 test images: 10 %
10000 1036


In [40]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to("cuda"),labels.to("cuda")
        outputs = cnn_net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of plane :  0 %
Accuracy of   car : 15 %
Accuracy of  bird : 23 %
Accuracy of   cat :  0 %
Accuracy of  deer :  0 %
Accuracy of   dog :  0 %
Accuracy of  frog : 16 %
Accuracy of horse : 33 %
Accuracy of  ship :  0 %
Accuracy of truck : 11 %


In [41]:
import torch.optim as optim
criterion_cnn = nn.CrossEntropyLoss()
optimizer_cnn = optim.SGD(cnn_net.parameters(), lr=0.01, momentum=0.9)

In [42]:
acti = []
loss_curi = []
epochs = 300
for epoch in range(epochs): # loop over the dataset multiple times
    ep_lossi = []

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to("cuda"),labels.to("cuda")

        # zero the parameter gradients
        optimizer_cnn.zero_grad()

        # forward + backward + optimize
        outputs = cnn_net(inputs)
        loss = criterion_cnn(outputs, labels)
        loss.backward()
        optimizer_cnn.step()

        # print statistics
        running_loss += loss.item()
        mini_batch = 50
        if i % mini_batch == mini_batch-1:    # print every 50 mini-batches
            print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss / mini_batch))
            ep_lossi.append(running_loss/mini_batch) # loss per minibatch
            running_loss = 0.0
            
    if(np.mean(ep_lossi) <= 0.01):
      break;
    loss_curi.append(np.mean(ep_lossi))   #loss per epoch            

print('Finished Training')

[1,    50] loss: 2.294
[1,   100] loss: 2.129
[1,   150] loss: 1.562
[2,    50] loss: 0.995
[2,   100] loss: 0.885
[2,   150] loss: 0.824
[3,    50] loss: 0.695
[3,   100] loss: 0.659
[3,   150] loss: 0.657
[4,    50] loss: 0.530
[4,   100] loss: 0.527
[4,   150] loss: 0.515
[5,    50] loss: 0.440
[5,   100] loss: 0.442
[5,   150] loss: 0.451
[6,    50] loss: 0.352
[6,   100] loss: 0.366
[6,   150] loss: 0.388
[7,    50] loss: 0.289
[7,   100] loss: 0.310
[7,   150] loss: 0.321
[8,    50] loss: 0.251
[8,   100] loss: 0.268
[8,   150] loss: 0.272
[9,    50] loss: 0.197
[9,   100] loss: 0.217
[9,   150] loss: 0.232
[10,    50] loss: 0.178
[10,   100] loss: 0.175
[10,   150] loss: 0.185
[11,    50] loss: 0.145
[11,   100] loss: 0.162
[11,   150] loss: 0.171
[12,    50] loss: 0.128
[12,   100] loss: 0.127
[12,   150] loss: 0.149
[13,    50] loss: 0.103
[13,   100] loss: 0.114
[13,   150] loss: 0.132
[14,    50] loss: 0.100
[14,   100] loss: 0.103
[14,   150] loss: 0.110
[15,    50] loss: 0

In [44]:
torch.save(cnn_net.state_dict(),"/content/drive/My Drive/Research/train_begining_layers_vs_last_layers/weights"+"CIFAR10_first3layer_fixed_cnn6layer"+".pt")

In [45]:
correct = 0
total = 0
with torch.no_grad():
    for data in trainloader:
        images, labels = data
        images, labels = images.to("cuda"), labels.to("cuda")
        outputs = cnn_net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the %d train images: %d %%' % (total,  100 * correct / total))
print(total,correct)

Accuracy of the network on the 50000 train images: 99 %
50000 49877


In [46]:
correct = 0
total = 0
out = []
pred = []
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to("cuda"),labels.to("cuda")
        out.append(labels.cpu().numpy())
        outputs= cnn_net(images)
        _, predicted = torch.max(outputs.data, 1)
        pred.append(predicted.cpu().numpy())
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % ( 100 * correct / total))
print(total,correct)

Accuracy of the network on the 10000 test images: 80 %
10000 8086


In [47]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to("cuda"),labels.to("cuda")
        outputs = cnn_net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of plane : 68 %
Accuracy of   car : 92 %
Accuracy of  bird : 84 %
Accuracy of   cat : 59 %
Accuracy of  deer : 69 %
Accuracy of   dog : 73 %
Accuracy of  frog : 66 %
Accuracy of horse : 83 %
Accuracy of  ship : 85 %
Accuracy of truck : 88 %
