## Using different DR technique (ICA) on saved neural network 

In [3]:
!pip install scipy



In [4]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import sklearn as sk
from sklearn import decomposition as dec
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import scipy as sc


In [14]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 4
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=5)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


Files already downloaded and verified
Files already downloaded and verified


#### Loading saved network

In [15]:
class NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels=3, out_channels=12, kernel_size=(5, 5))
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        self.conv2 = nn.Conv2d(
            in_channels=12, out_channels=16, kernel_size=(5, 5))
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

        # self.conv1 = nn.Conv2d(3, 6, 5)
        # self.pool = nn.MaxPool2d(2, 2)
        # self.conv2 = nn.Conv2d(6, 16, 5)
        # self.fc1 = nn.Linear(16 * 5 * 5, 120)
        # self.fc2 = nn.Linear(120, 84)
        # self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [10]:
PATH = './cifar_net.pt'

In [53]:
model = NN()
model.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [54]:
print(model.eval())

NN(
  (conv1): Conv2d(3, 12, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [18]:
test_data = iter(testloader)
images_test, labels_test = test_data.next()

In [55]:
the_output = model(images_test)

acc, lb_pred = torch.max(the_output, 1)
print(lb_pred)



tensor([3, 8, 0, 0])


In [56]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs_normal = model(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs_normal.data, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(
    f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')



Accuracy of the network on the 10000 test images: 64 %


In [57]:
fc1_w = model.fc1.weight.detach().numpy()
fc2_w = model.fc2.weight.detach().numpy()
fc3_w = model.fc3.weight.detach().numpy()


print(fc1_w.shape)


(120, 400)


In [58]:

fc1_ICA = dec.FastICA(n_components=60,
            whiten='unit-variance')

fc1_reduced = fc1_ICA.fit_transform(fc1_w.T)
fc1_reduced  = torch.tensor(fc1_reduced.T ,dtype=torch.float)
   

In [60]:
fc2_ICA = dec.FastICA(n_components=60,
            whiten='unit-variance')
fc2_reduced = fc2_ICA.fit_transform(fc2_w)
print(fc2_reduced.shape)

fc2_ICA2 = dec.FastICA(n_components=42, whiten='unit-variance')
fc2_reduced = fc2_ICA2.fit_transform(fc2_reduced.T)
fc2_reduced = torch.tensor(fc2_reduced.T,  dtype=torch.float)


(84, 60)


In [61]:
fc3_w = model.fc3.weight.detach().numpy()
fc3_w = fc3_w.reshape(fc3_w.shape[0] * 2, -1)
fc3_ICA = dec.FastICA(n_components=10, whiten='unit-variance', max_iter=300)
fc3_reduced = fc3_ICA.fit_transform(fc3_w.T)
fc3_reduced = torch.tensor(fc3_reduced.T, dtype=torch.float)
print(fc3_reduced.shape)


torch.Size([10, 42])


In [62]:
print(fc1_reduced.shape)
print(fc2_reduced.shape)
print(fc3_reduced.shape)

torch.Size([60, 400])
torch.Size([42, 60])
torch.Size([10, 42])


In [64]:
model.fc1 = nn.Linear(400, 60, bias=True)
model.fc2 = nn.Linear(60,42,bias=True)
model.fc3 = nn.Linear(42,10, bias=True)

model.fc1.weight = nn.Parameter(fc1_reduced)
model.fc2.weight = nn.Parameter(fc2_reduced)
model.fc3.weight = nn.Parameter(fc3_reduced)

In [71]:
the_output = model(images_test)

acc, lb_pred = torch.max(the_output, 1)
print(acc)
print(lb_pred)

tensor([3.6445, 6.9086, 0.7945, 5.2515], grad_fn=<MaxBackward0>)
tensor([3, 8, 0, 0])


In [70]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs_normal = model(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs_normal.data, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(
    f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')


Accuracy of the network on the 10000 test images: 62 %


In [69]:
from torch.profiler import profile, record_function, ProfilerActivity
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

print(optimizer)

running_loss = 0.0
total = 0.0
correct = 0.0
loss_nodr =[]
acc_nodr = []
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

for epoch in range(5):
    
    print('epoch:', epoch)
    for i, data in enumerate(trainloader):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad(True)
        with torch.set_grad_enabled(True):
            with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
             outputs = model(inputs).to(device)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        running_loss += loss.item()
        if i % 2000 == 1999:
            loss_temp = running_loss / 2000
            acc_temp = 100 * correct / total
            loss_nodr.append(loss_temp)
            acc_nodr.append(acc_temp)
            print(
                f'[{epoch + 1}, {i + 1:5d}] loss: {loss_temp:.3f} acc: {acc_temp:.3f}')
            running_loss = 0.0
            correct = 0.0
            total = 0.0
print('Finished Training')


SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
epoch: 0


  warn("CUDA is not available, disabling CUDA profiling")


[1,  2000] loss: 1.376 acc: 51.750
[1,  4000] loss: 1.155 acc: 59.362
[1,  6000] loss: 1.134 acc: 60.413
[1,  8000] loss: 1.120 acc: 61.112
[1, 10000] loss: 1.086 acc: 61.950
[1, 12000] loss: 1.097 acc: 62.600
epoch: 1
[2,  2000] loss: 1.283 acc: 64.640
[2,  4000] loss: 1.012 acc: 64.700
[2,  6000] loss: 1.005 acc: 65.850
[2,  8000] loss: 0.998 acc: 65.188
[2, 10000] loss: 1.010 acc: 64.350
[2, 12000] loss: 1.023 acc: 64.550
epoch: 2
[3,  2000] loss: 1.174 acc: 67.530
[3,  4000] loss: 0.939 acc: 66.888
[3,  6000] loss: 0.949 acc: 67.562
[3,  8000] loss: 0.963 acc: 66.875
[3, 10000] loss: 0.921 acc: 68.213
[3, 12000] loss: 0.963 acc: 66.713
epoch: 3
[4,  2000] loss: 1.118 acc: 69.110
[4,  4000] loss: 0.904 acc: 68.750
[4,  6000] loss: 0.933 acc: 68.050
[4,  8000] loss: 0.900 acc: 68.700
[4, 10000] loss: 0.917 acc: 68.688
[4, 12000] loss: 0.914 acc: 68.513
epoch: 4
[5,  2000] loss: 1.084 acc: 70.520
[5,  4000] loss: 0.856 acc: 70.450
[5,  6000] loss: 0.873 acc: 69.787
[5,  8000] loss: 0.

In [73]:
path_2 = './model_ICA_reduced.pt'
torch.save(model.state_dict, path_2)

#### Check the capacity of the model after applying FastICA onto the weights matrix


In [202]:
data_sample, labels = iter(testloader).next()

for l in labels:
    print('classes of data sample: ', classes[l])

classes of data sample:  cat
classes of data sample:  ship
classes of data sample:  ship
classes of data sample:  plane


In [200]:


print(model.fc1.bias)
bias_n = model.fc1.bias
bias_n = bias_n.detach().numpy()
bias_n = bias_n.reshape(60, -1)
print(bias_n.shape)

bias_transformer = dec.FastICA(n_components=1,
            random_state=0,
            whiten='unit-variance',
            tol=2,
            max_iter=500)
aa = bias_transformer.fit_transform(bias_n)
aa = aa.flatten()
model.fc1.bias = nn.Parameter(torch.tensor(aa, dtype=torch.float))
print('net bias updated:', model.fc1.bias.shape)


bias_n = model.fc2.bias
bias_n = bias_n.detach().numpy()
bias_n = bias_n.reshape(42, -1)
print(bias_n.shape)

bias_transformer = dec.FastICA(n_components=1,
            random_state=0,
            whiten='unit-variance',
            tol=2,
            max_iter=500)
aa = bias_transformer.fit_transform(bias_n)
aa = aa.flatten()
model.fc2.bias = nn.Parameter(torch.tensor(aa, dtype=torch.float))
print('net bias updated:', model.fc2.bias.shape)

Parameter containing:
tensor([ 0.0106, -0.0324,  0.0332, -0.0228, -0.1344, -0.0133,  0.0336,  0.1185,
         0.1332, -0.1631, -0.1097,  0.1063, -0.1316, -0.0879, -0.0189,  0.0023,
        -0.1091, -0.0054, -0.0687, -0.2570, -0.1722,  0.0175,  0.0333,  0.0006,
         0.1241,  0.0555,  0.1032, -0.0437,  0.2082, -0.0532, -0.0901, -0.2231,
         0.0722, -0.0443,  0.3296,  0.0758, -0.3826,  0.0570,  0.0889,  0.1991,
         0.1702, -0.0960,  0.0980, -0.2003, -0.1400,  0.0505, -0.1144,  0.0542,
         0.0348,  0.2222,  0.0564, -0.0176,  0.2313,  0.0844, -0.1637,  0.1060,
        -0.1245,  0.0091,  0.0931,  0.0072], requires_grad=True)
(60, 1)
net bias updated: torch.Size([60])
(42, 1)
net bias updated: torch.Size([42])


In [205]:
result = model(data_sample)
print(result.shape)

acc, lb_prediction  =  torch.max(result, 1)
print(lb_prediction)

for lb in lb_prediction:
    print('predicted classes: ', classes[lb])

torch.Size([4, 10])
tensor([3, 6, 7, 3])
predicted classes:  cat
predicted classes:  frog
predicted classes:  horse
predicted classes:  cat
