In [93]:
import torch as tch
import numpy as np
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import sklearn as sk
from sklearn import decomposition as dec
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt



In [101]:
class NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels=3, out_channels=12, kernel_size=(5, 5))
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        self.conv2 = nn.Conv2d(
            in_channels=12, out_channels=16, kernel_size=(5, 5))
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

        # self.conv1 = nn.Conv2d(3, 6, 5)
        # self.pool = nn.MaxPool2d(2, 2)
        # self.conv2 = nn.Conv2d(6, 16, 5)
        # self.fc1 = nn.Linear(16 * 5 * 5, 120)
        # self.fc2 = nn.Linear(120, 84)
        # self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [85]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 4
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                         shuffle=True, num_workers=5)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


Files already downloaded and verified
Files already downloaded and verified


In [103]:
path = './cifar_net.pt'
model = NN()
# model.load_state_dict(torch.load(path))
model_small = NN()
# model_small.load_state_dict(torch.load(path))


In [87]:
model.eval()

NN(
  (conv1): Conv2d(3, 12, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [106]:
# first layer 
fc_W = model_small.fc1.weight.detach().numpy()
new_components = int(fc_W.shape[0]/2)
reduction = dec.PCA(n_components=new_components, svd_solver='full')
fc_1_reduced = reduction.fit_transform(fc_W.T)
fc_1_reduced = torch.tensor(fc_1_reduced.T)
print(fc_1_reduced.shape)

bias_fc1 = model_small.fc1.bias

# print(bias_fc1.shape)
# bias_fc1 = bias_fc1.reshape(90, -1)
# bias_reduction = dec.PCA(n_components=1)
# fc1_bias_reduced = bias_reduction.fit_transform(bias_fc1.detach().numpy())

# fc1_bias_reduced = torch.tensor(fc1_bias_reduced.flatten())
# print('reduced bias:', fc1_bias_reduced.shape)


torch.Size([60, 400])


In [107]:
#second layer 
fc2_W = model_small.fc2.weight.detach().numpy()
PCA_fc2 = dec.PCA(n_components=new_components, svd_solver='full')
fc2_reduced = PCA_fc2.fit_transform(fc2_W)

fc2_reduced = fc2_reduced.T
PCA2_fc2 = dec.PCA(n_components=int(fc2_reduced.shape[1]/2), svd_solver='full')
fc2_reduced = PCA2_fc2.fit_transform(fc2_reduced)
fc2_reduced =  torch.tensor(fc2_reduced.T)
print(fc2_reduced.shape)

torch.Size([42, 60])


In [108]:
# if n_components/2 is bigger than n_features then dont transpose, but perform reshaping and reduction 

fc3_w = model_small.fc3.weight.detach().numpy()
fc3_w = fc3_w.reshape([fc3_w.shape[0] * 2, -1])
print(fc3_w.shape)
fc3_w = fc3_w.T
PCA_fc3 = dec.PCA(n_components=int(fc3_w.shape[1]/2), svd_solver='full')
fc3_w_reduced = PCA_fc3.fit_transform(fc3_w)
fc3_w_reduced = torch.tensor(fc3_w_reduced.T)

print(fc3_w_reduced.shape)


(20, 42)
torch.Size([10, 42])


In [109]:
#recreating new network 
model_small.fc1 = nn.Linear(in_features=400, out_features=60, bias=True)
model_small.fc1.weight = nn.Parameter(fc_1_reduced)
# model_small.fc1.bias = nn.Parameter(fc1_bias_reduced)

model_small.fc2 = nn.Linear(in_features=60, out_features=42, bias=True)
model_small.fc2.weight = nn.Parameter(fc2_reduced)


model_small.fc3 = nn.Linear(in_features=42, out_features=10, bias=True)
model_small.fc3.weight = nn.Parameter(fc3_w_reduced)


In [110]:
model.eval()

NN(
  (conv1): Conv2d(3, 12, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [111]:
model_small.eval()

NN(
  (conv1): Conv2d(3, 12, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=60, bias=True)
  (fc2): Linear(in_features=60, out_features=42, bias=True)
  (fc3): Linear(in_features=42, out_features=10, bias=True)
)

In [112]:
dataset = iter(testloader)
in_img, lbl = dataset.next()
outputl = model_small(in_img)

acc,temp_pred = torch.max(outputl,dim=1)
print(acc)

for a in temp_pred:
    print(classes[a])


tensor([15.8766, 34.9308,  7.3381, 22.3589], grad_fn=<MaxBackward0>)
plane
plane
plane
plane


In [52]:
outputl = model(in_img)

acc,temp_pred = torch.max(outputl,dim=1)
print(acc)

for a in temp_pred:
    print(classes[a])


tensor([4.0205, 7.7926, 5.0547, 4.5317], grad_fn=<MaxBackward0>)
cat
ship
plane
plane


In [22]:
#reduction of the second layer of the network 
fc2_w = model.fc2.weight.detach().numpy()
fc2_PCA = dec.PCA(n_components=int(84/2))
fc2_layer_reduced = fc2_PCA.fit_transform(fc2_w.T)
print(fc2_layer_reduced.shape)
fc2_layer_reduced = fc2_layer_reduced.T
print('fc2_layer_reduced ', fc2_layer_reduced.shape)

#perform multiplication of old matrix and reduced matrix


(120, 42)
fc2_layer_reduced  (42, 120)


In [21]:
fc3 = model.fc3.weight
print(fc3.shape)

new_fc3 = torch.matmul(fc3, X)
print('new matrix of fc3 layer: ', new_fc3.shape)

##conduct cosine similarity by comparing new matrix and X matrix

E_temp = torch.matmul(new_fc3.T, fc3)
print(E_temp.shape)

#find similarities:
cos_fc3 = nn.CosineSimilarity(dim=1)
fc3_sim = cos_fc3(X, E_temp.T)

print(fc3_sim)

total_fc3 = 0

for a in fc3_sim:
    total_fc3 +=a
total_fc3 = total_fc3/len(fc3_sim)
print(total_fc3)


torch.Size([10, 84])
new matrix of fc3 layer:  torch.Size([10, 42])
torch.Size([42, 84])
tensor([ 0.8395,  0.8469,  0.9454,  0.6472,  0.7083,  0.7650,  0.5385,  0.8060,
         0.9144,  0.6490,  0.9273,  0.6979,  0.6682,  0.3946,  0.6890,  0.7483,
         0.2198,  0.8316,  0.6959,  0.8775,  0.6261,  0.3385,  0.6635,  0.6180,
         0.8512,  0.6699,  0.9532,  0.4287,  0.8352,  0.8731,  0.1903,  0.4935,
         0.8885, -0.3047,  0.2722,  0.6279,  0.8193,  0.4788,  0.8583,  0.7888,
         0.8218,  0.8430,  0.6485,  0.9124,  0.8887,  0.6772,  0.8583,  0.9225,
         0.8845,  0.3520,  0.8102,  0.8137,  0.7740,  0.8066,  0.8770,  0.9296,
         0.8875,  0.8340,  0.3671,  0.8011,  0.7058,  0.2967,  0.6892,  0.8286,
         0.5859,  0.8805,  0.7201,  0.6430,  0.7113,  0.0725, -0.1680,  0.7011,
         0.7720,  0.8795,  0.6934,  0.7270,  0.4572,  0.7839,  0.7352,  0.8298,
         0.5066,  0.4515,  0.5519,  0.7777], grad_fn=<SumBackward1>)
tensor(0.6776, grad_fn=<DivBackward0>)


In [28]:
# reassinging the updated weight matrices to the model
print(model.fc1.weight.shape)
print(model.fc2.weight.shape)
print(model.fc3.weight.shape)
print('----------- reduced')

fc_1_weight = fc_1_reduced.T
fc_2_weight = torch.tensor(fc2_layer_reduced.T)

print(fc_1_weight.shape)
print(fc_2_weight.shape)
print(new_fc3.shape)



print('reasigning modified matrix to the relevant parameters:')

model.fc1.weight = nn.Parameter(fc_1_weight)
model.fc2.weight = nn.Parameter(fc_2_weight)
model.fc3.weight = nn.Parameter(new_fc3)

print(model.fc1.weight.shape)
print(model.fc2.weight.shape)
print(model.fc3.weight.shape)


torch.Size([400, 60])
torch.Size([60, 42])
torch.Size([10, 42])
----------- reduced
torch.Size([60, 400])
torch.Size([42, 60])
torch.Size([10, 42])
reasigning modified matrix to the relevant parameters:
torch.Size([60, 400])
torch.Size([42, 60])
torch.Size([10, 42])


In [33]:
bias_fc2 = model.fc2.bias
print(bias_fc2.shape)


print(bias_fc2.shape)
bias_fc2 = bias_fc2.reshape(42, -1)
bias_fc2_reduction = dec.PCA(n_components=1)
fc2_bias_reduced = bias_fc2_reduction.fit_transform(bias_fc2.detach().numpy())

fc2_bias_reduced = torch.tensor(fc2_bias_reduced.flatten())
print(fc2_bias_reduced.shape)


torch.Size([84])
torch.Size([84])
torch.Size([42])


In [35]:
model.fc1.bias = nn.Parameter(fc1_bias_reduced)
model.fc2.bias = nn.Parameter(fc2_bias_reduced)

print(model.fc1.bias.shape)
print(model.fc2.bias.shape)

torch.Size([60])
torch.Size([42])


In [67]:
data_batch_sample = iter(testloader)
input, labels = data_batch_sample.next()

for label in labels:
    print(classes[label])


cat
ship
ship
plane


In [69]:
result = model(input)
print(result.shape)

torch.Size([4, 10])


In [64]:

output = model(in_img)
theloss = criterion(output, labels)
correct = 0.0

acc, lb_pred = torch.max(output, dim=1)

correct += (lb_pred==labels).sum().item()

accuratno = correct * 100/labels.size(0)
print(accuratno)

for a in lb_pred:
    print(classes[a])


25.0
cat
ship
plane
plane


In [115]:
print(torch.cuda.is_available())

False


In [113]:
from torch.profiler import profile, record_function, ProfilerActivity
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_small.parameters(), lr=0.001, momentum=0.9)

print(optimizer)

running_loss = 0.0
total = 0.0
correct = 0.0
loss_nodr =[]
acc_nodr = []
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

for epoch in range(5):
    
    print('epoch:', epoch)
    for i, data in enumerate(trainloader):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad(True)
        with torch.set_grad_enabled(True):
            with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
             outputs = model_small(inputs).to(device)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        running_loss += loss.item()
        if i % 2000 == 1999:
            loss_temp = running_loss / 2000
            acc_temp = 100 * correct / total
            loss_nodr.append(loss_temp)
            acc_nodr.append(acc_temp)
            print(
                f'[{epoch + 1}, {i + 1:5d}] loss: {loss_temp:.3f} acc: {acc_temp:.3f}')
            running_loss = 0.0
            correct = 0.0
            total = 0.0
print('Finished Training')


SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
epoch: 0


  warn("CUDA is not available, disabling CUDA profiling")


[1,  2000] loss: 1.573 acc: 46.188
[1,  4000] loss: 1.301 acc: 54.775
[1,  6000] loss: 1.247 acc: 56.025
[1,  8000] loss: 1.190 acc: 58.350
[1, 10000] loss: 1.177 acc: 58.288
[1, 12000] loss: 1.137 acc: 60.962
epoch: 1
[2,  2000] loss: 1.364 acc: 62.140
[2,  4000] loss: 1.082 acc: 61.775
[2,  6000] loss: 1.053 acc: 63.413
[2,  8000] loss: 1.079 acc: 61.900
[2, 10000] loss: 1.060 acc: 63.150
[2, 12000] loss: 1.088 acc: 62.087
epoch: 2
[3,  2000] loss: 1.287 acc: 64.010
[3,  4000] loss: 1.016 acc: 64.975
[3,  6000] loss: 1.016 acc: 64.475
[3,  8000] loss: 0.996 acc: 65.125
[3, 10000] loss: 1.022 acc: 64.537
[3, 12000] loss: 0.981 acc: 65.475
epoch: 3
[4,  2000] loss: 1.230 acc: 65.850
[4,  4000] loss: 0.954 acc: 66.575
[4,  6000] loss: 0.956 acc: 66.737
[4,  8000] loss: 0.984 acc: 66.300
[4, 10000] loss: 0.968 acc: 66.150
[4, 12000] loss: 0.969 acc: 66.250
epoch: 4
[5,  2000] loss: 1.150 acc: 67.480
[5,  4000] loss: 0.912 acc: 68.125
[5,  6000] loss: 0.950 acc: 66.513
[5,  8000] loss: 0.

In [118]:
dataset = iter(testloader)
in_img, lbl = dataset.next()
outputl = model_small(in_img)

acc, temp_pred = torch.max(outputl, dim=1)
print(acc)

for a in temp_pred:
    print(classes[a])


tensor([1.8310, 6.5635, 2.8749, 4.9052], grad_fn=<MaxBackward0>)
cat
ship
ship
ship


In [119]:
correct = 0
correct_1 = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs_small = model_small(images)
        outputs_normal = model(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs_small.data, 1)
        _, predicted_1 = torch.max(outputs_normal.data, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        correct_1 +=(predicted_1 == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
print(f'Accuracy of the network on the 10000 test images: {100 * correct_1 // total} %')

Accuracy of the network on the 10000 test images: 64 %
Accuracy of the network on the 10000 test images: 8 %


In [120]:
# saving the reduced model pca 
new_path = './cifar_PCA_reduced.pt'
torch.save(model_small.state_dict(),new_path)