In [3]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import sklearn as sk
from sklearn import decomposition as dec
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn.utils.prune as prune
import sklearn.manifold as nonlin
import copy
import pickle as pk


In [4]:
print(torch.cuda.is_available())

True


## Datasets


In [49]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5), (0.5))])

batch_size = 4
#cifar10
trainset_cifar = torchvision.datasets.CIFAR10(root='../data/', train=True,
                                        download=True, transform=transform)
trainset_cifar, validset_cifar = torch.utils.data.random_split(trainset_cifar, [45000,5000])



trainloader_cifar = torch.utils.data.DataLoader(trainset_cifar, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
validloader_cifar = torch.utils.data.DataLoader(validset_cifar, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset_cifar = torchvision.datasets.CIFAR10(root='../data/', train=False,
                                       download=True, transform=transform)
testloader_cifar = torch.utils.data.DataLoader(testset_cifar, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes_cifar = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


# MNIST DATASET
mnist_train = torchvision.datasets.MNIST(root='../data/', train=True,
                                         download=True, transform=transform)
mnist_train, mnist_valid = torch.utils.data.random_split(mnist_train, [50000,10000])
mnist_test = torchvision.datasets.MNIST(root='../data/', train=False,
                                        download=True, transform=transform)

trainloader_mnist = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size,
                                                shuffle=True, num_workers=2)
validloader_mnist =  torch.utils.data.DataLoader(mnist_valid, batch_size=batch_size,
                                                shuffle=True, num_workers=2)                                          
testloader_mnist = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size,
                                               shuffle=True, num_workers=2)
classes_mnist =  ('0', '1', '2', '3',
           '4', '5', '6', '7', '8', '9')


Files already downloaded and verified
Files already downloaded and verified


In [67]:
## MNIST data sample

mnist_dataset_sample = iter(testloader_mnist)
sample_im, sample_lbl = next(mnist_dataset_sample)

sample_im, sample_lbl = sample_im.to(device), sample_lbl.to(device)


## Cifar data sample
cifar_dataset_sample = iter(testloader_cifar)
sample_im_c, sample_lbl_c = next(cifar_dataset_sample)

sample_im_c, sample_lbl_c = sample_im_c.to(device), sample_lbl_c.to(device)


# functinons

In [50]:
def are_weights(model,weights):
    w1,w2,w3 = weights
    model.classifier[0].weight.data = w1
    model.classifier[3].weight.data = w2
    model.classifier[6].weight.data = w3
    return model


def construct_model(model, layers):
    l1, l2, l3 = layers
    model.classifier[0] = l1
    model.classifier[3] = l2
    model.classifier[6] = l3
    return model
    
def change_dimensionality(weight, dr_method):
    x1 = weight.detach().cpu().numpy().T
    x1 = dr_method.fit_transform(x1)
    x1 = torch.tensor(x1.T, dtype=torch.float32).to(device)
    print("x_new vector")
    print(x1.shape)
    return x1

    

In [7]:
device = torch.device('cuda:0')

In [51]:
def average_accuracy(net, dataset):
    correct = 0
    total = 0
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in dataset:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(
        f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

def every_class_accuracy(model, testloader, classes):
    cor_pred = {classname: 0 for classname in classes}
    t_pred = {classname: 0 for classname in classes}
    with torch.no_grad():
        for data in testloader:
            im, labels = data
            output = model(im)
            _, predictions = torch.max(output, dim=1)
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    cor_pred[classes[label]] += 1
                t_pred[classes[label]] += 1
    
    for classname, correct_count in cor_pred.items():
        accuracy = 100 * float(correct_count) / t_pred[classname]
        print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

        

In [54]:
from torch.profiler import profile, record_function, ProfilerActivity
import torch.optim as optim


def training(model, trainset,valset, n, path):

    running_loss = 0.0
    total = 0.0
    correct = 0.0
    loss_nodr = []
    acc_nodr = []
    
    val_loss_nodr = []
    model = model.to(device)
    val_acc_nodr = []
  

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    print(optimizer)

    for epoch in range(n):

        print('epoch:', epoch)
        for i, data in enumerate(trainset):

            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad(True)
            with torch.set_grad_enabled(True):
                with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
                 
                 outputs = model(inputs).to(device)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                running_loss += loss.item()
        model.eval()
        val_loss = 0.0
        total_val=0.0
        correct_val = 0.0

        for i, data in enumerate(valset):

            inputs, labels2 = data
            inputs = inputs.to(device)
            labels2 = labels2.to(device)
            val_output  = model(inputs).to(device)
            loss = criterion(val_output, labels2)
            val_loss+=loss.item()
            total_val+=labels2.size(0)
            _, pred = torch.max(val_output.data, 1)
            correct_val += (pred == labels2).sum().item()

            
            
        train_loss = running_loss/len(trainset)
        acc_temp = 100 * correct / total

        valid_loss_temp = val_loss/len(valset)
        valid_acc_temp = (100 * correct_val)/total_val
        loss_nodr.append(train_loss)
        acc_nodr.append(acc_temp)

        val_loss_nodr.append(valid_loss_temp)
        val_acc_nodr.append(valid_acc_temp)
        print(
            f'[{epoch + 1}, {i + 1:5d}] train loss: {train_loss:.3f} train acc: {acc_temp:.3f}', 
            f'valid acc: {valid_acc_temp:.3f} valid loss  {valid_loss_temp:.3f} ')
        
        running_loss = 0.0
        correct = 0.0
        total = 0.0

    print('Finished Training')
    pickle_accloss(acc_nodr, loss_nodr,val_acc_nodr,val_loss_nodr,  path)

    
def pickle_accloss(acc, loss,valid_acc, valid_loss, path):
    accandloss = {'accuracy' : acc, 'loss' : loss, 'valid acc' : valid_acc, 'valid loss': valid_loss}
    with open(path, 'wb') as ica_acc_and_loss:
        pk.dump(accandloss, ica_acc_and_loss)
    print('Saved dictionary of loss and accuracy!')



In [52]:
from torchvision.models import vgg16, VGG16_Weights
vggmodel = vgg16(weights=VGG16_Weights.IMAGENET1K_V1)
vggmodel = vggmodel.to(device)

In [11]:
vggmodel.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [55]:
thepath = './data_vis/cifar10/ica/vgg_model_cifar_vis.pickle'
training(vggmodel, trainloader_cifar, validloader_cifar, 3, thepath)

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
epoch: 0
[1,  1250] train loss: 0.261 train acc: 91.427 valid acc: 85.600 valid loss  0.456 
epoch: 1
[2,  1250] train loss: 0.210 train acc: 93.004 valid acc: 86.200 valid loss  0.454 
epoch: 2
[3,  1250] train loss: 0.163 train acc: 94.740 valid acc: 84.520 valid loss  0.552 
Finished Training
Saved dictionary of loss and accuracy!


In [57]:
average_accuracy(vggmodel, testloader_cifar)

Accuracy of the network on the 10000 test images: 84 %


In [58]:
def return_model_weights(model):
    new_weights={}
    for i, mod in enumerate(model.classifier):
        if isinstance(mod, nn.Linear):
            new_weights[i] = mod.weight
    return new_weights

def drprojection(model,method, whiten):
    ret_weights = return_model_weights(model)
    weight_list = {}
    transform = None
    for i,(key,value) in enumerate(ret_weights.items()):
        original_weight = value
        if transform is not None:
            original_weight = torch.mm(value, transform)
        temp_list = list(ret_weights)
        if i!=len(temp_list)-1:
            w_reduced = change_dimensionality(
            original_weight, method(n_components=int(value.shape[0]/2),  whiten=whiten, random_state=0, tol= 10)) 
        else:
            w_reduced = original_weight

        transform = torch.mm(original_weight, w_reduced.T)
        weight_list[key] = w_reduced
        
    return weight_list


In [59]:
# def reduce_layers(model, layer_type(nn.Conv2D, nn.Linear)): functinon that would reduce specified layer of the model

def return_conv_layer_weights(model):
    vgg_weight_list = {}
    for i,feature in enumerate(model.features):
        if i == 0:
            continue
        if isinstance(feature, nn.Conv2d):
            print(feature.weight.shape)
            vgg_weight_list[i] = feature.weight
    return vgg_weight_list




def conv_reduction(matrix,Dw):
    activate = False
    matrix = matrix.reshape([matrix.shape[0], matrix.shape[1]*9])
    matrix = matrix.T
    if Dw is not None:
        matrix = matrix.reshape([-1, matrix.shape[1]*9])
        matrix = torch.mm(Dw, matrix)
        matrix = matrix.reshape([matrix.shape[0]*9, -1])
    if matrix.shape[0] < int(matrix.shape[1]/2):
        matrix = matrix.reshape([matrix.shape[0]*2, -1])
        activate = True
    print(matrix.shape)
    new_dec = dec.FastICA(n_components=int(matrix.shape[1]/2), whiten='unit-variance', random_state=0, max_iter=350)
    reduced_matrix = new_dec.fit_transform(matrix.detach().cpu().numpy())
    print(reduced_matrix.shape)
    if activate == True:
        reduced_matrix = reduced_matrix.reshape([-1, reduced_matrix.shape[1]*2])
    reduced_matrix = reduced_matrix.T
    t_reduced_matrix = torch.tensor(reduced_matrix, dtype=torch.float32).to(device)
    print(t_reduced_matrix.shape)
    Dm = torch.mm(t_reduced_matrix, matrix)
    print("delta matrix: ", Dm.shape)
    return (t_reduced_matrix, Dm)

def reduced_conv_layer_weights(conv_weight_matrices):
    reduced_vgg_params = {}
    t_w = None
    for key,value in conv_weight_matrices.items():
        (reduced_w,Dw) = conv_reduction(value, t_w)
        t_w = Dw
        reduced_w = reduced_w.reshape((reduced_w.shape[0],reduced_w.shape[1],1,1))
        reduced_w = reduced_w.reshape([reduced_w.shape[0],int(reduced_w.shape[1]/9),3,-1])
        reduced_vgg_params[key]=reduced_w
    return reduced_vgg_params

    

In [60]:
# def model_reconstruction(vggmodel, weights, layer):
#     new_model = copy.deepcopy(vggmodel)
#     match layer:
#         case 'Linear':   
#             for i,w in enumerate(weights):
#                 model_layer = nn.Linear(w.shape[1], w.shape[0], False)
#                 new_model.classifier[i] = model_layer
#                 new_model.classifier[i].weight.data = w
#                 return new_model

#         case 'Conv':
#             for i,w in enumerate(weights):
#                 model_layer = nn.Conv2d(w.shape[1], w.shape[0], kernel_size=(w.shape[2], w.shape[3]),
#                  stride=(1, 1), padding=(1, 1))
#                 new_model.features[i] = model_layer
#                 new_model.features[i].weight.data = w
#                 return new_model

#         case '':
#             return 'Please choose layer type(Conv,Linear)'
def model_reconstruction(vggmodel, weights, layer):
    new_model = copy.deepcopy(vggmodel)
    match layer:
        case 'Linear':   
            for key,value in weights.items():
                model_layer = nn.Linear(value.shape[1], value.shape[0], False)
                new_model.classifier[key] = model_layer
                new_model.classifier[key].weight.data = value
            return new_model

        case 'Conv':
            for key,value in weights.items():
                model_layer = nn.Conv2d(value.shape[1], value.shape[0], 
                kernel_size=(value.shape[2], value.shape[3]),
                 stride=(1, 1), padding=(1, 1))
                new_model.features[key] = model_layer
                new_model.features[key].weight.data = value
            return new_model

        case '':
            return 'Please choose layer type(Conv,Linear)'


In [61]:
def dr_weight_layer_reduction(model, layertype):
    """
    the functionn that performs dimensional reeduction of weight matrices.
    Depending on the layer it would perform dimensional reducton accordingly

    :param1: CNN model input p1
    :param2: input the type of layer(Conv, Linear) p2
    :return: returns a list of dimensioaly reduced matrices
    """ 
    match layertype: 
        case 'Conv':
            weight_list_vgg = return_conv_layer_weights(vggmodel)
            reduced_vgg_conv_weight_list = reduced_conv_layer_weights(weight_list_vgg)
            return reduced_vgg_conv_weight_list
        case 'Linear':
            weight_list = drprojection(vggmodel,dec.FastICA, 'â€˜unit-variance')
            return weight_list
            
        case _:
            return 'Provide the layer type like Conv or Linear'


            
            


In [19]:




# ica_vggmodel = copy.deepcopy(vggmodel)

# vgg_layers = (nn.Linear(25088, 2048, False), nn.Linear(
#     2048, 2048, False), nn.Linear(2048, 1000, False))

# ica_vggmodel = construct_model(
#     ica_vggmodel, vgg_layers)
    
# ica_vggmodel = are_weights(ica_vggmodel, tuple(weight_list))
# ica_vggmodel.eval()


In [62]:
linear_weights = dr_weight_layer_reduction(vggmodel, 'Linear')
conv_weights = dr_weight_layer_reduction(vggmodel, 'Conv')

vgg_reduced_model = model_reconstruction(vggmodel,linear_weights,'Linear')
vgg_reduced_model = model_reconstruction(vgg_reduced_model,conv_weights, 'Conv')

x_new vector
torch.Size([2048, 25088])
x_new vector
torch.Size([2048, 2048])
torch.Size([64, 64, 3, 3])
torch.Size([128, 64, 3, 3])
torch.Size([128, 128, 3, 3])
torch.Size([256, 128, 3, 3])
torch.Size([256, 256, 3, 3])
torch.Size([256, 256, 3, 3])
torch.Size([512, 256, 3, 3])
torch.Size([512, 512, 3, 3])
torch.Size([512, 512, 3, 3])
torch.Size([512, 512, 3, 3])
torch.Size([512, 512, 3, 3])
torch.Size([512, 512, 3, 3])
torch.Size([576, 64])
(576, 32)
torch.Size([32, 576])
delta matrix:  torch.Size([32, 64])
torch.Size([288, 128])
(288, 64)
torch.Size([64, 288])
delta matrix:  torch.Size([64, 128])
torch.Size([576, 128])
(576, 64)
torch.Size([64, 576])
delta matrix:  torch.Size([64, 128])
torch.Size([576, 256])
(576, 128)
torch.Size([128, 576])
delta matrix:  torch.Size([128, 256])
torch.Size([1152, 256])
(1152, 128)
torch.Size([128, 1152])
delta matrix:  torch.Size([128, 256])
torch.Size([1152, 256])
(1152, 128)
torch.Size([128, 1152])
delta matrix:  torch.Size([128, 256])
torch.Size([1

In [71]:
for conweights in vgg_reduced_model.features:
    if isinstance(conweights, nn.Conv2d):
        print(conweights.weight.shape)
for conweights in vgg_reduced_model.classifier:
    if isinstance(conweights, nn.Linear):
        print(conweights.weight.shape)

torch.Size([64, 3, 3, 3])
torch.Size([32, 64, 3, 3])
torch.Size([64, 32, 3, 3])
torch.Size([64, 64, 3, 3])
torch.Size([128, 64, 3, 3])
torch.Size([128, 128, 3, 3])
torch.Size([128, 128, 3, 3])
torch.Size([256, 128, 3, 3])
torch.Size([256, 256, 3, 3])
torch.Size([256, 256, 3, 3])
torch.Size([256, 256, 3, 3])
torch.Size([256, 256, 3, 3])
torch.Size([256, 256, 3, 3])
torch.Size([2048, 25088])
torch.Size([2048, 2048])
torch.Size([1000, 2048])


In [70]:
vgg_reduced_model = vgg_reduced_model.to(device)
test_output = vgg_reduced_model(sample_im_c)
print(test_output)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (4x12544 and 25088x2048)