# Import tools

In [None]:
# built-in utilities
import copy
import os
import time
import datetime

# data tools
import numpy as np

# pytorch 
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.tensorboard import SummaryWriter
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# visualization
import matplotlib.pyplot as plt
%matplotlib inline

# Load data

In [None]:
# define data transformer: convert images to tensors, then standardise them
# (0.1307 / 0.3081 are presumably the MNIST pixel mean / std - TODO confirm)
normalizeStep = transforms.Normalize((0.1307,), (0.3081,))
transformation = transforms.Compose([transforms.ToTensor(), normalizeStep])

## download data (cached under ./data after the first run)
# train split
trainDataset = datasets.MNIST(
    root="data",
    train=True,
    transform=transformation,
    download=True,
)

# test split
testDataset = datasets.MNIST(
    root="data",
    train=False,
    transform=transformation,
    download=True,
)

## wrap both splits in shuffled mini-batch loaders
trainDataLoader = DataLoader(trainDataset, batch_size=128, shuffle=True)
testDataLoader = DataLoader(testDataset, batch_size=128, shuffle=True)

In [None]:
# plot sample image
def plotSample(image, mean=0.1307, std=0.3081):
    """
    Display a single image tensor in grayscale.

    The image is mapped back from normalised space before plotting, i.e. the
    inverse of ``(x - mean) / std`` is applied: ``x = image * std + mean``.

    Args:
        image: tensor exposing ``.numpy()``; assumed to be a single 2-D image
            - TODO confirm against callers.
        mean: mean that was subtracted during normalisation.
        std: standard deviation that was divided by during normalisation.
    """
    image = image.numpy()
    # undo normalisation: scale by std first, then shift by mean
    image = (std * image) + mean
    plt.imshow(image, cmap="gray")
    
# walk the image tensor stored on the dataset object (presumably the raw,
# untransformed pixels - TODO confirm) and display the first entry
sample = iter(trainDataLoader.dataset.data)
firstImage = next(sample)
plotSample(firstImage)

# Question 1

## Question 1, Part 1




In [None]:
# set model architecture
class fcNet(nn.Module):
    """
    Baseline fully connected classifier.

    Maps a flattened 784-feature input through five 1024-unit ReLU layers and
    returns log-probabilities over 10 classes.
    """

    def __init__(self):
        super().__init__()
        width = 1024
        # layers are created in forward order so parameter registration
        # (and therefore seeded initialisation) follows the data flow
        self.inputLayer = nn.Linear(784, width)
        self.fullyConnected1 = nn.Linear(width, width)
        self.fullyConnected2 = nn.Linear(width, width)
        self.fullyConnected3 = nn.Linear(width, width)
        self.fullyConnected4 = nn.Linear(width, width)
        self.outputLayer = nn.Linear(width, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a (batch, 784) input."""
        hiddenStack = (
            self.inputLayer,
            self.fullyConnected1,
            self.fullyConnected2,
            self.fullyConnected3,
            self.fullyConnected4,
        )
        for layer in hiddenStack:
            x = F.relu(layer(x))
        logits = self.outputLayer(x)
        return F.log_softmax(logits, dim=1)

In [None]:
# set input kwargs as object attributes
class ParamConfig:
    """Plain attribute bag: every keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        for name in kwargs:
            setattr(self, name, kwargs[name])

# configure all necessary parameters
# `model` and `optimizer` are passed as callables; PyTorchTrainer invokes them itself
modelParams = ParamConfig(
    model=fcNet,
    optimizer=torch.optim.Adam,
    criterion=F.nll_loss,
    trainDataLoader=DataLoader(trainDataset, batch_size=128, shuffle=True),
    testDataLoader=DataLoader(testDataset, batch_size=128, shuffle=True),
    cuda=torch.cuda.is_available(),
    device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"),
    seed=0,
    lr=0.001,
    epochs=25,
    saveModel=True,
)

In [None]:
class PyTorchTrainer:
    """
    Small train / evaluate harness for a classifier fed with flattened 28x28 inputs.

    The ``config`` object must expose the following attributes:
        trainDataLoader, testDataLoader: iterables yielding ``(data, target)`` batches.
        seed: value passed to ``torch.manual_seed``.
        cuda, device: device settings; batches and the model are moved to ``device``.
        model: zero-argument callable returning the ``nn.Module`` to train.
        optimizer: callable invoked as ``optimizer(model.parameters(), lr=lr)``.
        criterion: callable invoked as ``criterion(preds, target)`` returning a scalar loss.
        lr, epochs, saveModel: stored as-is for use by the caller.

    Statistics collected while running:
        trainLoss: one float per training batch.
        testLoss: one float per evaluation batch.
        testAccuracy: one percentage per call to ``test``.
    """
    def __init__(self, config):
        self.globaliter = 0

        # data loaders
        self.trainDataLoader = config.trainDataLoader
        self.testDataLoader = config.testDataLoader

        # random seed settings (set before the model is built so that weight
        # initialisation is reproducible)
        self.seed = config.seed
        torch.manual_seed(self.seed)

        # device settings
        self.cuda = config.cuda
        self.device = config.device

        # model training settings
        self.model = config.model().to(self.device)
        self.lr = config.lr
        self.epochs = config.epochs
        self.optimizer = config.optimizer(self.model.parameters(), lr=self.lr)
        self.criterion = config.criterion

        # save model
        self.saveModel = config.saveModel

        # statistics
        self.trainLoss = []
        self.testLoss = []
        self.testAccuracy = []

    def train(self, epoch):
        """
        Run one pass over the training loader, updating the model weights.

        Args:
            epoch: epoch number, used only in the progress messages.
        """
        # set model to train mode
        self.model.train()
        print("*" * 80)

        # iterate through batches
        for batchIdx, (data, target) in enumerate(self.trainDataLoader):
            self.globaliter += 1

            # reshape data as needed and send data to GPU if available
            data = data.reshape(-1, 28*28).to(self.device)
            target = target.to(self.device)

            # zero out gradients
            self.optimizer.zero_grad()

            # generate predictions
            preds = self.model(data)

            # calculate loss given current predictions vs. ground truth
            loss = self.criterion(preds, target)

            # back propagate error and optimize weights
            loss.backward()
            self.optimizer.step()

            # capture batch loss as a plain float so the history does not hold
            # on to tensors (and their autograd references / device memory)
            batchLoss = loss.item()
            self.trainLoss.append(batchLoss)

            if batchIdx % 100 == 0:
                print("Train Epoch: {} | Batch: {} [Processed {}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                    epoch, batchIdx, batchIdx * len(data), len(self.trainDataLoader.dataset),
                    100. * batchIdx / len(self.trainDataLoader), batchLoss))
        print()

    def test(self, epoch):
        """
        Evaluate the model on the test loader and record loss / accuracy.

        Appends one entry per batch to ``testLoss`` and one accuracy percentage
        to ``testAccuracy``, then prints the epoch summary.

        Args:
            epoch: epoch number (kept for symmetry with ``train``; not used).
        """
        # set model to eval mode
        self.model.eval()
        lossSum = 0.0
        correct = 0

        # turn off gradients
        with torch.no_grad():
            # iterate through batches
            for data, target in self.testDataLoader:
                # reshape data as needed and send data to GPU if available
                data = data.reshape(-1, 28*28).to(self.device)
                target = target.to(self.device)

                # generate predictions
                preds = self.model(data)

                # calculate loss given current predictions vs. ground truth
                batchLoss = self.criterion(preds, target).item()

                # accumulate a per-sample total; assumes the criterion returns
                # the mean over the batch (the default for F.nll_loss)
                lossSum += batchLoss * len(data)

                preds = preds.argmax(dim=1, keepdim=True)

                # capture count of correct answers
                correct += preds.eq(target.view_as(preds)).sum().item()

                # capture batch loss
                self.testLoss.append(batchLoss)

            # overall epoch loss and accuracy
            numSamples = len(self.testDataLoader.dataset)
            testLoss = lossSum / numSamples
            accuracy = 100. * correct / numSamples

            # capture epoch accuracy
            self.testAccuracy.append(accuracy)

            print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n\n'.format(
                testLoss, correct, numSamples, accuracy))

In [None]:
### fit model
# instantiate model object
trainer = PyTorchTrainer(config=modelParams)

# iterate fitting procedure over specified epoch count
for epoch in range(1, trainer.epochs + 1):
    trainer.train(epoch)
    trainer.test(epoch)

# save model
if trainer.saveModel:
    # exist_ok=True replaces the isdir() check + mkdir() pair and is safe
    # when the directory already exists
    os.makedirs("models/", exist_ok=True)
    PATH = "models/_hw3_q1_baseline.pt"
    torch.save(trainer.model.state_dict(), PATH)

In [None]:
# plot test accuracy (one point per evaluated epoch)
fig, ax = plt.subplots(figsize=(20, 10), facecolor="white")
ax.plot(trainer.testAccuracy)
ax.set_title("Test set accuracy")
plt.show()

## Question 1, Part 3

In [None]:
# load model
PATH = "models/_hw3_q1_baseline.pt"
model = fcNet().to(device)
# map_location lets a checkpoint saved on GPU be restored on a CPU-only machine
model.load_state_dict(torch.load(PATH, map_location=device))
model.eval()

In [None]:
# SVD - input layer
# Factor the weight matrix as W = U diag(S) V^T. The weight is detached first
# so the factors (and everything later derived from them) are plain tensors
# that are not tracked by autograd.
wIn = model.inputLayer.weight.detach()
uIn, sIn, vIn = torch.svd(wIn)
print("SVD - input layer")
print("*"*40 + "\n")
print("w shape: {}".format(wIn.shape))
print()
print("u shape: {}".format(uIn.shape))
print("s shape: {}".format(sIn.shape))
print("v shape: {}".format(vIn.shape))

# rebuild W from its factors to confirm the shapes line up
wInHat = torch.mm(uIn, torch.mm(sIn.diag(), vIn.t()))
print()
print("w hat shape: {}".format(wInHat.shape))


In [None]:
# SVD - Fully connected 1
# Factor the weight matrix as W = U diag(S) V^T. The weight is detached first
# so the factors (and everything later derived from them) are plain tensors
# that are not tracked by autograd.
wFc1 = model.fullyConnected1.weight.detach()
uFc1, sFc1, vFc1 = torch.svd(wFc1)
print("SVD - Fully connected 1")
print("*"*40 + "\n")
print("w shape: {}".format(wFc1.shape))
print()
print("u shape: {}".format(uFc1.shape))
print("s shape: {}".format(sFc1.shape))
print("v shape: {}".format(vFc1.shape))

# rebuild W from its factors to confirm the shapes line up
wFc1Hat = torch.mm(uFc1, torch.mm(sFc1.diag(), vFc1.t()))
print()
print("w hat shape: {}".format(wFc1Hat.shape))


In [None]:
# SVD - Fully connected 2
# Factor the weight matrix as W = U diag(S) V^T. The weight is detached first
# so the factors (and everything later derived from them) are plain tensors
# that are not tracked by autograd.
wFc2 = model.fullyConnected2.weight.detach()
uFc2, sFc2, vFc2 = torch.svd(wFc2)
print("SVD - Fully connected 2")
print("*"*40 + "\n")
print("w shape: {}".format(wFc2.shape))
print()
print("u shape: {}".format(uFc2.shape))
print("s shape: {}".format(sFc2.shape))
print("v shape: {}".format(vFc2.shape))

# rebuild W from its factors to confirm the shapes line up
wFc2Hat = torch.mm(uFc2, torch.mm(sFc2.diag(), vFc2.t()))
print()
print("w hat shape: {}".format(wFc2Hat.shape))


In [None]:
# SVD - Fully connected 3
# Factor the weight matrix as W = U diag(S) V^T. The weight is detached first
# so the factors (and everything later derived from them) are plain tensors
# that are not tracked by autograd.
wFc3 = model.fullyConnected3.weight.detach()
uFc3, sFc3, vFc3 = torch.svd(wFc3)
print("SVD - Fully connected 3")
print("*"*40 + "\n")
print("w shape: {}".format(wFc3.shape))
print()
print("u shape: {}".format(uFc3.shape))
print("s shape: {}".format(sFc3.shape))
print("v shape: {}".format(vFc3.shape))

# rebuild W from its factors to confirm the shapes line up
wFc3Hat = torch.mm(uFc3, torch.mm(sFc3.diag(), vFc3.t()))
print()
print("w hat shape: {}".format(wFc3Hat.shape))


In [None]:
# SVD - Fully connected 4
# Factor the weight matrix as W = U diag(S) V^T. The weight is detached first
# so the factors (and everything later derived from them) are plain tensors
# that are not tracked by autograd.
wFc4 = model.fullyConnected4.weight.detach()
uFc4, sFc4, vFc4 = torch.svd(wFc4)
print("SVD - Fully connected 4")
print("*"*40 + "\n")
print("w shape: {}".format(wFc4.shape))
print()
print("u shape: {}".format(uFc4.shape))
print("s shape: {}".format(sFc4.shape))
print("v shape: {}".format(vFc4.shape))

# rebuild W from its factors to confirm the shapes line up
wFc4Hat = torch.mm(uFc4, torch.mm(sFc4.diag(), vFc4.t()))
print()
print("w hat shape: {}".format(wFc4Hat.shape))


## Question 1, Part 4


In [None]:
# rank-D approximation of the input-layer weights: keep only the first D
# singular values and the matching columns of U and V
D = 10
leadingU = uIn[:, :D]
leadingS = sIn[:D]
leadingV = vIn[:, :D]
compressExample = torch.mm(leadingU, torch.mm(leadingS.diag(), leadingV.t()))
print("compressed matrix shape: {}".format(compressExample.shape))

## Question 1, Part 5

Run one forward pass through the network for each value of D.

In [None]:
# load model
PATH = "models/_hw3_q1_baseline.pt"
model = fcNet().to(device)
# map_location lets a checkpoint saved on GPU be restored on a CPU-only machine
model.load_state_dict(torch.load(PATH, map_location=device))
model.eval()

In [None]:
# load model
PATH = "models/_hw3_q1_baseline.pt"
model = fcNet().to(device)
# map_location lets a checkpoint saved on GPU be restored on a CPU-only machine
model.load_state_dict(torch.load(PATH, map_location=device))
model.eval()

# test accuracy obtained for each rank in Ds (same order)
scores = []
# NOTE: 784 is full rank only for the 1024x784 input layer; the 1024x1024
# hidden layers are still truncated at that setting
Ds = [10, 20, 50, 100, 200, 784]

# evaluation only, so no autograd bookkeeping is needed
with torch.no_grad():
    for D in Ds:
        # replace each hidden weight matrix by its rank-D approximation;
        # biases and the output layer keep their loaded values
        model.inputLayer.weight.data = torch.mm(uIn[:,:D], torch.mm(sIn[:D].diag(), vIn[:,:D].t()))
        model.fullyConnected1.weight.data = torch.mm(uFc1[:,:D], torch.mm(sFc1[:D].diag(), vFc1[:,:D].t()))
        model.fullyConnected2.weight.data = torch.mm(uFc2[:,:D], torch.mm(sFc2[:D].diag(), vFc2[:,:D].t()))
        model.fullyConnected3.weight.data = torch.mm(uFc3[:,:D], torch.mm(sFc3[:D].diag(), vFc3[:,:D].t()))
        model.fullyConnected4.weight.data = torch.mm(uFc4[:,:D], torch.mm(sFc4[:D].diag(), vFc4[:,:D].t()))

        correct = 0

        # iterate through batches
        for data, target in testDataLoader:
            # reshape data as needed and send it to the same device as the model
            data = data.reshape(-1, 28*28).to(device)
            target = target.to(device)

            # predicted class = index of the largest log-probability
            preds = model(data).argmax(dim=1, keepdim=True)

            # capture count of correct answers
            correct += preds.eq(target.view_as(preds)).sum().item()

        # overall accuracy for this rank
        accuracy = 100. * correct / len(testDataLoader.dataset)
        scores.append(accuracy)

        print("D value: {} | Accuracy = {}".format(D, accuracy))


## Question 1, Part 6

Train the compressed model and update its weights.

In [None]:
# Keep the leading D singular directions of every layer and fold the singular
# values into the right-hand factor, so that  W ~= u @ v  afterwards.
# NOTE: the u*/v* names are rebound in place, so this cell is not idempotent -
# recompute the SVD cells before running it a second time.
D = 20

vIn = torch.mm(sIn[:D].diag(), vIn[:, :D].t())
uIn = uIn[:, :D]

vFc1 = torch.mm(sFc1[:D].diag(), vFc1[:, :D].t())
uFc1 = uFc1[:, :D]

vFc2 = torch.mm(sFc2[:D].diag(), vFc2[:, :D].t())
uFc2 = uFc2[:, :D]

vFc3 = torch.mm(sFc3[:D].diag(), vFc3[:, :D].t())
uFc3 = uFc3[:, :D]

vFc4 = torch.mm(sFc4[:D].diag(), vFc4[:, :D].t())
uFc4 = uFc4[:, :D]

In [None]:
# print the shapes of each (u, v) factor pair, with a blank line after every pair
for leftFactor, rightFactor in (
    (uIn, vIn),
    (uFc1, vFc1),
    (uFc2, vFc2),
    (uFc3, vFc3),
    (uFc4, vFc4),
):
    print(leftFactor.shape)
    print(rightFactor.shape)
    print()

In [None]:
# dont forget about the bias

# 10 layers?

# set model architecture
class fcNetCompressed(nn.Module):
    """
    Low-rank version of the five-hidden-layer 784 -> 1024 -> ... -> 10 network.

    Every hidden weight matrix W (out x in) is replaced by a pair of linear
    layers that together represent a rank-``rank`` product U @ V:

        *_V : in   -> rank   (weight shape (rank, in),  holds S_k V_k^T)
        *_U : rank -> out    (weight shape (out, rank), holds U_k)

    The pair is applied as U(V(x)) with no non-linearity in between, so it
    stays a single (factored) linear map; ReLU follows each pair.

    Args:
        rank: inner dimension shared by every factor pair.
    """

    def __init__(self, rank=20):
        super().__init__()
        self.inputLayer_V = nn.Linear(784, rank)
        self.inputLayer_U = nn.Linear(rank, 1024)
        self.fullyConnected1_V = nn.Linear(1024, rank)
        self.fullyConnected1_U = nn.Linear(rank, 1024)
        self.fullyConnected2_V = nn.Linear(1024, rank)
        self.fullyConnected2_U = nn.Linear(rank, 1024)
        self.fullyConnected3_V = nn.Linear(1024, rank)
        self.fullyConnected3_U = nn.Linear(rank, 1024)
        self.fullyConnected4_V = nn.Linear(1024, rank)
        self.fullyConnected4_U = nn.Linear(rank, 1024)
        self.outputLayer = nn.Linear(1024, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a (batch, 784) input."""
        # project down with V, back up with U, then apply the activation
        x = F.relu(self.inputLayer_U(self.inputLayer_V(x)))
        x = F.relu(self.fullyConnected1_U(self.fullyConnected1_V(x)))
        x = F.relu(self.fullyConnected2_U(self.fullyConnected2_V(x)))
        x = F.relu(self.fullyConnected3_U(self.fullyConnected3_V(x)))
        x = F.relu(self.fullyConnected4_U(self.fullyConnected4_V(x)))
        x = F.log_softmax(self.outputLayer(x), dim=1)
        return x

model = fcNetCompressed()

### initialize weights and biases
# Each factored layer receives its (u, v) pair as weights and has both biases
# reset to zero; outputLayer is left at its default initialisation.
# NOTE(review): PyTorchTrainer builds its own network via config.model(), so
# this `model` instance is not the object that gets trained - confirm intent.
factorPairs = {
    "inputLayer": (uIn, vIn),
    "fullyConnected1": (uFc1, vFc1),
    "fullyConnected2": (uFc2, vFc2),
    "fullyConnected3": (uFc3, vFc3),
    "fullyConnected4": (uFc4, vFc4),
}

for layerName, (uFactor, vFactor) in factorPairs.items():
    uLayer = getattr(model, layerName + "_U")
    vLayer = getattr(model, layerName + "_V")

    # weights come straight from the truncated factors
    uLayer.weight.data = uFactor
    vLayer.weight.data = vFactor

    # bias units start from zero
    uLayer.bias.data = torch.zeros_like(uLayer.bias.data)
    vLayer.bias.data = torch.zeros_like(vLayer.bias.data)

In [None]:
# set input kwargs as object attributes
class ParamConfig:
    """Plain attribute bag: every keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        for name in kwargs:
            setattr(self, name, kwargs[name])

# configure all necessary parameters
# `model` is passed as the class itself; PyTorchTrainer calls it with no
# arguments to build the network it trains
modelParams = ParamConfig(
    model=fcNetCompressed,
    optimizer=torch.optim.Adam,
    criterion=F.nll_loss,
    trainDataLoader=DataLoader(trainDataset, batch_size=128, shuffle=True),
    testDataLoader=DataLoader(testDataset, batch_size=128, shuffle=True),
    cuda=torch.cuda.is_available(),
    device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"),
    seed=0,
    lr=0.0001,
    epochs=100,
    saveModel=True,
)

In [None]:
### fit model
# instantiate model object
trainer = PyTorchTrainer(config=modelParams)

# iterate fitting procedure over specified epoch count
for epoch in range(1, trainer.epochs + 1):
    trainer.train(epoch)
    trainer.test(epoch)

# save model
if trainer.saveModel:
    # exist_ok=True replaces the isdir() check + mkdir() pair and is safe
    # when the directory already exists
    os.makedirs("models/", exist_ok=True)
    PATH = "models/_hw3_q1_compressed.pt"
    torch.save(trainer.model.state_dict(), PATH)

In [None]:
# plot test accuracy (one point per evaluated epoch)
fig, ax = plt.subplots(figsize=(20, 10), facecolor="white")
ax.plot(trainer.testAccuracy)
ax.set_title("Test set accuracy - SVD")
plt.show()