In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

In [2]:
# Pick the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

from torchvision import datasets
from torchvision.transforms import ToTensor

# MNIST is stored under ./data; only the training split asks for a download.
# Every image is converted with ToTensor().
train_data = datasets.MNIST(root='data', train=True, transform=ToTensor(), download=True)
test_data = datasets.MNIST(root='data', train=False, transform=ToTensor())

# Both splits are served as shuffled mini-batches of 100 samples.
loaders = {
    split: DataLoader(dataset, batch_size=100, shuffle=True)
    for split, dataset in (('train', train_data), ('test', test_data))
}

In [3]:
class Model1(nn.Module):
    """Fully connected network with layer sizes 784 -> 10 -> 20 -> 10.

    A ReLU follows each of the first two linear layers; the output of the
    last linear layer is returned without an activation.
    """

    def __init__(self):
        super().__init__()
        # The attribute names and their creation order define the parameter
        # names (l1.weight, l1.bias, ...), so both are kept as they were.
        self.l1 = nn.Linear(in_features=784, out_features=10)
        self.l2 = nn.Linear(in_features=10, out_features=20)
        self.l3 = nn.Linear(in_features=20, out_features=10)

    def forward(self, x):
        """Run ``x`` (last dimension 784) through the three layers."""
        hidden = torch.relu(self.l1(x))
        hidden = torch.relu(self.l2(hidden))
        return self.l3(hidden)

In [12]:
def calculate_loss(model, loader, loss_fn):
    """Evaluate ``model`` on every batch of ``loader`` without tracking gradients.

    Each batch's inputs are reshaped to ``(-1, 784)`` before the forward pass.
    If ``model`` is an ``nn.Module`` it is switched to evaluation mode for the
    duration of the pass and its previous training/eval mode is restored
    afterwards, so the function can be called from inside a training loop.

    Args:
        model: Callable mapping a ``(batch, 784)`` tensor to a
            ``(batch, n_classes)`` tensor of scores.
        loader: Iterable yielding ``(data, target)`` batches.
        loss_fn: Callable ``loss_fn(output, target)`` returning the batch loss.

    Returns:
        Tuple ``(mean_loss, accuracy)``. ``mean_loss`` is the sum of the batch
        losses divided by the number of batches (same type as ``loss_fn``
        returns). ``accuracy`` is the fraction of samples whose arg-max score
        equals the target, rounded to three decimals.

    Raises:
        ZeroDivisionError: if ``loader`` yields no batches.
    """
    correct = 0
    total = 0
    cost_total = 0
    counter = 0

    # Remember the current mode so evaluation does not silently flip a model
    # that is in the middle of training.
    is_module = isinstance(model, torch.nn.Module)
    was_training = model.training if is_module else False
    if is_module:
        model.eval()
    try:
        with torch.no_grad():
            for data, target in loader:
                output = model(data.view(-1, 784))  # flatten each image
                cost_total += loss_fn(output, target)
                counter += 1

                # One batched comparison instead of one argmax call per sample.
                predictions = output.argmax(dim=1)
                correct += (predictions == target).sum().item()
                total += output.size(0)
    finally:
        if is_module:
            model.train(was_training)

    return cost_total / counter, round(correct / total, 3)

# calculate_loss returns (loss averaged over batches, accuracy rounded to 3 decimals)

In [13]:

from torch.autograd import Variable

def train(num_epochs, model, loaders):
    """Train ``model`` and record its weights and metrics after every step.

    For each mini-batch of ``loaders['train']`` this runs one optimisation
    step with cross-entropy loss, snapshots every parameter whose name
    contains ``'weight'`` as one flat row, and evaluates the model on the
    full ``loaders['train']`` and ``loaders['test']`` with ``calculate_loss``.

    Relies on notebook globals that must exist before the call:
    ``optimizer`` (expected to wrap ``model``'s parameters), ``pd`` (pandas),
    and the lists ``train_loss_arr``, ``test_loss_arr``, ``train_acc_arr``
    and ``test_acc_arr``, each of which receives one entry per step.

    Args:
        num_epochs: Number of passes over ``loaders['train']``.
        model: Module taking ``(batch, 784)`` inputs.
        loaders: Mapping with ``'train'`` and ``'test'`` batch iterables
            yielding ``(images, labels)``.

    Returns:
        DataFrame with one row per optimisation step; each row is the
        concatenation of the flattened weight tensors (biases are skipped).
        Rows and columns carry a plain ``RangeIndex``. The frame is empty if
        no step was run.
    """
    model.train()
    loss_func = nn.CrossEntropyLoss()
    train_batches = loaders['train']
    test_batches = loaders['test']

    # Rows are collected in a list and turned into a DataFrame once at the
    # end; growing a DataFrame with concat on every step is quadratic.
    weight_rows = []

    for _ in range(num_epochs):
        for images, labels in train_batches:
            output = model(images.view(-1, 784))
            loss = loss_func(output, labels)

            # Clear old gradients, back-propagate, then apply the update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # torch.cat allocates a new tensor, so the snapshot is not
            # overwritten by later optimiser steps.
            snapshot = torch.cat([
                parameter.detach().reshape(-1)
                for name, parameter in model.named_parameters()
                if 'weight' in name
            ])
            weight_rows.append(snapshot.cpu().numpy())

            # NOTE(review): this is a full pass over both datasets after
            # every single step, which dominates the runtime -- confirm that
            # per-step (rather than per-epoch) metrics are really wanted.
            train_loss, train_acc = calculate_loss(model, train_batches, loss_func)
            test_loss, test_acc = calculate_loss(model, test_batches, loss_func)

            train_loss_arr.append(train_loss)
            test_loss_arr.append(test_loss)
            train_acc_arr.append(train_acc)
            test_acc_arr.append(test_acc)

    if not weight_rows:
        return pd.DataFrame()
    return pd.DataFrame(np.stack(weight_rows))

In [14]:
# Short aliases for the two loaders built earlier.
train_loader = loaders['train']
test_loader = loaders['test']

# Metric histories; train() appends one value to each list per evaluation.
train_loss_arr, test_loss_arr = [], []
train_acc_arr, test_acc_arr = [], []

In [16]:
import pandas as pd

max_epochs = 5
all_df = pd.DataFrame()
columns = ["x", "y", "Times"]

loss_fn = nn.CrossEntropyLoss()

# Start from empty metric histories so that re-running this cell (for example
# after an interrupted run) does not leave stale entries that no longer line
# up with the rows of all_df.
for history in (train_loss_arr, test_loss_arr, train_acc_arr, test_acc_arr):
    history.clear()

# Train 8 independently initialised models and keep each run's weight frame.
run_frames = []
for count in range(8):
    print("Time: "+str(count))
    M = Model1()
    # train() picks up this optimizer through the notebook's global namespace.
    optimizer = torch.optim.Adam(M.parameters(), lr=0.0004, weight_decay=1e-4)
    temp_df = train(max_epochs, M, loaders)
    run_frames.append(temp_df)

# DataFrame.append was removed in pandas 2.0; stack all runs with one concat.
all_df = pd.concat(run_frames)

Time: 0


KeyboardInterrupt: 

In [None]:
# Project every recorded weight row onto two principal components.
# NOTE(review): `PCA` is not imported in any visible cell; it is presumably
# sklearn.decomposition.PCA -- confirm and add the import to the import cell.
df = np.array(all_df)
pca = PCA(n_components=2)
new_data = pca.fit_transform(df)

In [None]:
# One row per recorded weight snapshot: its 2-D projection plus the training
# metrics logged at the same moment.
df = pd.DataFrame(new_data, columns=['x', 'y'])
df['Accuracy'] = train_acc_arr
# calculate_loss returns the loss as a 0-dim tensor; store plain floats so the
# column is numeric rather than a column of tensor objects.
df['Loss'] = [float(loss) for loss in train_loss_arr]
# Keep every third row to thin out the points that get plotted.
final_df = df.iloc[::3, :]

In [None]:
# Draw at most the first 120 sub-sampled points, using each point's accuracy
# value as its marker. Slicing by position means the cell no longer fails when
# final_df has fewer than 120 rows, and no longer depends on the index labels
# being multiples of 3.
points = final_df.iloc[:120]
for x_coord, y_coord, acc in zip(points['x'], points['y'], points['Accuracy']):
    plt.scatter(x_coord, y_coord, marker=f'${acc}$')
plt.title("PCA for model")