In [5]:
import numpy as np # to handle matrix and data operation
import pandas as pd # to read csv and handle dataframe

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.autograd import Variable

from sklearn.model_selection import train_test_split

In [9]:
df = pd.read_csv(r'D:\dl\mnist\train.csv')
print(df.shape)

(42000, 785)


In [12]:
y = df['label'].values
X = df.drop(['label'],axis=1).values


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

In [13]:
print(y_test.shape)

(6300,)


In [14]:
BATCH_SIZE = 32

torch_X_train = torch.from_numpy(X_train).type(torch.LongTensor)
torch_y_train = torch.from_numpy(y_train).type(torch.LongTensor) # data type is long

# create feature and targets tensor for test set.
torch_X_test = torch.from_numpy(X_test).type(torch.LongTensor)
torch_y_test = torch.from_numpy(y_test).type(torch.LongTensor) # data type is long

# Pytorch train and test sets
train = torch.utils.data.TensorDataset(torch_X_train,torch_y_train)
test = torch.utils.data.TensorDataset(torch_X_test,torch_y_test)

# data loader
train_loader = torch.utils.data.DataLoader(train, batch_size = BATCH_SIZE, shuffle = False)
test_loader = torch.utils.data.DataLoader(test, batch_size = BATCH_SIZE, shuffle = False)

In [15]:
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(784,250)
        self.linear2 = nn.Linear(250,100)
        self.linear3 = nn.Linear(100,10)
    
    def forward(self,X):
        X = F.relu(self.linear1(X))
        X = F.relu(self.linear2(X))
        X = self.linear3(X)
        return F.log_softmax(X, dim=1)
 
mlp = MLP()
print(mlp)

MLP(
  (linear1): Linear(in_features=784, out_features=250, bias=True)
  (linear2): Linear(in_features=250, out_features=100, bias=True)
  (linear3): Linear(in_features=100, out_features=10, bias=True)
)


We have 784\*(250+1) + 250\*(100+1) + 100\*(10+1) = 222 360 parameters to train

In [21]:
def fit(model, train_loader):
    optimizer = torch.optim.Adam(model.parameters())#,lr=0.001, betas=(0.9,0.999))
    error = nn.CrossEntropyLoss()
    EPOCHS = 5
    model.train()
    for epoch in range(EPOCHS):
        correct = 0
        for batch_idx, (X_batch, y_batch) in enumerate(train_loader):
            var_X_batch = Variable(X_batch).float()
            var_y_batch = Variable(y_batch)
            optimizer.zero_grad()
            output = model(var_X_batch)
            loss = error(output, var_y_batch)
            loss.backward()
            optimizer.step()

            # Total correct predictions
            predicted = torch.max(output.data, 1)[1] 
            correct += (predicted == var_y_batch).sum()
            #print(correct)
            if batch_idx % 50 == 0:
                print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
                    epoch, batch_idx*len(X_batch), len(train_loader.dataset), 100.*batch_idx / len(train_loader), loss.data[0], float(correct*100) / float(BATCH_SIZE*(batch_idx+1))))
                

In [23]:
fit(mlp, train_loader)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (896x28 and 784x250)

## MLP Evaluation

In [None]:
def evaluate(model):
#model = mlp
    correct = 0 
    for test_imgs, test_labels in test_loader:
        #print(test_imgs.shape)
        test_imgs = Variable(test_imgs).float()
        output = model(test_imgs)
        predicted = torch.max(output,1)[1]
        correct += (predicted == test_labels).sum()
    print("Test accuracy:{:.3f}% ".format( float(correct) / (len(test_loader)*BATCH_SIZE)))
evaluate(mlp)

In [18]:
torch_X_train = torch_X_train.view(-1, 1,28,28).float()
torch_X_test = torch_X_test.view(-1,1,28,28).float()
print(torch_X_train.shape)
print(torch_X_test.shape)

# Pytorch train and test sets
train = torch.utils.data.TensorDataset(torch_X_train,torch_y_train)
test = torch.utils.data.TensorDataset(torch_X_test,torch_y_test)

# data loader
train_loader = torch.utils.data.DataLoader(train, batch_size = BATCH_SIZE, shuffle = False)
test_loader = torch.utils.data.DataLoader(test, batch_size = BATCH_SIZE, shuffle = False)

torch.Size([35700, 1, 28, 28])
torch.Size([6300, 1, 28, 28])


In [19]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
        self.conv3 = nn.Conv2d(32,64, kernel_size=5)
        self.fc1 = nn.Linear(3*3*64, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        #x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(F.max_pool2d(self.conv3(x),2))
        x = F.dropout(x, p=0.5, training=self.training)
        x = x.view(-1,3*3*64 )
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
 
cnn = CNN()
print(cnn)

it = iter(train_loader)
X_batch, y_batch = next(it)
print(cnn.forward(X_batch).shape)

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)
torch.Size([32, 10])


In [20]:
fit(cnn,train_loader)

IndexError: invalid index of a 0-dim tensor. Use `tensor.item()` in Python or `tensor.item<T>()` in C++ to convert a 0-dim tensor to a number

In [None]:
evaluate(cnn)

In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(784, 250)
        self.linear2 = nn.Linear(250, 100)
        self.linear3 = nn.Linear(100, 10)

    def forward(self, x):
        x = x.view(-1, 784)  # Flatten the input
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.linear3(x)
        return x

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(64 * 3 * 3, 256)  # Adjusted input features
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = x.view(-1, 64 * 3 * 3)  # Flatten the input
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

def fit(model, train_loader):
    optimizer = torch.optim.Adam(model.parameters())
    error = nn.CrossEntropyLoss()
    for epoch in range(1, 6):
        correct = 0
        for batch_idx, (X_batch, y_batch) in enumerate(train_loader):
            var_X_batch = Variable(X_batch)
            var_y_batch = Variable(y_batch)
            optimizer.zero_grad()
            output = model(var_X_batch)
            loss = error(output, var_y_batch)
            loss.backward()
            optimizer.step()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(var_y_batch.data.view_as(pred)).sum()
            if batch_idx % 50 == 0:
                print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
                    epoch, batch_idx * len(X_batch), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item(),
                    float(correct * 100) / float(BATCH_SIZE * (batch_idx + 1))))

# Example usage:
fit(cnn, train_loader)
# fit(mlp, train_loader)

# Evaluate the model on the test set
evaluate(cnn)



NameError: name 'evaluate' is not defined

In [33]:
def compute_loss(model, data_loader):
    model.eval()  # Set the model to evaluation mode
    total_loss = 0
    error = nn.CrossEntropyLoss()
    
    with torch.no_grad():  # Disable gradient computation
        for X_batch, y_batch in data_loader:
            var_X_batch = Variable(X_batch).float()
            var_y_batch = Variable(y_batch)
            output = model(var_X_batch)
            loss = error(output, var_y_batch)
            total_loss += loss.item()
    
    average_loss = total_loss / len(data_loader)
    return average_loss

In [34]:
def generate_random_direction_vectors(model, num_vectors=10):
    direction_vectors = []
    for _ in range(num_vectors):
        direction_vector = []
        for param in model.parameters():
            direction_vector.append(torch.randn_like(param))
        direction_vectors.append(direction_vector)
    return direction_vectors

random_directions = generate_random_direction_vectors(cnn, num_vectors=5)
for i, direction in enumerate(random_directions):
    print(f"Direction vector {i+1}:")
    for tensor in direction:
        print(tensor.shape)

Direction vector 1:
torch.Size([32, 1, 5, 5])
torch.Size([32])
torch.Size([32, 32, 5, 5])
torch.Size([32])
torch.Size([64, 32, 5, 5])
torch.Size([64])
torch.Size([256, 576])
torch.Size([256])
torch.Size([10, 256])
torch.Size([10])
Direction vector 2:
torch.Size([32, 1, 5, 5])
torch.Size([32])
torch.Size([32, 32, 5, 5])
torch.Size([32])
torch.Size([64, 32, 5, 5])
torch.Size([64])
torch.Size([256, 576])
torch.Size([256])
torch.Size([10, 256])
torch.Size([10])
Direction vector 3:
torch.Size([32, 1, 5, 5])
torch.Size([32])
torch.Size([32, 32, 5, 5])
torch.Size([32])
torch.Size([64, 32, 5, 5])
torch.Size([64])
torch.Size([256, 576])
torch.Size([256])
torch.Size([10, 256])
torch.Size([10])
Direction vector 4:
torch.Size([32, 1, 5, 5])
torch.Size([32])
torch.Size([32, 32, 5, 5])
torch.Size([32])
torch.Size([64, 32, 5, 5])
torch.Size([64])
torch.Size([256, 576])
torch.Size([256])
torch.Size([10, 256])
torch.Size([10])
Direction vector 5:
torch.Size([32, 1, 5, 5])
torch.Size([32])
torch.Size([3

In [35]:
def perturb_model(model,direction1,direction2,alpha,beta):
    new_model = CNN()
    for new_param, param, d1, d2 in zip(new_model.parameters(), model.parameters(), direction1, direction2):
        new_param.data = param.data + alpha * d1 + beta * d2
    return new_model

In [38]:
from mpl_toolkits.mplot3d import Axes3D

import matplotlib.pyplot as plt

# Define the grid for alpha and beta
alpha_range = np.linspace(-1, 1, 20)
beta_range = np.linspace(-1, 1, 20)

# Initialize the loss landscape
loss_landscape = np.zeros((len(alpha_range), len(beta_range)))

# Compute the loss for each point in the grid
for i, alpha in enumerate(alpha_range):
    for j, beta in enumerate(beta_range):
        perturbed_model = perturb_model(cnn, random_directions[0], random_directions[1], alpha, beta)
        loss = compute_loss(perturbed_model, test_loader)
        loss_landscape[i, j] = loss

# Plot the 3D loss landscape
alpha_grid, beta_grid = np.meshgrid(alpha_range, beta_range)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(alpha_grid, beta_grid, loss_landscape, cmap='viridis')

ax.set_xlabel('Alpha')
ax.set_ylabel('Beta')
ax.set_zlabel('Loss')
ax.set_title('3D Loss Landscape')

plt.show()

RuntimeError: shape '[-1, 576]' is invalid for input of size 524288

In [31]:
# def evaluate(model, test_loader):
#     model.eval()  # Set the model to evaluation mode
#     test_loss = 0
#     correct = 0
#     error = nn.CrossEntropyLoss()
    
#     with torch.no_grad():  # Disable gradient computation
#         for X_batch, y_batch in test_loader:
#             var_X_batch = Variable(X_batch)
#             var_y_batch = Variable(y_batch)
#             output = model(var_X_batch)
#             test_loss += error(output, var_y_batch).item()  # Sum up batch loss
#             pred = output.data.max(1, keepdim=True)[1]  # Get the index of the max log-probability
#             correct += pred.eq(var_y_batch.data.view_as(pred)).sum().item()
    
#     test_loss /= len(test_loader.dataset)
#     accuracy = 100. * correct / len(test_loader.dataset)
    
#     print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
#         test_loss, correct, len(test_loader.dataset), accuracy))

# # Example usage:
# evaluate(cnn, test_loader)
# # evaluate(mlp, test_loader)


Test set: Average loss: 0.0016, Accuracy: 6196/6300 (98.35%)

