# **Multi-Layer Perceptron Model**

**Installing dependencies in our notebook**

---



In [None]:
!pip3 install torch numpy matplotlib 



**Importing the required dependencies.**

---



In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

**Device configuration**

---



In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

**Function to read the data and format it as a list of tuples, each containing a feature tensor and its label.**

---



In [None]:
def read_data(file):
  """Parse a comma-separated text file into (feature_tensor, label) pairs.

  Every non-blank line is split on commas; all columns but the last are
  converted to floats and packed into a tensor, and the last column is
  converted to an int label.

  Args:
    file: path of the text file to read.

  Returns:
    A list of `(torch.Tensor, int)` tuples, one per non-blank line, in file
    order.

  Raises:
    OSError: if the file cannot be opened.
    ValueError: if a column cannot be parsed as a number.
  """
  data = []
  # The context manager closes the handle even when parsing fails part-way.
  with open(file) as f:
    for line in f:
      line = line.strip()
      # Blank lines (e.g. a trailing newline at the end of the file) carry no sample.
      if not line:
        continue
      *features, label = line.split(',')
      data.append((torch.tensor([float(value) for value in features]), int(label)))
  return data

**Loading the dataset.**

---



In [None]:
# Parse the training and test splits; edit the two paths to point at your copies of the files.
train_dataset, test_dataset = [read_data(path) for path in ('optdigits.tra', 'optdigits.tes')]

**Function to train the model**

---



In [None]:
def train(model, train_dataset, criterion=nn.CrossEntropyLoss(), learn_rate=0.001, max_iter=20, batch_size=5, loss_tol=1e-3, get_stats=True):
  """Train `model` in place with mini-batch SGD.

  Training stops early once the mean batch loss of two consecutive epochs
  differs by less than `loss_tol`; otherwise it runs for `max_iter` epochs.

  Args:
    model: module exposing `parameters()` and callable on a batch of features.
    train_dataset: sized, indexable collection of (feature_tensor, label) pairs.
    criterion: callable `criterion(outputs, labels)` returning a scalar loss tensor.
    learn_rate: learning rate handed to the SGD optimizer.
    max_iter: maximum number of passes over the training data.
    batch_size: mini-batch size (capped at the dataset size).
    loss_tol: convergence threshold on the change of the mean epoch loss.
    get_stats: when True, print progress every 100 steps and a final summary.

  Returns:
    None. The model's parameters are updated in place.

  Raises:
    ValueError: if `train_dataset` is empty.
  """
  from torch import optim

  if len(train_dataset) == 0:
    raise ValueError("train_dataset must contain at least one sample")

  # making batches of training data
  batch_size = min(batch_size, len(train_dataset))
  train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                             batch_size=batch_size,
                                             shuffle=True)

  # using SGD optimizer (it rejects a model without parameters, so the
  # parameter lookup right below cannot come up empty)
  optimizer = optim.SGD(model.parameters(), lr=learn_rate)

  # Send every batch to wherever the model's weights live instead of relying
  # on a notebook-level global.
  target_device = next(model.parameters()).device

  n_total_steps = len(train_loader)
  # Infinity means "no previous epoch yet", so the first epoch can never be
  # mistaken for convergence.
  prev_loss = float('inf')
  epochs_run = 0

  for epoch in range(max_iter):
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
      images = images.to(target_device)
      labels = labels.to(target_device)

      # Forward pass
      outputs = model(images)
      loss = criterion(outputs, labels)

      # Backward and optimize
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      curr_loss = loss.item()
      running_loss += curr_loss
      if get_stats and ((i+1) % 100 == 0):
        print (f'Epoch [{epoch+1}/{max_iter}], Step [{i+1}/{n_total_steps}], Loss: {curr_loss:.4f}')

    epochs_run = epoch + 1
    # Judge convergence on the mean loss over the whole epoch; the loss of the
    # last (randomly drawn) mini-batch alone is too noisy to compare.
    epoch_loss = running_loss / n_total_steps
    if abs(prev_loss - epoch_loss) < loss_tol:
      if get_stats:
        print(f'Completed in {epochs_run}')
      return
    prev_loss = epoch_loss

  # reached maximum iterations
  if get_stats:
    print(f'Not converged yet, completed max {epochs_run} iterations')

**Function to test the model**

---


In [None]:
def test(model, test_dataset, batch_size=5, model_desc="model"):

  # making batches of test data
  batch_size = min(batch_size, len(test_dataset))
  test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                            batch_size=batch_size, 
                                            shuffle=False)
  
  # We don't need to compute gradients 
  # (for memory efficiency) in the test phase
  with torch.no_grad():
      n_correct = 0
      n_samples = 0
      for images, labels in test_loader:
          images = images.to(device)
          labels = labels.to(device)
          outputs = model(images)

          # labels are the classes with maximum
          # probability
          _, predicted = torch.max(outputs.data, 1)
          n_samples += labels.size(0)
          
          # updating number of correct classification
          # with matches of predicted and the actual labels
          n_correct += (predicted == labels).sum().item()
      
      acc = 100.0 * n_correct / n_samples
      print(f'Accuracy of the {model_desc} on {n_samples} test samples: {acc} %')
      return acc

**Function to train and test each of the defined models**

---



In [None]:
def _build_mlp(inp_dim, hidden_sizes, out_dim):
  """Build a ReLU MLP; with no hidden sizes the result is a single nn.Linear layer."""
  if not hidden_sizes:
    return nn.Linear(inp_dim, out_dim)
  layers = []
  prev_width = inp_dim
  for width in hidden_sizes:
    layers += [nn.Linear(prev_width, width), nn.ReLU()]
    prev_width = width
  layers.append(nn.Linear(prev_width, out_dim))
  return nn.Sequential(*layers)


def run_models(inp_dim, out_dim, train_dataset, test_dataset, batch_size=5, max_iter=10, learn_rate=0.001, loss_tol=1e-3, criterion=nn.CrossEntropyLoss(), get_stats=False, hidden_layouts=((), (2,), (6,), (2, 3), (3, 2))):
  """Train and evaluate one model per entry of `hidden_layouts`.

  Models are built, trained and tested one after another, in the order given.
  The default layouts are: no hidden layer, one hidden layer of 2 units, one
  of 6 units, two hidden layers of (2, 3) units, and two of (3, 2) units.

  Args:
    inp_dim: number of input features.
    out_dim: number of output classes.
    train_dataset: data handed to `train`.
    test_dataset: data handed to `test`.
    batch_size: batch size used for both training and testing.
    max_iter: maximum number of training epochs per model.
    learn_rate: learning rate passed to `train`.
    loss_tol: convergence tolerance passed to `train`.
    criterion: loss function passed to `train`.
    get_stats: whether `train` prints progress.
    hidden_layouts: iterable of hidden-layer width tuples, one per model.

  Returns:
    List of the accuracies returned by `test`, in the order of `hidden_layouts`.
  """
  ps_accs = []

  for model_no, hidden_sizes in enumerate(hidden_layouts, start=1):
    model = _build_mlp(inp_dim, hidden_sizes, out_dim).to(device)
    train(model, train_dataset, batch_size=batch_size, criterion=criterion, learn_rate=learn_rate, max_iter=max_iter, loss_tol=loss_tol, get_stats=get_stats)
    ps_accs.append(test(model, test_dataset, batch_size=batch_size, model_desc=f"model {model_no}"))

  return ps_accs

**Task 2**

---



In [None]:
# Experiment settings for the 64-feature inputs
inp_dim, out_dim = 64, 10
max_iter, batch_size = 10, 5
loss_tol = 1e-3
cont_lrs = [0.1, 0.01, 0.005, 0.001, 0.0001, 1e-5]

# accs[i][j] holds the test accuracy of model j+1 trained with cont_lrs[i]
accs = []

for learn_rate in cont_lrs:
  print(f"\nFor learning rate = {learn_rate}")
  accs.append(
      run_models(
          inp_dim, out_dim, train_dataset, test_dataset,
          batch_size=batch_size, max_iter=max_iter,
          learn_rate=learn_rate, loss_tol=loss_tol, get_stats=False,
      )
  )


For learning rate = 0.1
Accuracy of the model 1 on 1797 test samples: 92.87701725097385 %
Accuracy of the model 2 on 1797 test samples: 10.01669449081803 %
Accuracy of the model 3 on 1797 test samples: 10.127991096271563 %
Accuracy of the model 4 on 1797 test samples: 9.905397885364497 %
Accuracy of the model 5 on 1797 test samples: 9.84974958263773 %

For learning rate = 0.01
Accuracy of the model 1 on 1797 test samples: 90.98497495826378 %
Accuracy of the model 2 on 1797 test samples: 18.586533110740124 %
Accuracy of the model 3 on 1797 test samples: 84.91930996104618 %
Accuracy of the model 4 on 1797 test samples: 29.66054535336672 %
Accuracy of the model 5 on 1797 test samples: 45.742904841402336 %

For learning rate = 0.005
Accuracy of the model 1 on 1797 test samples: 94.82470784641069 %
Accuracy of the model 2 on 1797 test samples: 24.095715080690038 %
Accuracy of the model 3 on 1797 test samples: 73.90094602114635 %
Accuracy of the model 4 on 1797 test samples: 52.754590984974

**Task 4**

---



In [None]:
# Figure 1: accuracy against model number, one curve per learning rate
fig, ax = plt.subplots(figsize=(15, 9))
for ind_lr, lr in enumerate(cont_lrs):
  model_numbers = range(1, len(accs[ind_lr]) + 1)
  ax.plot(model_numbers, accs[ind_lr], label=f"learning_rate={lr}")
ax.legend(loc="upper right")
ax.set_xlabel('model number')
ax.set_ylabel('accuracy(%)')
ax.set_xticks(list(range(1, len(accs[0]) + 1)))
ax.set_title("model_vs_accuracy_for_learning_rate")
fig.savefig('model_vs_accuracy_for_learning_rate.png')
plt.show()
plt.close(fig)

# Figure 2: accuracy against log10(learning rate), one curve per model
log_lrs = np.log10(np.array(cont_lrs))
fig, ax = plt.subplots(figsize=(15, 9))
for ind_mod in range(len(accs[0])):
  acc_per_lr = [accs[ind_lr][ind_mod] for ind_lr in range(len(cont_lrs))]
  ax.plot(log_lrs, acc_per_lr, label=f"model={ind_mod + 1}")
ax.legend(loc="upper left")
ax.set_xlabel('log10(learning rate)')
ax.set_ylabel('accuracy(%)')
ax.set_xticks(log_lrs)
ax.set_title("learning_rate_vs_accuracy_for_model")
fig.savefig('learning_rate_vs_accuracy_for_model.png')
plt.show()
plt.close(fig)

# **PCA Analysis**

**Task 5**

---



In [None]:
# Fit the low-rank PCA on the training features only, then project them
# onto the first two columns of V
train_feat_tensor = torch.stack([feat for feat, _ in train_dataset])
U, S, V = torch.pca_lowrank(train_feat_tensor, q=None, center=True, niter=3)
red_train_feat = (train_feat_tensor @ V[:, :2]).tolist()
red_train_data = [(torch.tensor(point), label)
                  for point, (_, label) in zip(red_train_feat, train_dataset)]

# The test features are projected with the same V obtained from the training data
test_feat_tensor = torch.stack([feat for feat, _ in test_dataset])
red_test_feat = (test_feat_tensor @ V[:, :2]).tolist()
red_test_data = [(torch.tensor(point), label)
                 for point, (_, label) in zip(red_test_feat, test_dataset)]

In [None]:
# plotting reduced train features, one colour per digit
plt.figure(figsize=(24, 24))
label_color = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

# Group the 2-D points by digit (keeping first-appearance order so the legend
# order is unchanged); drawing one artist per digit is far cheaper than one
# artist per sample.
points_by_label = {}
for (x, y), (_, curr_label) in zip(red_train_feat, train_dataset):
  xs, ys = points_by_label.setdefault(curr_label, ([], []))
  xs.append(x)
  ys.append(y)

for curr_label, (xs, ys) in points_by_label.items():
  # linestyle='none' keeps the points as isolated markers
  plt.plot(xs, ys, linestyle='none', marker="o", markersize=5, markeredgecolor='none',
           markerfacecolor=label_color[curr_label], label="digit "+str(curr_label))

# (the original call carried an unbalanced closing parenthesis, which made the whole cell a SyntaxError)
plt.legend(loc="upper left", fontsize=20, ncol=2)
plt.savefig('pca_on_training_data_visual.png')
plt.show()
plt.close()

**Task 6**

---



In [None]:
# Same experiment on the 2-feature reduced data, with a single learning rate
cont_lrs = [0.001]
inp_dim, out_dim = 2, 10
max_iter, batch_size = 10, 5
loss_tol = 1e-3

accs = []
for learn_rate in cont_lrs:
  print(f"\nFor learning rate = {learn_rate}")
  accs.append(
      run_models(
          inp_dim, out_dim, red_train_data, red_test_data,
          batch_size=batch_size, max_iter=max_iter,
          learn_rate=learn_rate, loss_tol=loss_tol, get_stats=False,
      )
  )