In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
import seaborn
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchvision import transforms

from torch.utils.data import TensorDataset
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
import random
from sklearn.decomposition import PCA

In [None]:
# Mount Google Drive at /content/drive so outputs can be written to it.
# This import only exists inside Google Colab.
# NOTE(review): path2write in the config cell is a relative path starting with
# "drive/", so it presumably relies on the working directory being /content — confirm.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
class Trainer:
    """Train/validate loop for a classifier with TensorBoard logging.

    Each epoch runs one pass over the training loader and one pass over the
    validation loader, logs loss/accuracy/learning-rate scalars, and (when
    ``save_final`` is true) stores the model's ``state_dict`` after the last
    epoch.

    Args:
        model: network to optimise; moved to ``device`` in ``run_trainer``.
        device: anything accepted by ``Tensor.to`` / ``Module.to``.
        criterion: loss called as ``criterion(output, target)`` with integer
            (``torch.long``) targets.
        optimizer: optimizer already bound to ``model``'s parameters.
        training_DataLoader: iterable of ``(x, y)`` batches supporting ``len``.
        validation_DataLoader: iterable of ``(x, y)`` batches supporting ``len``.
        epochs: number of epochs to run.
        epoch: stored on the instance; not read by the training loop.
        notebook: selects the tqdm flavour imported for the epoch loop.
        path2write: root directory for ``Log/`` and ``check_points/``;
            ``None`` falls back to the current directory.
        save_best, save_interval, checkpoint_start_epoch: stored on the
            instance; not read by the training loop.
        save_final: save the model's ``state_dict`` after the last epoch.
    """

    def __init__(self,
                 model: torch.nn.Module,
                 device: torch.device,
                 criterion: torch.nn.Module,
                 optimizer: torch.optim.Optimizer,
                 training_DataLoader: torch.utils.data.DataLoader,
                 validation_DataLoader: torch.utils.data.DataLoader,
                 epochs: int = 100,
                 epoch: int = 0,
                 notebook: bool = False,
                 path2write: str = None,
                 save_best=False,
                 save_final=True,
                 save_interval=10,
                 checkpoint_start_epoch=50
                 ):
        if path2write is None:
            # os.path.join(None, ...) raises an opaque TypeError; default to cwd.
            path2write = '.'
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.training_DataLoader = training_DataLoader
        self.validation_DataLoader = validation_DataLoader
        self.device = device
        self.epochs = epochs
        self.epoch = epoch
        self.notebook = notebook
        self.path2write = path2write
        log_dir = os.path.join(path2write, 'Log')
        self.writer_train = SummaryWriter(os.path.join(log_dir, "train"))
        self.writer_val = SummaryWriter(os.path.join(log_dir, "val"))
        self.check_point_path = os.path.join(path2write, 'check_points')
        os.makedirs(self.check_point_path, exist_ok=True)
        self.save_best = save_best
        self.save_final = save_final
        self.save_interval = save_interval
        self.checkpoint_start_epoch = checkpoint_start_epoch
        # Per-epoch history, appended to by _train / _validate.
        self.training_loss = []
        self.validation_loss = []
        self.learning_rate = []
        self.training_accuracy = []
        self.validation_accuracy = []

    def run_trainer(self):
        """Run ``self.epochs`` epochs of training followed by validation.

        Returns:
            ``(training_loss, validation_loss, model, training_accuracy,
            validation_accuracy)`` where the four histories are per-epoch lists.
        """
        self.model.to(self.device)
        if self.notebook:
            print('Notebook')
            from tqdm.notebook import trange
        else:
            from tqdm import trange
        # The epoch bar is deliberately disabled; per-batch bars show progress.
        progressbar = trange(self.epochs, desc='Progress', disable=True)
        for epoch in progressbar:
            print(f'Epoch - {epoch}')

            # Training block
            train_loss, train_accuracy = self._train()
            self.writer_train.add_scalar("Train Loss", train_loss, epoch)
            self.writer_train.add_scalar("Train Accuracy", train_accuracy, epoch)

            # Validation block
            val_loss, val_accuracy = self._validate()
            self.writer_val.add_scalar("Val Loss", val_loss, epoch)
            self.writer_val.add_scalar("Val Accuracy", val_accuracy, epoch)

            # Learning rate of the first parameter group
            self.writer_train.add_scalar("Learning Rate", self.optimizer.param_groups[0]['lr'], epoch)

            # Push the scalars to disk so TensorBoard sees every finished epoch.
            self.writer_train.flush()
            self.writer_val.flush()

            print('Epoch - {} Train Loss - {:.6f} Val Loss - {:.6f} Train Accuracy - {:.6f} Val Accuracy - {:.6f}'.format(epoch, train_loss, val_loss, train_accuracy, val_accuracy))
            if self.save_final and epoch == self.epochs - 1:
                model_name = 'epoch-{}-loss{:.6f}'.format(epoch, val_loss)
                torch.save(self.model.state_dict(), os.path.join(self.check_point_path, model_name))

        return self.training_loss, self.validation_loss, self.model, self.training_accuracy, self.validation_accuracy

    def _batch_iterator(self, loader, desc):
        """Wrap ``enumerate(loader)`` in a tqdm progress bar labelled ``desc``."""
        return tqdm(enumerate(loader), desc, total=len(loader), disable=False)

    def _train(self):
        """Run one optimisation pass over the training loader.

        Returns:
            ``(mean_batch_loss, accuracy)``; accuracy is a Python float equal
            to correct predictions divided by samples seen.
        """
        self.model.train()
        train_losses = []
        n_correct = 0
        n_seen = 0
        batch_iter = self._batch_iterator(self.training_DataLoader, 'Training')
        for _, (x, y) in batch_iter:
            # Convert on the target device directly (no CPU round-trip).
            inputs = x.to(device=self.device, dtype=torch.float32)
            target = y.to(device=self.device, dtype=torch.long)
            self.optimizer.zero_grad()
            output = self.model(inputs)
            loss = self.criterion(output, target)
            train_losses.append(loss.item())
            loss.backward()
            self.optimizer.step()
            pred = output.argmax(dim=1)  # index of the highest score per sample
            # Count per sample so a short final batch is not over-weighted.
            n_correct += pred.eq(target.view_as(pred)).sum().item()
            n_seen += pred.numel()
        batch_iter.close()  # clean up the bar

        accuracy = n_correct / n_seen
        mean_loss = np.mean(train_losses)  # mean of the per-batch losses
        self.training_loss.append(mean_loss)
        self.learning_rate.append(self.optimizer.param_groups[0]['lr'])
        self.training_accuracy.append(accuracy)
        return mean_loss, accuracy

    def _validate(self):
        """Evaluate the model on the validation loader without gradients.

        Returns:
            ``(mean_batch_loss, accuracy)``; accuracy is a Python float equal
            to correct predictions divided by samples seen.
        """
        self.model.eval()
        valid_losses = []
        n_correct = 0
        n_seen = 0
        batch_iter = self._batch_iterator(self.validation_DataLoader, 'validation')
        with torch.no_grad():
            for _, (x, y) in batch_iter:
                inputs = x.to(device=self.device, dtype=torch.float32)
                target = y.to(device=self.device, dtype=torch.long)
                output = self.model(inputs)
                valid_losses.append(self.criterion(output, target).item())
                pred = output.argmax(dim=1)
                n_correct += pred.eq(target.view_as(pred)).sum().item()
                n_seen += pred.numel()
        batch_iter.close()

        accuracy = n_correct / n_seen
        mean_loss = np.mean(valid_losses)
        self.validation_loss.append(mean_loss)
        self.validation_accuracy.append(accuracy)
        return mean_loss, accuracy

In [None]:
class DNN(nn.Module): #for mnist
  """Four-layer fully connected classifier for 28x28 inputs and 10 classes.

  ``forward`` flattens every sample to a vector, applies three hidden Linear
  layers with ReLU and a final Linear layer, and returns log-probabilities
  over the 10 classes.
  """

  def __init__(self):
    super().__init__()
    self.Dense1 = nn.Linear(28*28, 64)
    self.Dense2 = nn.Linear(64, 32)
    self.Dense3 = nn.Linear(32, 16)
    self.Dense4 = nn.Linear(16, 10)

  def forward(self, x):
    """Return per-class log-probabilities of shape ``(batch, 10)``."""
    # reshape (unlike view) also accepts non-contiguous input.
    x = x.reshape(x.shape[0], -1)
    # Without a non-linearity the stacked Linear layers collapse into a single
    # affine map, so each hidden layer is followed by ReLU.
    x = F.relu(self.Dense1(x))
    x = F.relu(self.Dense2(x))
    x = F.relu(self.Dense3(x))
    x = self.Dense4(x)
    # Explicit dim: the implicit-dimension form is deprecated and warns.
    # log_softmax is idempotent, so pairing this output with
    # nn.CrossEntropyLoss (which applies it again) gives the same loss.
    return F.log_softmax(x, dim=1)

In [None]:
# Experiment-wide settings shared by the training runs.
# Use GPU 0 when CUDA is available, otherwise fall back to the CPU so the
# training cells still run on a CPU-only runtime.
gpu_id = 0 if torch.cuda.is_available() else 'cpu'
loss_fn = nn.CrossEntropyLoss()
epochs =  20
notebook = True
checkpoint_start_epoch = 5 #Not using
path2write = "drive/MyDrive/DL_homework/HW1_1/"

In [None]:
# Run 1: batch size 256, learning rate 1e-4.
batch_size = 256
lr = 1e-4
# Each run gets its own output directory; otherwise every run appends its
# TensorBoard scalars to the same Log/ folder and the curves are mixed together.
run_dir = os.path.join(path2write, 'lr1e-4_B256')

transform = transforms.Compose([
        transforms.ToTensor()
        ])
dataset1 = datasets.MNIST('../data', train=True, download=True, transform=transform)
dataset2 = datasets.MNIST('../data', train=False, transform=transform)
training_DataLoader = DataLoader(dataset1, batch_size=batch_size, shuffle=True)
# Evaluation does not need shuffling; a fixed order keeps it deterministic.
validation_DataLoader = DataLoader(dataset2, batch_size=batch_size, shuffle=False)


model = DNN()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
trainer = Trainer(model=model,
                  device=gpu_id,
                  criterion=loss_fn,
                  optimizer=optimizer,
                  training_DataLoader=training_DataLoader,
                  validation_DataLoader=validation_DataLoader,
                  epochs=epochs,
                  epoch=0,
                  notebook=notebook,
                  path2write=run_dir,
                  checkpoint_start_epoch=checkpoint_start_epoch)
training_loss_lr1e4_B256, validation_loss_lr1e4_B256, model_lr1e4_B256, training_accuracy_lr1e4_B256, validation_accuracy_lr1e4_B256 = trainer.run_trainer()



In [None]:
# Run 2: batch size 1024, learning rate 1e-4.
batch_size = 1024
lr = 1e-4
# Each run gets its own output directory; otherwise every run appends its
# TensorBoard scalars to the same Log/ folder and the curves are mixed together.
run_dir = os.path.join(path2write, 'lr1e-4_B1024')

transform = transforms.Compose([
        transforms.ToTensor()
        ])
dataset1 = datasets.MNIST('../data', train=True, download=True, transform=transform)
dataset2 = datasets.MNIST('../data', train=False, transform=transform)
training_DataLoader = DataLoader(dataset1, batch_size=batch_size, shuffle=True)
# Evaluation does not need shuffling; a fixed order keeps it deterministic.
validation_DataLoader = DataLoader(dataset2, batch_size=batch_size, shuffle=False)


model = DNN()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
trainer = Trainer(model=model,
                  device=gpu_id,
                  criterion=loss_fn,
                  optimizer=optimizer,
                  training_DataLoader=training_DataLoader,
                  validation_DataLoader=validation_DataLoader,
                  epochs=epochs,
                  epoch=0,
                  notebook=notebook,
                  path2write=run_dir,
                  checkpoint_start_epoch=checkpoint_start_epoch)
training_loss_lr1e4_B1024, validation_loss_lr1e4_B1024, model_lr1e4_B1024, training_accuracy_lr1e4_B1024, validation_accuracy_lr1e4_B1024 = trainer.run_trainer()

In [None]:
# Run 3: batch size 1024, learning rate 1e-2.
batch_size = 1024
lr = 1e-2
# Each run gets its own output directory; otherwise every run appends its
# TensorBoard scalars to the same Log/ folder and the curves are mixed together.
run_dir = os.path.join(path2write, 'lr1e-2_B1024')

transform = transforms.Compose([
        transforms.ToTensor()
        ])
dataset1 = datasets.MNIST('../data', train=True, download=True, transform=transform)
dataset2 = datasets.MNIST('../data', train=False, transform=transform)
training_DataLoader = DataLoader(dataset1, batch_size=batch_size, shuffle=True)
# Evaluation does not need shuffling; a fixed order keeps it deterministic.
validation_DataLoader = DataLoader(dataset2, batch_size=batch_size, shuffle=False)


model = DNN()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
trainer = Trainer(model=model,
                  device=gpu_id,
                  criterion=loss_fn,
                  optimizer=optimizer,
                  training_DataLoader=training_DataLoader,
                  validation_DataLoader=validation_DataLoader,
                  epochs=epochs,
                  epoch=0,
                  notebook=notebook,
                  path2write=run_dir,
                  checkpoint_start_epoch=checkpoint_start_epoch)
training_loss_lr1e2_B1024, validation_loss_lr1e2_B1024, model_lr1e2_B1024, training_accuracy_lr1e2_B1024, validation_accuracy_lr1e2_B1024 = trainer.run_trainer()

In [None]:
# Training-loss curves: batch-size comparison (left) and learning-rate
# comparison (right).
fig = plt.figure(figsize=(15, 5))


def _plot_loss_panel(fig, position, curves, title, filename):
  """Add one loss panel to ``fig``, then save the figure as it stands.

  ``curves`` is a list of ``(losses, color, label)`` tuples. The figure is
  written to ``path2write/filename`` right after this panel is drawn.
  """
  ax = fig.add_subplot(1, 2, position)
  for losses, color, label in curves:
    ax.plot(losses, color=color, label=label)
  ax.legend()
  ax.set_title(title)
  ax.set_xlabel('Epochs')
  ax.set_ylabel('Cross Entropy Loss')
  fig.savefig(os.path.join(path2write, filename))
  return ax


# The first file is written before the second panel exists, so it contains the
# left panel only; the second file contains both panels.
# NOTE(review): the batch-size comparison is saved as "...constantB" and the
# full figure as "...constantLr" — the names look swapped relative to the
# panel titles; confirm before relying on them.
ax1 = _plot_loss_panel(
    fig, 1,
    [(training_loss_lr1e4_B256, 'orange', 'loss batch size - 256, lr=1e-4'),
     (training_loss_lr1e4_B1024, 'blue', 'loss batch size - 1024, lr=1e-4')],
    'Batchsize Vs Loss',
    'FlatVSGen_loss_constantB.png')

ax2 = _plot_loss_panel(
    fig, 2,
    [(training_loss_lr1e2_B1024, 'orange', 'loss batch size - 1024, lr=1e-2'),
     (training_loss_lr1e4_B1024, 'blue', 'loss batch size - 1024, lr=1e-4')],
    'Learning Rate Vs Loss',
    'FlatVSGen_loss_constantLr.png')

plt.show()


In [None]:
# alpha test
def inference(model, DataLoader, criterion, device=0):
  """Evaluate ``model`` on every batch of ``DataLoader`` without gradients.

  Args:
    model: network to evaluate; switched to eval mode and left in it.
    DataLoader: iterable of ``(x, y)`` batches that supports ``len``.
    criterion: loss called as ``criterion(output, target)`` with integer
      (``torch.long``) targets.
    device: device the batches are moved to; ``model`` must already be there.

  Returns:
    ``(accuracy, loss)``: accuracy is correct predictions divided by samples
    seen (a Python float); loss is the mean of the per-batch losses.
  """
  model.eval()
  valid_losses = []
  n_correct = 0
  n_seen = 0
  batch_iter = tqdm(enumerate(DataLoader), 'inference_loader', total=len(DataLoader), disable=False)
  with torch.no_grad():
    for _, (x, y) in batch_iter:
      # Convert on the target device directly (no CPU round-trip).
      inputs = x.to(device=device, dtype=torch.float32)
      target = y.to(device=device, dtype=torch.long)
      output = model(inputs)
      valid_losses.append(criterion(output, target).item())
      pred = output.argmax(dim=1)
      # Count per sample so a short final batch is not over-weighted.
      n_correct += pred.eq(target.view_as(pred)).sum().item()
      n_seen += pred.numel()
  batch_iter.close()
  accuracy = n_correct / n_seen
  loss = np.mean(valid_losses)
  return accuracy, loss

In [None]:
def alpha_test(model, model1, model2, training_DataLoader, validation_DataLoader, loss_fn, alphas=None, device=None):
  """Evaluate linear interpolations between the weights of two models.

  For every ``alpha`` the state ``(1 - alpha) * model1 + alpha * model2`` is
  loaded into ``model`` and evaluated with ``inference`` on both loaders.

  Args:
    model: scratch network that receives the blended weights (overwritten).
    model1: network whose weights are used at ``alpha = 0``.
    model2: network whose weights are used at ``alpha = 1``.
    training_DataLoader: loader passed to ``inference`` for train metrics.
    validation_DataLoader: loader passed to ``inference`` for val metrics.
    loss_fn: criterion passed to ``inference``.
    alphas: interpolation coefficients; defaults to ``np.linspace(-1, 1, 10)``.
    device: evaluation device; defaults to the device of ``model1``'s first
      parameter.

  Returns:
    ``(alphas, train_loss, train_accuracy, val_loss, val_accuracy)`` with
    ``alphas`` as a NumPy array and the rest as lists aligned with it.
  """
  if alphas is None:
    alphas = np.linspace(-1, 1, 10)
  else:
    alphas = np.asarray(alphas, dtype=float)
  if device is None:
    device = next(model1.parameters()).device

  state1 = model1.state_dict()
  state2 = model2.state_dict()
  model.to(device)

  train_loss = []
  train_accuracy = []
  val_loss = []
  val_accuracy = []
  for alpha in alphas:
    alpha = float(alpha)  # plain Python scalar keeps the arithmetic in torch
    blended = {}
    for key, t1 in state1.items():
      t1 = t1.to(device)
      if torch.is_floating_point(t1):
        blended[key] = (1 - alpha) * t1 + alpha * state2[key].to(device)
      else:
        # Integer entries (e.g. counters) cannot be interpolated; keep model1's.
        blended[key] = t1.clone()
    model.load_state_dict(blended)
    train_accuracy_, train_loss_ = inference(model, training_DataLoader, loss_fn, device=device)
    val_accuracy_, val_loss_ = inference(model, validation_DataLoader, loss_fn, device=device)
    train_loss.append(train_loss_)
    train_accuracy.append(train_accuracy_)
    val_loss.append(val_loss_)
    val_accuracy.append(val_accuracy_)
  return alphas, train_loss, train_accuracy, val_loss, val_accuracy

In [None]:
# Learning rate held constant: interpolate from the batch-size-256 solution
# (model1) to the batch-size-1024 solution (model2).
model1, model2 = model_lr1e4_B256, model_lr1e4_B1024
model = DNN()  # scratch network that receives the blended weights
(alphasB,
 train_lossB,
 train_accuracyB,
 val_lossB,
 val_accuracyB) = alpha_test(
    model=model,
    model1=model1,
    model2=model2,
    training_DataLoader=training_DataLoader,
    validation_DataLoader=validation_DataLoader,
    loss_fn=loss_fn,
)


In [None]:
# Loss (left axis, blue) and accuracy (right axis, red) along the
# interpolation path between the two batch-size runs.
# plt.subplots already returns the axes; a further add_subplot(1, 1, 1) is
# redundant and can stack a second axes on top of the first.
fig, ax1 = plt.subplots(figsize=(20, 5))

curves = ax1.plot(alphasB, train_lossB, 'b-', label='Train loss')
curves += ax1.plot(alphasB, val_lossB, 'b--', label='Validation loss')
ax1.set_xlabel('Alpha')
ax1.set_ylabel('Loss', color='b')
ax1.set_title('lr constant; Batch Size 256 Vs 1024')

ax2 = ax1.twinx()
curves += ax2.plot(alphasB, train_accuracyB, 'r-', label='Train accuracy')
curves += ax2.plot(alphasB, val_accuracyB, 'r--', label='Validation accuracy')
ax2.set_ylabel('Accuracy', color='r')
# One combined legend on the top-most axes; the accuracy curves previously had
# no labels, so ax2.legend() had nothing to show.
ax2.legend(curves, [curve.get_label() for curve in curves])

fig.tight_layout()
fig.savefig(os.path.join(path2write, 'alpha_test_lr_const_Batch_256Vs1024'))
# plt.show() instead of fig.show(): the latter warns on non-GUI (inline) backends.
plt.show()


In [None]:
# Batch size held constant: interpolate from the lr=1e-2 solution (model1)
# to the lr=1e-4 solution (model2).
model1, model2 = model_lr1e2_B1024, model_lr1e4_B1024
model = DNN()  # scratch network that receives the blended weights
(alphaslr,
 train_losslr,
 train_accuracylr,
 val_losslr,
 val_accuracylr) = alpha_test(
    model=model,
    model1=model1,
    model2=model2,
    training_DataLoader=training_DataLoader,
    validation_DataLoader=validation_DataLoader,
    loss_fn=loss_fn,
)

In [None]:
# Loss (left axis, blue) and accuracy (right axis, red) along the
# interpolation path between the two learning-rate runs.
# plt.subplots already returns the axes; a further add_subplot(1, 1, 1) is
# redundant and can stack a second axes on top of the first.
fig, ax1 = plt.subplots(figsize=(15, 5))

curves = ax1.plot(alphaslr, train_losslr, 'b-', label='Train loss')
curves += ax1.plot(alphaslr, val_losslr, 'b--', label='Validation loss')
ax1.set_xlabel('Alpha')
ax1.set_ylabel('Loss', color='b')
ax1.set_title('Batch Size Constant; Learning Rate 1e-2 Vs 1e-4')

ax2 = ax1.twinx()
curves += ax2.plot(alphaslr, train_accuracylr, 'r-', label='Train accuracy')
curves += ax2.plot(alphaslr, val_accuracylr, 'r--', label='Validation accuracy')
ax2.set_ylabel('Accuracy', color='r')
# One combined legend on the top-most axes; the accuracy curves previously had
# no labels, so ax2.legend() had nothing to show.
ax2.legend(curves, [curve.get_label() for curve in curves])

fig.tight_layout()
fig.savefig(os.path.join(path2write, 'alpha_test_Batch_const_lr_1e-2Vs1e-4'))
# plt.show() instead of fig.show(): the latter warns on non-GUI (inline) backends.
plt.show()