# Importing NN modules and data

In [2]:
from google.colab import drive

# Mount Google Drive so the project library stored there becomes reachable.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [1]:
import sys

# Folder on the mounted Drive that holds the `library_model` and `data` packages
# imported further down; it is put first so it wins over any same-named package.
NN_LIBRARY_DIR = "/content/drive/My Drive/Colab Notebooks/NeuralNetwork"
sys.path.insert(0, NN_LIBRARY_DIR)

In [2]:
import torch
import matplotlib.pyplot as plt
from torch import nn
from torch.utils.data import Dataset, DataLoader

import pandas as pd
import os
import torch.optim as optim


import numpy as np
import random
import copy
import pickle
import pandas as pd
import time

from library_model import layers as lay
from library_model import model_building as mb
from library_model import model_training as mt
from data import data_loading as dt
from data import text as txt

# Process Data

In [3]:
from torchvision import datasets, transforms
from torchvision.io import read_image
from torchvision.transforms import ToTensor, Lambda

In [4]:
NUM_CLASSES = 10  # length of the one-hot target vector


def one_hot(label):
    """Turn a class label into a float one-hot vector of length NUM_CLASSES.

    `label` is assumed to be the integer class index handed over by the
    dataset; it is used as the scatter position along dimension 0.
    """
    encoded = torch.zeros(NUM_CLASSES, dtype=torch.float)
    return encoded.scatter_(0, torch.tensor(label), value=1)


def _fashion_mnist(train):
    """Download (if needed) and open one FashionMNIST split under ./data."""
    return datasets.FashionMNIST(
        root="data",
        train=train,
        download=True,
        transform=ToTensor(),
        target_transform=Lambda(one_hot),
    )


train_data = _fashion_mnist(train=True)
test_data = _fashion_mnist(train=False)

In [5]:
# Both splits are served as shuffled mini-batches of 64 samples and attached
# to the project's Data container, which the training cells read from.
loader_options = dict(batch_size=64, shuffle=True)

d = dt.Data()
d.train_dataloader = DataLoader(train_data, **loader_options)
d.test_dataloader = DataLoader(test_data, **loader_options)

In [6]:
# Pull a single mini-batch to sanity-check the shapes coming out of the loader.
x1, y1 = next(iter(d.train_dataloader))
(x1.shape, y1.shape)

(torch.Size([64, 1, 28, 28]), torch.Size([64, 10]))

# Define Network State

In [None]:
# Hyper-parameters of the FashionMNIST network, collected on a fresh Network_state.
state = lay.Network_state()
p, tr = state.parameters, state.training

p.dropout = 0.2

# Key order matters in the dicts below: Convolutional hands their values to the
# layer constructors positionally, in insertion order.
p.convlayers = [
    {"filter_dim": 5, "in_features": 1, "out_features": 10},
    {"filter_dim": 5, "in_features": 10, "out_features": 10},
]
p.linearlayers = [
    {"dim_in": 10 * 4**2, "dim_hid": 100, "bias_is_true": True, "relu_is_true": True},
    {"dim_in": 100, "dim_hid": 10, "bias_is_true": True, "relu_is_true": False},
]

# Optimisation settings.
tr.lr = 1.0
tr.batch_size = 64
tr.optimizer = "sgd"
# NOTE(review): judging by the logged lr values (1.0, 0.909..., 0.826...), this
# schedule divides the rate by 1.1 after every epoch - confirm in model_building.
tr.schedule = mb.learning_rate_step(1.0, 1.1, 1)

# Construct Model

In [None]:
class Convolutional(nn.Module):
    """Stack of `lay.ConvPool` blocks followed by a stack of `lay.linear_layer` blocks.

    The layer specifications come from `state.parameters`: `convlayers` and
    `linearlayers` are lists of dicts whose values are handed to the layer
    constructors in insertion order, and `dropout` is the probability used by
    the dropout applied between the two stacks.
    """

    def __init__(self, state):
        """Build the sub-modules described by `state.parameters`."""
        super().__init__()
        hyper = state.parameters

        # ConvPool receives the dict values as separate positional arguments.
        conv_blocks = []
        for spec in hyper.convlayers:
            conv_blocks.append(lay.ConvPool(*spec.values()))
        self.convlayers = nn.ModuleList(conv_blocks)

        # linear_layer receives the dict values packed into one tuple.
        dense_blocks = []
        for spec in hyper.linearlayers:
            dense_blocks.append(lay.linear_layer(tuple(spec.values())))
        self.linearlayers = nn.ModuleList(dense_blocks)

        self.dropout = nn.Dropout(hyper.dropout)

    def forward(self, input):
        """Run `input` through the conv stack, dropout, flattening and the linear stack.

        The sizes of the last two input dimensions are forwarded to every conv
        block, which returns the transformed tensor together with updated sizes.
        """
        spatial = (int(input.size(-2)), int(input.size(-1)))
        hidden = input
        for conv in self.convlayers:
            hidden, spatial = conv(hidden, *spatial)

        # Flatten everything except the leading (batch) dimension.
        batch = hidden.size(0)
        hidden = self.dropout(hidden).view(batch, -1)

        for dense in self.linearlayers:
            hidden = dense(hidden)
        return hidden
        

In [None]:
# Build the network from the configured state, then move it to the target device.
model = Convolutional(state)
model.to(state.device)  # nn.Module.to moves the parameters and returns the same module

In [None]:
# Optimizer and learning-rate scheduler come from the project helper.
opt, scheduler = mb.get_optimizer(state, model)

# Cross-entropy loss, placed on the same device as the model.
criterion = nn.CrossEntropyLoss()
criterion.to(state.device)

# Train Model

Epoch training and network evaluation functions

In [None]:
#@title
def run_epoch():
    """Train `model` for one pass over `d.train_dataloader`.

    Works on the notebook-level `model`, `criterion`, `opt`, `scheduler`,
    `state` and `d`. Gradients are clipped to a total norm of 0.5 before each
    optimizer step, a progress line is printed every 300 batches, and the
    scheduler is stepped once after the last batch.

    Returns:
        tuple: (seconds spent in the epoch, sum of the per-batch loss values).
    """
    model.train()
    batches = iter(d.train_dataloader)
    running_loss = 0
    epoch_start = time.time()

    for step, (inputs, targets) in enumerate(batches, start=1):
        batch_start = time.time()

        outputs = model(inputs.to(state.device))
        loss = criterion(outputs, targets.to(state.device))
        running_loss += loss.item()

        opt.zero_grad()
        loss.backward()
        # Clipping keeps back-propagated gradients from blowing up.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        opt.step()

        if step % 300 == 0:
            lr = scheduler.get_last_lr()[0]
            print(f"Batch {step} -- lr = {lr} -- time = {time.time()-batch_start} -- loss {loss}")

    scheduler.step()
    return time.time() - epoch_start, running_loss

def evaluate(epoch=None, max_batches=10):
    """Estimate the classification accuracy of `model` on `d.test_dataloader`.

    Works on the notebook-level `model`, `d` and `state`, and leaves the model
    in eval mode. Targets are one-hot vectors, so the true class is recovered
    with an argmax just like the predicted one.

    Args:
        epoch: Unused; accepted so existing `evaluate(epoch)` calls keep working.
        max_batches: Number of leading test batches to score. Defaults to 10,
            the value that used to be hard-coded. Pass None to score them all.

    Returns:
        float: Mean of the per-batch accuracies over the scored batches, or
        NaN when the loader produced no batch at all.
    """
    from itertools import islice

    model.eval()
    accuracy = 0.0
    number = 0
    # islice stops pulling from the loader once max_batches were produced, so
    # the rest of the test set is no longer loaded just to be thrown away.
    # islice(..., None) walks the whole loader.
    batches = islice(d.test_dataloader, max_batches)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for Xt, yt in batches:
            out = model(Xt.to(state.device))
            out_pred = torch.argmax(out, dim=-1)
            out_true = torch.argmax(yt.to(state.device), dim=-1)
            # Fraction of correct predictions in this batch, computed on-device
            # instead of summing the tensor element by element in Python.
            accuracy += (out_pred == out_true).float().mean().item()
            number += 1
    if number == 0:
        # Nothing was scored; avoid dividing by zero.
        return float("nan")
    return accuracy / number

In [None]:
# Train for a fixed number of epochs and report timing, loss and accuracy after each one.
epochs = 5
for epoch in range(epochs):
    # The elapsed time must not be stored in `dt`: that name is the alias of
    # data.data_loading imported at the top of the notebook, and rebinding it
    # to a float breaks every later (or re-run) cell that calls dt.Data().
    epoch_time, epoch_loss = run_epoch()
    a = evaluate(epoch)
    print(f"Epoch: {epoch} -- time: {epoch_time} -- loss = {epoch_loss} -- accuracy = {a}\n")

Batch 300 -- lr = 1.0 -- time = 0.012428760528564453 -- loss 0.6956375241279602
Batch 600 -- lr = 1.0 -- time = 0.012656688690185547 -- loss 0.6633412837982178
Batch 900 -- lr = 1.0 -- time = 0.012647390365600586 -- loss 0.59824138879776
Epoch: 0 -- time: 22.11838436126709 -- loss = 625.2046687304974 -- accuracy = 0.8203125

Batch 300 -- lr = 0.9090909090909091 -- time = 0.01715874671936035 -- loss 0.5111762285232544
Batch 600 -- lr = 0.9090909090909091 -- time = 0.012359142303466797 -- loss 0.5309940576553345
Batch 900 -- lr = 0.9090909090909091 -- time = 0.012796878814697266 -- loss 0.4539761543273926
Epoch: 1 -- time: 22.088136672973633 -- loss = 415.8757147192955 -- accuracy = 0.8609375

Batch 300 -- lr = 0.8264462809917354 -- time = 0.012729167938232422 -- loss 0.3442299962043762
Batch 600 -- lr = 0.8264462809917354 -- time = 0.012629508972167969 -- loss 0.2715686559677124
Batch 900 -- lr = 0.8264462809917354 -- time = 0.012248992919921875 -- loss 0.38790953159332275
Epoch: 2 -- t

In [None]:
# Second configuration: same architecture, but the first conv block takes 3 input channels.
state2 = lay.Network_state()
p2, tr2 = state2.parameters, state2.training

p2.dropout = 0.2

# Key order matters in the dicts below: Convolutional hands their values to the
# layer constructors positionally, in insertion order.
p2.convlayers = [
    {"filter_dim": 5, "in_features": 3, "out_features": 10},
    {"filter_dim": 5, "in_features": 10, "out_features": 10},
]
# NOTE(review): dim_in is copied unchanged from the single-channel setup -
# TODO confirm it matches the flattened conv output for the 3-channel inputs.
p2.linearlayers = [
    {"dim_in": 10 * 4**2, "dim_hid": 100, "bias_is_true": True, "relu_is_true": True},
    {"dim_in": 100, "dim_hid": 10, "bias_is_true": True, "relu_is_true": False},
]

# Optimisation settings, with the same schedule arguments as the first configuration.
tr2.lr = 1.0
tr2.batch_size = 64
tr2.optimizer = "sgd"
tr2.schedule = mb.learning_rate_step(1.0, 1.1, 1)