<a href="https://colab.research.google.com/github/sumansahoo16/Categorical-Feature-Encoding-Challenge-II/blob/master/task1_62.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive so files stored there can be read from this notebook.
# This relies on the google.colab package.

from google.colab import drive
# The drive is mounted at /content/gdrive.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [20]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset



import os
import cv2
import glob
import time
import random
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

def seed_everything(seed = 16):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode
    with benchmark auto-tuning switched off.

    Args:
        seed: Value handed to every generator (default 16).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Every generator receives the same seed value.
    for set_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        set_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False  # set True to be faster


# Seed once, with the default value, as soon as the cell runs.
seed_everything()

class cfg:
    """Hyper-parameters and data-loading settings shared by the notebook."""

    # Data splitting / loading
    seed = 43
    num_workers = 2
    batch_size = 32

    # Optimisation
    LR = 0.001
    weight_decay = 0.0
    EPOCHS = 100


In [21]:
def get_data(root = 'train'):
  """
  Arranges Image paths and Labels in pandas DataFrame

  Collects every ``<root>/<class_dir>/*.png`` file. The label is parsed from
  characters 7-9 of the class directory name (for the default root these are
  the same characters as ``path[12:15]``), converted to int and shifted by -1
  so that labels start at 0.

  Args:
    root: Directory holding one sub-directory per class (default 'train').

  Returns:
    DataFrame with columns 'image' (file path) and 'label' (int), with rows
    sorted by path.

  Raises:
    ValueError: if a class directory name has no integer at characters 7-9.
  """
  # glob returns matches in arbitrary (filesystem-dependent) order; sorting
  # keeps the seeded train/validation split identical across machines.
  files = sorted(glob.glob(os.path.join(root, '*', '*.png')))

  # Parse the class number from the parent directory name rather than from a
  # fixed offset into the whole path, so the result does not depend on `root`.
  labels = [int(os.path.basename(os.path.dirname(path))[6:9]) - 1 for path in files]

  data = pd.DataFrame()
  data['image'] = files
  data['label'] = labels

  return data

In [22]:
def rotate_image(image, angle = 5.0):
  """Rotate `image` around its centre by `angle`, keeping the original size.

  Args:
    image: Array whose first two shape entries are (height, width).
    angle: Rotation angle passed to cv2.getRotationMatrix2D (default 5.0).

  Returns:
    The rotated image, resampled with linear interpolation.
  """
  # shape[1::-1] reverses (height, width) into the (width, height) order cv2 uses.
  width_height = image.shape[1::-1]
  centre = tuple(np.array(width_height) / 2)

  rotation = cv2.getRotationMatrix2D(centre, angle, 1.0)
  return cv2.warpAffine(image, rotation, width_height, flags=cv2.INTER_LINEAR)
  
class DataSet(Dataset):
  """Image dataset backed by a DataFrame with 'image' and 'label' columns.

  Each item is a pair ``(image, label)``: ``image`` is a float tensor of shape
  [1, 28, 28] with values divided by 255, ``label`` is a long tensor.

  Args:
    df: DataFrame with an 'image' column (file paths) and a 'label' column.
    rotate: When True, every image is rotated by a random angle drawn
      uniformly from [-5, 5] before cropping.
  """
  def __init__(self, df, rotate = False):
    self.df = df
    self.rotate = rotate

  def __len__(self):
    return self.df.shape[0]

  def __getitem__(self, idx):
    # Positional lookup: samplers hand out positions 0..len-1, which only
    # coincide with index labels when the caller has reset the index.
    row = self.df.iloc[idx]

    img = cv2.imread(row.image)
    # cv2.imread signals an unreadable/missing file by returning None.
    if img is None:
      raise FileNotFoundError(f'Could not read image: {row.image}')

    if self.rotate:
      img = rotate_image(img, random.uniform(-5.0, 5.0))

    #Removing some White Space from images
    img = img[200:1000, 100:800]

    #Resizing Image
    img = cv2.resize(img, (28,28))

    #Normalizing Images
    img = img / 255

    #Pytorch Accepts [bs, channels, h, w]
    img = img.transpose(2,0,1)

    #Only one channel
    img = img[0:1, :, :]

    return torch.tensor(img).float(), torch.tensor(row.label).long()

In [23]:
class Model(nn.Module):
    """Three conv/pool stages followed by a linear layer producing 62 logits.

    The linear layer expects 16 * 2 * 2 features, which is what the conv/pool
    stack yields for single-channel 28x28 inputs.
    """

    def __init__(self):
        super().__init__()

        # Parameterised layers are created in a fixed order
        # (conv1, conv2, conv3, out) so seeded initialisation stays the same.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=2)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3)

        self.maxpool = nn.MaxPool2d(kernel_size=2)
        self.dropout = nn.Dropout2d(p=0.1)

        self.out = nn.Linear(in_features=16 * 2 * 2, out_features=62)

    def forward(self, x):
        """Return the raw class scores (no softmax) for a batch `x`."""
        # Spatial size for a 28x28 input: 28 -> 30 (conv, pad 2) -> 15 (pool)
        stage1 = self.maxpool(F.relu(self.conv1(x)))

        # 15 -> 13 (conv) -> 6 (pool)
        stage2 = self.dropout(self.maxpool(F.relu(self.conv2(stage1))))

        # 6 -> 4 (conv) -> 2 (pool)
        stage3 = self.dropout(self.maxpool(F.relu(self.conv3(stage2))))

        # Flatten everything except the batch dimension.
        flat = stage3.view(stage3.size(0), -1)
        return self.out(flat)


In [24]:
###################################################################################################
def train_func(model, data_loader, criterion, optimizer, scheduler = None, device = torch.device("cuda")):
    """Run one optimisation pass over `data_loader`.

    Args:
        model: Network to train; switched to train mode.
        data_loader: Sized iterable yielding (images, labels) batches.
        criterion: Loss called as ``criterion(predictions, labels)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Accepted for interface compatibility; not used here.
        device: Device the batches are moved to.

    Returns:
        Tuple of (mean batch loss, softmax probabilities for every sample as
        one NumPy array, the matching labels as one NumPy array).
    """
    model.train()

    running_loss = 0.0
    batch_probs, batch_targets = [], []

    for images, labels in data_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        logits = model(images)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

        # Keep CPU copies so accuracy can be computed after the pass.
        batch_probs.append(logits.softmax(1).detach().cpu().numpy())
        batch_targets.append(labels.cpu().numpy())

    mean_loss = running_loss / len(data_loader)
    return mean_loss, np.concatenate(batch_probs), np.concatenate(batch_targets)
###################################################################################################

###################################################################################################    
def valid_func(model, data_loader, criterion, device = torch.device("cuda")):
    """Evaluate `model` on `data_loader` without updating any weights.

    Args:
        model: Network to evaluate; switched to eval mode.
        data_loader: Sized iterable yielding (images, labels) batches.
        criterion: Loss called as ``criterion(predictions, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple of (mean batch loss, softmax probabilities for every sample as
        one NumPy array, in loader order).
    """
    model.eval()

    total_loss = 0.0
    batch_probs = []

    # Nothing in the loop needs gradients.
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)

            logits = model(images)
            total_loss += criterion(logits, labels).item()

            batch_probs.append(logits.softmax(1).cpu().numpy())

    return total_loss / len(data_loader), np.concatenate(batch_probs)
###################################################################################################

In [25]:
def main(device = None):
    """Train `Model` on the images found by `get_data` and report per-epoch metrics.

    The data is split 80/20 (stratified by label, seeded with `cfg.seed`),
    the model is trained with Adam and cross-entropy for `cfg.EPOCHS` epochs,
    and one summary line is printed per epoch.

    Args:
        device: Device (or device string) to train on. When None, CUDA is
            used if available, otherwise the CPU.

    Returns:
        Tuple of four lists with one entry per epoch:
        (train_losses, valid_losses, train_acc, valid_acc).
    """
    # Fall back to the CPU instead of failing when no GPU is present.
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(device)

    #Unzip Task 1 images
    #!unzip -qq  gdrive/MyDrive/trainPart1.zip

    data = get_data()

    train, valid = train_test_split(data, test_size = 0.2, random_state = cfg.seed, stratify = data['label'])

    # drop = True: the old shuffled index is not needed as a column.
    train = train.reset_index(drop = True)
    valid = valid.reset_index(drop = True)

    train_dataset = DataSet(train, rotate = False)
    valid_dataset = DataSet(valid)

    # Pinned host memory is only requested when training on a GPU.
    pin_memory = device.type == "cuda"

    train_loader = DataLoader(train_dataset, batch_size = cfg.batch_size,
                          shuffle = True, num_workers = cfg.num_workers,
                          pin_memory = pin_memory, drop_last = True)

    valid_loader = DataLoader(valid_dataset, batch_size = cfg.batch_size,
                          shuffle = False, num_workers = cfg.num_workers,
                          pin_memory = pin_memory, drop_last = False)

    model = Model()
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(),lr = cfg.LR,
                                weight_decay = cfg.weight_decay)

    #scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5, 50, 60], gamma=0.1)

    criterion = nn.CrossEntropyLoss()


    #-----------------------------------------------------------------#
    train_losses = []
    valid_losses = []

    train_acc = []
    valid_acc = []

    for epoch in range(cfg.EPOCHS):
        start_time = time.time()

        # Pass the device explicitly so both helpers use the one chosen above.
        train_loss, train_prediction, train_labels = train_func(model, train_loader, criterion, optimizer, device = device)
        valid_loss, valid_prediction = valid_func(model, valid_loader, criterion, device = device)
        train_losses.append(train_loss)
        valid_losses.append(valid_loss)

        # The train loader shuffles and drops the last partial batch, so its
        # labels come back from train_func. The validation loader does neither,
        # so its predictions line up with valid.label row for row.
        train_acc_ = accuracy_score(train_labels, np.argmax(train_prediction, axis = 1))
        valid_acc_ = accuracy_score(valid.label , np.argmax(valid_prediction, axis = 1))
        train_acc.append(train_acc_)
        valid_acc.append(valid_acc_)

        #scheduler.step()

        time_taken = time.time() - start_time
        print('Epoch {:2d} | loss: {:.4f}  | val_Loss: {:.4f}  | Acc : {:.3f}  | Val_Acc : {:.3f}  | {:d}s'.
          format(epoch, train_loss, valid_loss, train_acc_, valid_acc_, int(time_taken)))

    return train_losses, valid_losses, train_acc, valid_acc


In [26]:
# Run training; main() returns (train_losses, valid_losses, train_acc, valid_acc).
fold0 = main() 

Epoch  0 | loss: 4.1326  | val_Loss: 4.1271  | Acc : 0.019  | Val_Acc : 0.026  | 13s
Epoch  1 | loss: 4.1207  | val_Loss: 4.0908  | Acc : 0.023  | Val_Acc : 0.036  | 13s
Epoch  2 | loss: 3.9778  | val_Loss: 3.7126  | Acc : 0.052  | Val_Acc : 0.101  | 14s
Epoch  3 | loss: 3.4843  | val_Loss: 3.2379  | Acc : 0.134  | Val_Acc : 0.177  | 13s
Epoch  4 | loss: 3.0897  | val_Loss: 2.9830  | Acc : 0.198  | Val_Acc : 0.220  | 13s
Epoch  5 | loss: 2.8763  | val_Loss: 2.7631  | Acc : 0.253  | Val_Acc : 0.280  | 13s
Epoch  6 | loss: 2.6680  | val_Loss: 2.6525  | Acc : 0.302  | Val_Acc : 0.306  | 13s
Epoch  7 | loss: 2.5490  | val_Loss: 2.5316  | Acc : 0.343  | Val_Acc : 0.337  | 13s
Epoch  8 | loss: 2.4007  | val_Loss: 2.4410  | Acc : 0.369  | Val_Acc : 0.341  | 13s
Epoch  9 | loss: 2.3140  | val_Loss: 2.3742  | Acc : 0.386  | Val_Acc : 0.343  | 13s
Epoch 10 | loss: 2.2002  | val_Loss: 2.3269  | Acc : 0.421  | Val_Acc : 0.385  | 13s
Epoch 11 | loss: 2.1411  | val_Loss: 2.2817  | Acc : 0.423  | Val