In [None]:
import cv2
import os

import numpy as np
import pandas as pd

import albumentations
import joblib
import pretrainedmodels
import torch
import torch.nn as nn

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.utils import shuffle
from sklearn import metrics

from torch.nn import functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
from torchvision import transforms, utils, models, datasets
from torchsummary import summary

from tqdm.notebook import tqdm



from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sn

In [None]:
# ---------------------------------------------------------------------------
# Training hyper-parameters
# ---------------------------------------------------------------------------
FOLDS = 5                    # number of stratified cross-validation folds
EPOCHS = 50                  # number of training epochs
TRAIN_BATCH_SIZE = 32        # samples per batch drawn from the training split
VALIDATION_BATCH_SIZE = 32   # samples per batch drawn from the validation split

# ---------------------------------------------------------------------------
# Label files
# ---------------------------------------------------------------------------
TRAIN_CSV_PATH = "../data/train.csv"
TEST_CSV_PATH = "../data/test.csv"
STRATIFIED_CSV_PATH = "../data/stratified_train.csv"  # cache of the fold assignment

# ---------------------------------------------------------------------------
# Image locations
# ---------------------------------------------------------------------------
TRAIN_IMG_PATH = "../train_images/"
COMBINED_IMAGE_PATH = "../data/"

# Parquet shards holding the raw training image data.
TRAIN = [
    '../data/train_image_data_0.parquet',
    '../data/train_image_data_1.parquet',
    '../data/train_image_data_2.parquet',
    '../data/train_image_data_3.parquet',
]

In [None]:
def generate_stratified_df(random_state=42):
    """Return the training labels with a ``kfold`` column assigning each row to a fold.

    If ``STRATIFIED_CSV_PATH`` already exists it is read and returned as-is.
    Otherwise ``TRAIN_CSV_PATH`` is read, the rows are shuffled, every row is
    assigned a validation fold in ``[0, FOLDS)`` using
    ``MultilabelStratifiedKFold`` over the three label columns, and the result
    is written to ``STRATIFIED_CSV_PATH`` so later calls reuse the same split.

    Parameters
    ----------
    random_state : int or None, default 42
        Seed for the row shuffle. A fixed seed makes the shuffle repeatable
        when the cache file is regenerated; pass ``None`` for an unseeded
        shuffle (the previous behaviour).

    Returns
    -------
    pandas.DataFrame
        The label table including the ``kfold`` column.
    """
    # Fast path: reuse the split computed by an earlier run.
    if os.path.exists(STRATIFIED_CSV_PATH):
        return pd.read_csv(STRATIFIED_CSV_PATH)

    df = pd.read_csv(TRAIN_CSV_PATH)
    print(df.shape)
    # Only used for testing purposes
    # df = df.sample(frac=0.2, random_state=0)

    # -1 marks "no fold assigned yet"; every row is overwritten below.
    df.loc[:, 'kfold'] = -1
    # Seeded shuffle so the row order feeding the splitter is repeatable.
    df = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    x = df.image_id.values
    y = df[['grapheme_root', 'vowel_diacritic', 'consonant_diacritic']].values
    mskf = MultilabelStratifiedKFold(n_splits=FOLDS)

    # Only the validation indices are needed: each row is in exactly one
    # validation fold, and that fold number is what gets stored.
    for fold, (_, val_idx) in enumerate(mskf.split(x, y)):
        df.loc[val_idx, "kfold"] = fold

    df.to_csv(STRATIFIED_CSV_PATH, index=False)
    print(df.shape)
    return df

# Build the fold assignment (or load it from the cached CSV) and preview it.
train_df = generate_stratified_df()
train_df.head()

In [None]:
class GraphemeDataset(Dataset):
    """Dataset yielding ``(image, grapheme_root, vowel_diacritic, consonant_diacritic)``.

    ``label_df`` must expose the columns ``image_id``, ``grapheme_root``,
    ``vowel_diacritic`` and ``consonant_diacritic``. The pixels of each sample
    are loaded from ``TRAIN_IMG_PATH + image_id + ".pkl"``.

    With ``augmentation`` enabled a random shift/scale/rotate step is inserted
    between the resize and the normalisation steps.
    """

    def __init__(self, label_df, augmentation=False):
        self.label_df = label_df

        # Resize always comes first and Normalize always comes last; the
        # geometric augmentation, when requested, sits between them.
        pipeline = [albumentations.Resize(137, 236, always_apply=True)]
        if augmentation:
            pipeline.append(
                albumentations.ShiftScaleRotate(shift_limit=0.0625,
                                                scale_limit=0.1,
                                                rotate_limit=5,
                                                p=0.9)
            )
        pipeline.append(
            albumentations.Normalize((0.485, 0.456, 0.406),
                                     (0.229, 0.224, 0.225),
                                     always_apply=True)
        )
        self.transforms = albumentations.Compose(pipeline)

    def __len__(self):
        # One sample per row of the label table.
        return len(self.label_df)

    def __getitem__(self, idx):
        table = self.label_df

        # The three classification targets, each as a 0-dim long tensor.
        root = torch.tensor(table.grapheme_root.values[idx], dtype=torch.long)
        vowel = torch.tensor(table.vowel_diacritic.values[idx], dtype=torch.long)
        consonant = torch.tensor(table.consonant_diacritic.values[idx], dtype=torch.long)

        # Load the stored pixels and view them as a 137x236 float image.
        pixels = joblib.load(TRAIN_IMG_PATH + table.image_id.values[idx] + ".pkl")
        pixels = pixels.reshape(137, 236).astype(float)

        # Expand to three channels via PIL, then run the albumentations pipeline.
        rgb = np.array(Image.fromarray(pixels).convert("RGB"))
        processed = self.transforms(image=rgb)["image"]

        # HWC -> CHW, which is the layout the convolutional model consumes.
        chw = np.transpose(processed, (2, 0, 1)).astype(np.float32)
        return torch.tensor(chw, dtype=torch.float), root, vowel, consonant

In [None]:
class ResNet50(nn.Module):
    """ResNet-50 backbone with three linear classification heads.

    The backbone comes from ``pretrainedmodels``; its ``features`` output is
    average-pooled to one vector per sample and fed to three independent
    linear layers (168, 11 and 7 outputs).

    Parameters
    ----------
    pretrained : bool
        When exactly ``True`` the backbone is created with
        ``pretrained="imagenet"``; any other value creates it with
        ``pretrained=None``.
    """

    def __init__(self, pretrained):
        super().__init__()
        weights = "imagenet" if pretrained is True else None
        self.model = pretrainedmodels.__dict__["resnet50"](pretrained=weights)

        # One head per target, all reading the same 2048-wide pooled features.
        self.l0 = nn.Linear(2048, 168)  # grapheme_root
        self.l1 = nn.Linear(2048, 11)   # vowel_diacritic
        self.l2 = nn.Linear(2048, 7)    # consonant_diacritic

    def forward(self, x):
        """Return ``(grapheme_root, vowel_diacritic, consonant_diacritic)`` outputs for ``x``."""
        # Unpacking four dims enforces a 4-D input before the backbone runs.
        batch, _channels, _height, _width = x.shape
        feature_map = self.model.features(x)
        pooled = F.adaptive_avg_pool2d(feature_map, 1).reshape(batch, -1)
        return self.l0(pooled), self.l1(pooled), self.l2(pooled)

In [None]:
def run_model(model, optimizer, criterion, scheduler, 
              folds=5, epochs=20, train_batch_size=32, validation_batch_size=32):
    """Train and validate ``model`` over ``folds`` cross-validation folds.

    For every fold the rows whose ``kfold`` equals the fold number form the
    validation split and all remaining rows form the training split. Each
    epoch runs ``train_model`` then ``validate_model``, steps ``scheduler``
    with the validation loss, and saves the weights to ``"Resnet50.pth"``
    whenever the validation recall improves on the best value seen so far.

    Parameters
    ----------
    model, optimizer, criterion, scheduler
        Passed through to ``train_model`` / ``validate_model``;
        ``scheduler.step`` is called with the validation loss.
    folds : int
        Number of folds to iterate over.
    epochs : int
        Epochs to run per fold.
    train_batch_size, validation_batch_size : int
        Batch sizes of the two data loaders.

    Returns
    -------
    tuple
        ``(train_recalls, train_losses, valid_recalls, valid_losses,
        final_train_outputs, final_train_targets, final_valid_outputs,
        final_valid_targets)``. The four lists hold one entry per epoch across
        all folds; the four ``final_*`` values come from the best epoch and are
        ``None`` if no epoch ever scored above 0.
    """
    train_validate_df = generate_stratified_df()
    best_recall = 0.0
    train_losses = []
    train_recalls = []
    valid_losses = []
    valid_recalls = []
    final_train_outputs = final_train_targets = final_valid_outputs = final_valid_targets = None

    # NOTE(review): the same `model`/`optimizer` keep training across folds, so
    # from the second fold on the validation rows have already been trained on.
    # Confirm whether a fresh model per fold was intended.
    for fold in range(folds):
        print('folds {}/{} '.format(fold+1,folds))

        in_validation = train_validate_df.kfold.isin([fold])
        train_df = train_validate_df[~in_validation].reset_index(drop=True)
        validate_df = train_validate_df[in_validation].reset_index(drop=True)

        # NOTE(review): augmentation is left off for training, as before —
        # confirm whether GraphemeDataset(train_df, augmentation=True) was meant.
        train_image = GraphemeDataset(train_df)
        validation_image = GraphemeDataset(validate_df)

        train_loader = DataLoader(train_image,
                batch_size=train_batch_size, shuffle=True, num_workers=4)

        # Validation metrics do not depend on sample order, so no shuffling.
        validation_loader = DataLoader(validation_image,
            batch_size=validation_batch_size, shuffle=False, num_workers=4)

        for epoch in range(epochs):
            print('epochs {}/{} '.format(epoch+1,epochs))

            train_recall, train_loss, train_outputs, train_targets = train_model(
                model, optimizer, criterion, train_image, train_loader)
            train_losses.append(train_loss)
            train_recalls.append(train_recall)

            valid_recall, valid_loss, valid_outputs, valid_targets = validate_model(
                model, optimizer, criterion, validation_image, validation_loader)
            valid_losses.append(valid_loss)
            valid_recalls.append(valid_recall)

            scheduler.step(valid_loss)

            # Pick the checkpoint on held-out performance; selecting on the
            # training recall would reward over-fitting.
            if valid_recall > best_recall:
                best_recall = valid_recall
                final_train_outputs, final_train_targets = train_outputs, train_targets
                final_valid_outputs, final_valid_targets = valid_outputs, valid_targets
                torch.save(model.state_dict(), "Resnet50.pth")

            print('epochs {}/{} completed'.format(epoch+1,epochs))

        print('folds {}/{} completed'.format(fold+1,folds))

    return train_recalls, train_losses, valid_recalls, valid_losses, final_train_outputs, final_train_targets, final_valid_outputs, final_valid_targets  


In [None]:
def train_model(model, optimizer, criterion, train_dataset, train_loader):
    """Run one training epoch and report its recall and mean loss.

    Each batch is moved to the module-level ``device``; the loss is the mean of
    ``criterion`` applied to the three heads, and one optimizer step is taken
    per batch.

    Parameters
    ----------
    model : torch.nn.Module
        Called as ``model(inputs)`` and expected to return three outputs.
    optimizer : torch.optim.Optimizer
    criterion : callable
        Loss applied separately to each (output, label) pair.
    train_dataset
        Unused; kept so existing callers keep working.
    train_loader : torch.utils.data.DataLoader
        Yields ``(inputs, labels1, labels2, labels3)`` batches.

    Returns
    -------
    tuple
        ``(macro_recall_score, mean_loss, outputs, targets)`` where
        ``mean_loss`` is a 0-dim tensor averaged over batches (the same
        convention as ``validate_model``), ``outputs`` is the concatenation of
        the three heads for every sample and ``targets`` the stacked labels.

    Raises
    ------
    ValueError
        If ``train_loader`` yields no batches.
    """
    model.train(True)

    # len(loader) counts the final partial batch too, unlike
    # int(len(dataset) / batch_size), which is 0 for a dataset smaller than
    # one batch and otherwise inflates the reported mean.
    n_batches = len(train_loader)
    if n_batches == 0:
        raise ValueError("train_loader yields no batches")

    running_loss = 0.0
    final_outputs = []
    final_targets = []
    for inputs, labels1, labels2, labels3 in tqdm(train_loader, total=n_batches):
        inputs = inputs.to(device, dtype=torch.float)

        labels1 = labels1.to(device, dtype=torch.long)
        labels2 = labels2.to(device, dtype=torch.long)
        labels3 = labels3.to(device, dtype=torch.long)

        optimizer.zero_grad()
        outputs1, outputs2, outputs3 = model(inputs)

        loss1 = criterion(outputs1, labels1)
        loss2 = criterion(outputs2, labels2)
        loss3 = criterion(outputs3, labels3)
        loss = (loss1 + loss2 + loss3) / 3

        loss.backward()
        optimizer.step()

        # Detach everything kept past this iteration so the bookkeeping does
        # not hold on to autograd history for the whole epoch.
        running_loss += loss.detach()
        final_outputs.append(torch.cat((outputs1, outputs2, outputs3), dim=1).detach())
        final_targets.append(torch.stack((labels1, labels2, labels3), dim=1))

    training_loss = running_loss / n_batches

    final_outputs = torch.cat(final_outputs)
    final_targets = torch.cat(final_targets)
    macro_recall_score = macro_recall(final_outputs, final_targets)

    print('recall score : {:.4f}'.format(macro_recall_score))
    print('train loss : {:.4f}'.format(training_loss))

    # Return the mean that was just printed rather than the raw sum.
    return macro_recall_score, training_loss, final_outputs, final_targets
    

In [None]:
def validate_model(model, optimiser, criterion, valid_dataset, valid_loader):
    """Evaluate ``model`` on the validation loader without gradient tracking.

    Each batch is moved to the module-level ``device``; the loss is the mean of
    ``criterion`` applied to the three heads.

    Parameters
    ----------
    model : torch.nn.Module
        Called as ``model(inputs)`` and expected to return three outputs.
    optimiser
        Unused; kept so existing callers keep working.
    criterion : callable
        Loss applied separately to each (output, label) pair.
    valid_dataset
        Unused; kept so existing callers keep working.
    valid_loader : torch.utils.data.DataLoader
        Yields ``(inputs, labels1, labels2, labels3)`` batches.

    Returns
    -------
    tuple
        ``(macro_recall_score, mean_loss, outputs, targets)`` where
        ``mean_loss`` is a 0-dim tensor averaged over batches.

    Raises
    ------
    ValueError
        If ``valid_loader`` yields no batches.
    """
    # len(loader) counts the final partial batch too, unlike
    # int(len(dataset) / batch_size), which is 0 for a dataset smaller than
    # one batch and otherwise inflates the reported mean.
    n_batches = len(valid_loader)
    if n_batches == 0:
        raise ValueError("valid_loader yields no batches")

    with torch.no_grad():
        model.eval()
        valid_loss = 0.0
        final_outputs = []
        final_targets = []
        for inputs, labels1, labels2, labels3 in tqdm(valid_loader, total=n_batches):
            inputs = inputs.to(device, dtype=torch.float)

            labels1 = labels1.to(device, dtype=torch.long)
            labels2 = labels2.to(device, dtype=torch.long)
            labels3 = labels3.to(device, dtype=torch.long)

            outputs1, outputs2, outputs3 = model(inputs)

            loss1 = criterion(outputs1, labels1)
            loss2 = criterion(outputs2, labels2)
            loss3 = criterion(outputs3, labels3)

            valid_loss += (loss1 + loss2 + loss3) / 3

            final_outputs.append(torch.cat((outputs1, outputs2, outputs3), dim=1))
            final_targets.append(torch.stack((labels1, labels2, labels3), dim=1))

        valid_loss = valid_loss / n_batches

        final_outputs = torch.cat(final_outputs)
        final_targets = torch.cat(final_targets)
        macro_recall_score = macro_recall(final_outputs, final_targets)

        print('recall score : {:.4f}'.format(macro_recall_score))
        print('validation loss : {:.4f}'.format(valid_loss))

        return macro_recall_score, valid_loss, final_outputs, final_targets

In [None]:
def macro_recall(pred_y, y, n_grapheme=168, n_vowel=11, n_consonant=7):
    """Weighted average of the macro recall of the three classification heads.

    Parameters
    ----------
    pred_y : torch.Tensor
        Model outputs with the three heads concatenated along dim 1, in the
        order grapheme, vowel, consonant
        (``n_grapheme + n_vowel + n_consonant`` columns).
    y : torch.Tensor
        True labels with one column per head, in the same order.
    n_grapheme, n_vowel, n_consonant : int
        Width of each head inside ``pred_y``.

    Returns
    -------
    float
        ``np.average`` of the three recalls with weights ``[2, 1, 1]``.
    """
    per_head = torch.split(pred_y, [n_grapheme, n_vowel, n_consonant], dim=1)
    pred_labels = [torch.argmax(head, dim=1).cpu().numpy() for head in per_head]

    y = y.cpu().numpy()

    # recall_score takes (y_true, y_pred). Passing the predictions first would
    # normalise by predicted-class counts instead of true-class counts, which
    # is not recall.
    recall_grapheme = metrics.recall_score(y[:, 0], pred_labels[0], average='macro')
    recall_vowel = metrics.recall_score(y[:, 1], pred_labels[1], average='macro')
    recall_consonant = metrics.recall_score(y[:, 2], pred_labels[2], average='macro')

    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_score = np.average(scores, weights=[2, 1, 1])
    print(f'recall: grapheme {recall_grapheme}, vowel {recall_vowel}, consonant {recall_consonant}, '
          f'total {final_score}, y {y.shape}')

    return final_score
    

In [None]:
# Detect CUDA once and derive everything else from that single flag.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda:0" if use_gpu else "cpu")

if use_gpu:
    print("Using GPU")
    FloatTensor, LongTensor, ByteTensor = (
        torch.cuda.FloatTensor, torch.cuda.LongTensor, torch.cuda.ByteTensor)
else:
    FloatTensor, LongTensor, ByteTensor = (
        torch.FloatTensor, torch.LongTensor, torch.ByteTensor)

# Default tensor type alias.
Tensor = FloatTensor

In [None]:
# Network with an ImageNet-initialised backbone, placed on the selected device.
resnet_50 = ResNet50(pretrained=True).to(device)

# The same cross-entropy criterion is applied to each of the three heads.
criterion = nn.CrossEntropyLoss()

# Alternative tried: torch.optim.SGD(resnet_50.parameters(), lr=4e-4)
optimizer = torch.optim.Adam(resnet_50.parameters(), lr=1.46e-3)

# Alternative tried: torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
# Multiply the learning rate by 0.3 after 5 epochs without a lower monitored value.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.3,
    patience=5,
    verbose=True,
)

In [None]:
# `summary` is already imported in the imports cell; the import is repeated
# here only so this cell can be run on its own.
from torchsummary import summary

# Summarise the network for the input shape the dataset actually produces
# (3 channels, 137x236 pixels) so the reported per-layer output shapes match
# what training sees.
summary(resnet_50, (3, 137, 236))

In [None]:
# Run cross-validated training with the notebook-level configuration.
# `epochs=EPOCHS` is passed explicitly: without it run_model falls back to its
# own default of 20 and the EPOCHS constant defined above is silently ignored.
# Note: the *_accs names hold the recall scores returned by run_model.
(train_accs, train_losses, valid_accs, valid_losses,
 train_outputs, train_targets, valid_outputs, valid_targets) = run_model(
    resnet_50, optimizer, criterion, scheduler,
    folds=FOLDS,
    epochs=EPOCHS,
    train_batch_size=TRAIN_BATCH_SIZE,
    validation_batch_size=VALIDATION_BATCH_SIZE,
)

In [None]:
def save_metrics(train_accs, train_losses, valid_accs, valid_losses, file_name):
    """Write the per-epoch metrics to ``file_name`` as CSV.

    Parameters
    ----------
    train_accs, valid_accs : sequence of numbers
        Per-epoch scores, written unchanged.
    train_losses, valid_losses : sequence
        Per-epoch losses; each entry may be a single-element tensor or a plain
        number and is converted with ``float()``.
    file_name : str
        Destination path of the CSV file.

    The four sequences are zipped row-wise, so the output has as many rows as
    the shortest of them. Columns are ``train_accs``, ``train_losses``,
    ``valid_accs`` and ``valid_losses``; no index column is written.
    """
    # float() accepts both 0-dim tensors and Python numbers, whereas .item()
    # fails on the latter.
    train_loss_values = [float(loss) for loss in train_losses]
    valid_loss_values = [float(loss) for loss in valid_losses]

    rows = list(zip(train_accs, train_loss_values, valid_accs, valid_loss_values))
    dataframe = pd.DataFrame(
        rows,
        columns=['train_accs', 'train_losses', 'valid_accs', 'valid_losses'])
    dataframe.to_csv(file_name, index=False)

In [None]:
# Persist the per-epoch metrics collected by run_model.
# NOTE(review): the file name says "no_pretrained", but the model in this
# notebook is created with ResNet50(True) — confirm the intended file name.
save_metrics(train_accs, train_losses, valid_accs, valid_losses, "../data/resnet_50_no_pretrained.csv")

In [None]:
def plot_final_confusion_matrices(validation_df, predictions_df):
    """Plot confusion matrices for the vowel and consonant diacritic heads.

    Parameters
    ----------
    validation_df : torch.Tensor
        Despite the name, the concatenated model outputs: 168 grapheme, 11
        vowel and 7 consonant columns along dim 1. The predicted class of each
        head is the argmax of its slice.
    predictions_df : torch.Tensor
        Despite the name, the true labels, one column per head in the same
        order.

    The grapheme-root matrix is skipped because 168 classes do not fit in a
    readable heatmap.
    """
    n_grapheme, n_vowel, n_consonant = 168, 11, 7

    per_head = torch.split(validation_df, [n_grapheme, n_vowel, n_consonant], dim=1)
    predicted = [torch.argmax(head, dim=1).cpu().numpy() for head in per_head]
    actual = predictions_df.cpu().numpy()

    # Two stacked panels; no empty slot is reserved for the skipped matrix.
    fig, (ax_vowel, ax_consonant) = plt.subplots(2, 1, figsize=(15, 10))

    panels = (
        (ax_vowel, 1, "vowel_diacritic"),
        (ax_consonant, 2, "consonant_diacritic"),
    )
    for ax, head, title in panels:
        # confusion_matrix takes (y_true, y_pred): rows are the true labels
        # and columns the predicted ones.
        matrix = metrics.confusion_matrix(actual[:, head], predicted[head])
        sn.heatmap(matrix, annot=True, linewidths=.5, ax=ax, fmt='d', cmap="Blues")
        ax.set(title=title, xlabel="Predicted label", ylabel="True label")

    fig.tight_layout()

In [None]:
# Confusion matrices for the validation outputs/targets returned by run_model.
plot_final_confusion_matrices(valid_outputs, valid_targets)