In [None]:
import os
# Show every file available under the Kaggle input mount, one full path per line.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:
# Train section

In [None]:
# Dump the bundled requirements file so the notebook output records its contents.
with open('/kaggle/input/ext-libs/requirements.txt', 'r') as requirements_file:
    requirements_text = requirements_file.read()
print(requirements_text)

In [None]:
!pip install ../input/validname-effnet/efficientnet_pytorch-0.6.3-py3-none-any.whl

In [None]:
import numpy as np 
import pandas as pd
import cv2
import torch
import torch.nn as nn
from torch.utils.data import Dataset
import sklearn
import csv
from tqdm.notebook import tqdm
from efficientnet_pytorch import EfficientNet
from efficientnet_pytorch.utils import (
    round_filters,
    get_same_padding_conv2d
)
from torch import optim
from albumentations import (
    PadIfNeeded,
    HorizontalFlip,
    VerticalFlip,    
    CenterCrop,    
    Crop,
    Compose,
    Transpose,
    RandomRotate90,
    ElasticTransform,
    GridDistortion, 
    OpticalDistortion,
    RandomSizedCrop,
    OneOf,
    CLAHE,
    RandomBrightnessContrast,    
    
    RandomGamma,
    ShiftScaleRotate ,
    GaussNoise,
    Blur,
    MotionBlur,   
    GaussianBlur,
)
import gc

In [None]:
# --- Model / checkpoint ---
CHECKPOINT_FNAME = '/kaggle/input/checkpoints/checkpoint_val.chk'
BACKBONE = 'efficientnet-b3'

# --- Image geometry ---
# HEIGHT x WIDTH is the shape raw parquet rows are reshaped to;
# SIZE is the side of the square image produced by crop_resize.
HEIGHT = 137
WIDTH = 236
SIZE = 128

# --- Training schedule ---
N_EPOCHS = 30
BATCH_SIZE = 64

# LITE restricts training to a small subset of grapheme roots.
LITE = False

# True when a checkpoint file is present at CHECKPOINT_FNAME.
RESUME = os.path.isfile(CHECKPOINT_FNAME)

# --- Data locations ---
# NOTE: TRAIN is the list of training shards. A former `TRAIN = True` flag
# assigned just before this list was dead code (immediately overwritten), so
# it has been removed; `if TRAIN:` elsewhere tests this list's truthiness.
TRAIN = ['/kaggle/input/feathers/train_data_00_l.feather',
    '/kaggle/input/feathers/train_data_11_l.feather',
    '/kaggle/input/feathers/train_data_22_l.feather',
    '/kaggle/input/feathers/train_data_33_l.feather']

TRAIN_LABELS = '/kaggle/input/bengaliai-cv19/train.csv'

TEST = ['/kaggle/input/bengaliai-cv19/test_image_data_0.parquet',
         '/kaggle/input/bengaliai-cv19/test_image_data_1.parquet',
         '/kaggle/input/bengaliai-cv19/test_image_data_2.parquet',
        '/kaggle/input/bengaliai-cv19/test_image_data_3.parquet']
TEST_LABELS = '/kaggle/input/bengaliai-cv19/test.csv'

In [None]:
# https://www.kaggle.com/iafoss/image-preprocessing-128x128

def bbox(img):
    """Return the inclusive bounding box of the truthy entries of a 2-D array.

    Returns:
        (rmin, rmax, cmin, cmax): first and last row index, then first and
        last column index, that contain at least one truthy value.

    Raises:
        IndexError: if no entry of ``img`` is truthy.
    """
    def _first_last(hits):
        # Indices along one axis where at least one pixel is set.
        positions = np.where(hits)[0]
        return positions[0], positions[-1]

    row_hits = np.any(img, axis=1)
    col_hits = np.any(img, axis=0)
    rmin, rmax = _first_last(row_hits)
    cmin, cmax = _first_last(col_hits)
    return rmin, rmax, cmin, cmax

def crop_resize(img0, size=SIZE, pad=16):
    """Crop ``img0`` around its bright content, pad it to a square, and resize.

    Args:
        img0: 2-D image array. Pixels below 28 inside the cropped region are
            set to 0 in place (the crop is a view of ``img0``).
        size: side length of the returned square image.
        pad: extra margin added to the longer side before squaring.

    Returns:
        The ``size`` x ``size`` image produced by ``cv2.resize``.
    """
    # Find pixels above the threshold, ignoring a 5-pixel frame because some
    # images contain lines at the sides.
    top, bottom, left, right = bbox(img0[5:-5, 5:-5] > 80)

    # The detection above may cut too much, so widen the box again while
    # clamping it to the image limits.
    left = 0 if left <= 13 else left - 13
    top = 0 if top <= 10 else top - 10
    right = WIDTH if right >= WIDTH - 13 else right + 13
    bottom = HEIGHT if bottom >= HEIGHT - 10 else bottom + 10

    cropped = img0[top:bottom, left:right]
    # Treat low-intensity pixels as noise.
    cropped[cropped < 28] = 0

    crop_w = right - left
    crop_h = bottom - top
    side = max(crop_w, crop_h) + pad

    # Pad symmetrically on both axes so the aspect ratio survives the resize.
    pad_rows = (side - crop_h) // 2
    pad_cols = (side - crop_w) // 2
    squared = np.pad(cropped, [(pad_rows,), (pad_cols,)], mode='constant')
    return cv2.resize(squared, (size, size))

In [None]:
class BengaliDatasetTrain(Dataset):
    """Dataset of flattened SIZE x SIZE images with their three class labels.

    Each item is ``(image, grapheme_root, vowel_diacritic, consonant_diacritic)``.
    """

    def __init__(self, df, labels, transform=True):
        """Store pixel rows and labels, and build the augmentation pipeline.

        Args:
            df: frame whose first column is skipped; the remaining columns of
                each row are reshaped to (SIZE, SIZE) on access.
            labels: frame with ``grapheme_root``, ``vowel_diacritic`` and
                ``consonant_diacritic`` columns, read by the same positional
                index as ``df``.
            transform: when truthy, apply the augmentation pipeline per item.
        """
        self.grapheme_roots = labels['grapheme_root'].values.astype(np.uint8)
        self.vowel_diacritics = labels['vowel_diacritic'].values.astype(np.uint8)
        self.consonant_diacritics = labels['consonant_diacritic'].values.astype(np.uint8)
        self.data = df.iloc[:, 1:].values
        self.transform = transform

        # All geometric transforms fill exposed borders with the constant 1.
        constant_border = dict(border_mode=cv2.BORDER_CONSTANT, value=1)

        shift_scale_rotate = ShiftScaleRotate(p=1, **constant_border)
        distortion = OneOf(
            [
                ElasticTransform(p=0.1, alpha=1, sigma=50, alpha_affine=50, **constant_border),
                GridDistortion(distort_limit=0.05, p=0.1, **constant_border),
                OpticalDistortion(p=0.1, distort_limit=0.05, shift_limit=0.2, **constant_border),
            ],
            p=0.3,
        )
        noise_or_blur = OneOf(
            [
                GaussNoise(var_limit=1.0),
                Blur(),
                GaussianBlur(blur_limit=3),
            ],
            p=0.4,
        )
        self.aug = Compose([
            shift_scale_rotate,
            distortion,
            noise_or_blur,
            RandomGamma(p=0.8),
        ])

    def __str__(self):
        return '\tlen = %d\n' % len(self) + '\n'

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        root = self.grapheme_roots[index]
        vowel = self.vowel_diacritics[index]
        consonant = self.consonant_diacritics[index]

        # Rows are reshaped directly to SIZE x SIZE; no inversion, max
        # normalisation or crop_resize is applied here.
        image = self.data[index, :].reshape(SIZE, SIZE)
        if self.transform:
            image = self.aug(image=image)['image']

        return image, root, vowel, consonant

In [None]:
# Release a frame left behind by a previous run of this cell before loading
# the data again.
try:
    del df
except NameError:
    pass
gc.collect()

labels_all = pd.read_csv(TRAIN_LABELS)

if LITE:
    # Reduced run: keep only the samples whose grapheme root is in this set.
    lite_roots = [59, 60, 61, 62, 63, 81, 82, 83, 84, 85,
                  86, 87, 88, 89, 90, 91, 92, 93, 94, 95]
    labels = labels_all[labels_all.grapheme_root.isin(lite_roots)]
    lite_ids = labels.image_id.values
    del labels_all
else:
    lite_ids = None

# Read every shard listed in TRAIN (not a hard-coded count) and concatenate
# once at the end instead of re-copying the growing frame on every iteration.
shards = []
for shard_path in TRAIN:
    shard = pd.read_feather(shard_path)
    if lite_ids is not None:
        shard = shard.loc[shard.image_id.isin(lite_ids)]
    shards.append(shard)
    gc.collect()

df = pd.concat(shards, ignore_index=True)
del shards, shard
gc.collect()

if not LITE:
    # NOTE(review): assumes the concatenated shards follow the row order of
    # the label file - confirm against how the feather files were produced.
    labels = labels_all[:len(df)]
gc.collect()

In [None]:
# Report the shapes of the label table and the image table.
for caption, frame in (("labels shape = ", labels), ("df shape = ", df)):
    print(caption, frame.shape)

In [None]:
# https://github.com/hysts/pytorch_image_classification/blob/master/optim.py
class LARSOptimizer(torch.optim.Optimizer):
    """SGD-style optimizer that scales each parameter tensor's step by a local rate.

    For every parameter tensor the local rate is
    ``||w|| / (eps + ||g|| + weight_decay * ||w||)``, replaced by 1 when
    ``||w|| < thresh``. The step is ``lr * local_rate * (g + weight_decay * w)``,
    optionally accumulated in a momentum buffer.

    Args:
        params: iterable of parameters or parameter-group dicts.
        lr: global learning rate (must be >= 0).
        momentum: momentum factor (must be >= 0); 0 disables the buffer.
        weight_decay: L2 coefficient (must be >= 0).
        eps: added to the denominator of the local rate.
        thresh: weight-norm threshold below which the local rate is 1.

    Raises:
        ValueError: if ``lr``, ``momentum`` or ``weight_decay`` is negative.
    """

    def __init__(self,
                 params,
                 lr=0.02,
                 momentum=0,
                 weight_decay=0,
                 eps=1e-9,
                 thresh=1e-2):

        if lr < 0.0:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if momentum < 0.0:
            raise ValueError("Invalid momentum value: {}".format(momentum))
        if weight_decay < 0.0:
            raise ValueError(
                "Invalid weight_decay value: {}".format(weight_decay))

        defaults = dict(
            lr=lr,
            momentum=momentum,
            weight_decay=weight_decay,
            eps=eps,
            thresh=thresh)
        super(LARSOptimizer, self).__init__(params, defaults)

    def step(self, closure=None):
        """Apply one update to every parameter that has a gradient.

        Args:
            closure: optional callable that re-evaluates the model and
                returns the loss.

        Returns:
            The value returned by ``closure``, or None when none was given.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            lr = group['lr']
            eps = group['eps']
            thresh = group['thresh']

            for p in group['params']:
                if p.grad is None:
                    continue

                d_p = p.grad.data

                weight_norm = torch.norm(p.data)
                grad_norm = torch.norm(d_p)
                local_lr = weight_norm / (
                    eps + grad_norm + weight_decay * weight_norm)
                local_lr = torch.where(weight_norm < thresh,
                                       torch.ones_like(local_lr), local_lr)

                if weight_decay != 0:
                    # Out-of-place so the stored gradient is left untouched.
                    d_p = d_p.add(p.data, alpha=weight_decay)

                update = d_p * (lr * local_lr)

                if momentum != 0:
                    param_state = self.state[p]
                    buf = param_state.get('momentum_buffer')
                    if buf is None:
                        buf = param_state[
                            'momentum_buffer'] = torch.zeros_like(p.data)
                    buf.mul_(momentum).add_(update)
                    update = buf

                # Without momentum the scaled gradient is applied directly;
                # previously this path referenced an unbound buffer.
                p.data.sub_(update)

        return loss

In [None]:
# https://arxiv.org/pdf/1811.00202.pdf
class GeM(nn.Module):
    """Generalized-mean pooling over the two trailing spatial dimensions.

    Computes ``(mean(|x ** p|) + eps) ** (1 / p)`` over dims 2 and 3, so a
    ``(N, C, H, W)`` input yields an ``(N, C)`` output.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.p = p
        self.eps = eps

    def forward(self, x):
        powered = x.pow(self.p).abs()
        pooled = powered.mean(dim=(2, 3))
        return (pooled + self.eps).pow(1.0 / self.p)


class EffNetTuned(nn.Module):
    """EfficientNet backbone adapted to 1-channel input with a 512-d output.

    The stem convolution is replaced by a single-input-channel one, the
    pooling layer by ``GeM`` and the dropout by ``p=0.2``. A linear layer
    followed by batch norm maps the backbone's output to 512 features.
    """

    def __init__(self):
        super().__init__()
        self.eff = EfficientNet.from_name(BACKBONE)

        # Rebuild the stem so it accepts one input channel.
        global_params = self.eff._global_params
        stem_conv_cls = get_same_padding_conv2d(image_size=global_params.image_size)
        stem_width = round_filters(32, global_params)
        self.eff._conv_stem = stem_conv_cls(1, stem_width, kernel_size=3, stride=2, bias=False)

        self.eff._avg_pooling = GeM()
        self.eff._dropout = nn.Dropout(p=0.2, inplace=False)

        # in_features=1000 presumably matches the backbone's default
        # classifier width - confirm against the efficientnet_pytorch version.
        projection = nn.Linear(in_features=1000, out_features=512)
        self.fc = nn.Sequential(projection, nn.BatchNorm1d(512))

    def forward(self, x):
        return self.fc(self.eff(x))

class EffMultioutputNet(nn.Module):
    """Shared backbone with three linear classification heads.

    ``forward`` returns ``(root, vowel, consonant)`` logits with 168, 11 and
    7 outputs respectively, all computed from the same 512-d features.
    """

    def __init__(self):
        super(EffMultioutputNet, self).__init__()
        self.backbone = EffNetTuned()
        self.cosRootClassifier = nn.Linear(in_features=512, out_features=168)
        self.cosVowelClassifier = nn.Linear(in_features=512, out_features=11)
        self.cosConsonantClassifier = nn.Linear(in_features=512, out_features=7)

    def forward(self, x):
        # Run the backbone once and share its features across the heads.
        # The previous version ran it three times on the same input, which
        # tripled the cost and, in training mode, gave every head a different
        # dropout mask and updated batch-norm statistics three times per batch.
        features = self.backbone(x)

        root = self.cosRootClassifier(features)
        vowel = self.cosVowelClassifier(features)
        consonant = self.cosConsonantClassifier(features)

        return root, vowel, consonant

In [None]:
def macro_recall_multi(pred_graphemes, true_graphemes,pred_vowels,true_vowels,pred_consonants,true_consonants, n_grapheme=20, n_vowel=11, n_consonant=4):
    """Weighted average of the macro recall of the three prediction heads.

    Args:
        pred_graphemes, pred_vowels, pred_consonants: score tensors; the
            predicted class is the argmax along dim 1.
        true_graphemes, true_vowels, true_consonants: tensors of true class
            indices.
        n_grapheme, n_vowel, n_consonant: unused; kept so existing calls that
            pass them keep working.

    Returns:
        Average of the grapheme, vowel and consonant macro recalls with
        weights 2, 1 and 1.
    """
    # Imported here because a bare `import sklearn` does not guarantee that
    # the `sklearn.metrics` submodule has been loaded.
    from sklearn.metrics import recall_score

    def _macro_recall(scores, targets):
        predicted = torch.argmax(scores, dim=1).cpu().numpy()
        actual = targets.cpu().numpy()
        # recall_score expects (y_true, y_pred); the arguments were previously
        # swapped, which computed macro precision instead of recall.
        return recall_score(actual, predicted, average='macro')

    scores = [
        _macro_recall(pred_graphemes, true_graphemes),
        _macro_recall(pred_vowels, true_vowels),
        _macro_recall(pred_consonants, true_consonants),
    ]
    final_score = np.average(scores, weights=[2, 1, 1])
    return final_score

In [None]:
from sklearn.model_selection import train_test_split

# Split labels and image rows in a single call so both use exactly the same
# cut point; train_test_split raises ValueError if their lengths differ,
# instead of silently producing misaligned labels.
# shuffle=False keeps the original order: the last 20% of rows are validation.
train_labels, valid_labels, train_df, valid_df = train_test_split(
    labels, df, test_size=0.20, shuffle=False)
del df, labels
gc.collect()

In [None]:
# Augmentation is switched on for the training split only.
train_dataset = BengaliDatasetTrain(df=train_df, labels=train_labels, transform=True)
valid_dataset = BengaliDatasetTrain(df=valid_df, labels=valid_labels, transform=False)
torch.cuda.empty_cache()
gc.collect()

In [None]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# Validation is not shuffled: the validation recall is averaged over batches,
# so a fixed batch composition keeps the score comparable between epochs.
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
# The datasets hold their own references to the arrays they need.
del train_df, valid_df, train_labels, valid_labels
torch.cuda.empty_cache()
gc.collect()

In [None]:
# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [None]:
# One cross-entropy criterion is shared by the three classification heads.
criterion = nn.CrossEntropyLoss()
epoch = 0

In [None]:
# Build the network and an optimizer over all of its parameters.
model = EffMultioutputNet()
optimizer = LARSOptimizer(
    params=model.parameters(),
    lr=0.02,
    weight_decay=1e-3,
)

In [None]:
# Warm-start from the checkpoint only when it exists (RESUME is computed in
# the config cell); previously a missing file crashed this cell.
if RESUME:
    # The model is still on the CPU here, so map the tensors there; this also
    # lets a GPU-saved checkpoint load on a CPU-only machine.
    checkpoint = torch.load(CHECKPOINT_FNAME, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
else:
    print('No checkpoint at {}; starting from freshly initialised weights'.format(CHECKPOINT_FNAME))

# Epoch numbering always restarts at 0, even after a warm start.
# Use `last_epoch = checkpoint['epoch']` instead to continue the numbering.
last_epoch = -1 # from scratch

model = model.to(device)
# Multiplies the learning rate by 0.1 after every 5 calls to scheduler.step().
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [None]:
def train(epoch, history):
    """Run one training epoch over ``train_loader`` and log its metrics.

    Uses the module-level ``model``, ``optimizer``, ``scheduler``,
    ``criterion`` and ``device``.

    Args:
        epoch: epoch number, used as the row label in ``history``.
        history: DataFrame that receives ``train_loss``, ``train_acc`` and
            ``train_recall`` for this epoch.

    Returns:
        The recall score averaged over the batches of the epoch.
    """
    model.train()
    n_batches = len(train_loader)
    running_loss = 0.0
    running_acc = 0.0
    running_recall = 0.0
    for inputs, labels1, labels2, labels3 in tqdm(train_loader, total=n_batches):
        inputs = inputs.float().to(device)
        labels1 = labels1.long().to(device)
        labels2 = labels2.long().to(device)
        labels3 = labels3.long().to(device)

        optimizer.zero_grad()
        outputs1, outputs2, outputs3 = model(inputs.unsqueeze(1))
        # The first head's loss counts double, matching the 2/1/1 metric weights.
        loss1 = 2 * criterion(outputs1, labels1)
        loss2 = criterion(outputs2, labels2)
        loss3 = criterion(outputs3, labels3)

        running_loss += loss1.item() + loss2.item() + loss3.item()
        running_recall += macro_recall_multi(outputs1, labels1, outputs2, labels2, outputs3, labels3)
        running_acc += (outputs1.argmax(1) == labels1).float().mean().item()
        running_acc += (outputs2.argmax(1) == labels2).float().mean().item()
        running_acc += (outputs3.argmax(1) == labels3).float().mean().item()

        (loss1 + loss2 + loss3).backward()
        optimizer.step()

    # Step the scheduler once per epoch. With StepLR(step_size=5, gamma=0.1),
    # stepping after every batch shrank the learning rate tenfold every five
    # batches, driving it to effectively zero early in the first epoch.
    scheduler.step()

    print(' running_recall : ', running_recall)
    print(' running_loss : ', running_loss)
    print(' running_acc : ', running_acc)
    loss = running_loss / n_batches
    acc = running_acc / (n_batches * 3)  # mean accuracy over the three heads, in [0, 1]
    total_train_recall = running_recall / n_batches
    # Accuracy is a fraction, so scale it before printing it with a '%' sign.
    print(' train epoch : {}\tacc : {:.2f}%'.format(epoch, acc * 100))
    print('loss : {:.4f}'.format(loss))
    print('recall: {:.4f}'.format(total_train_recall))
    torch.cuda.empty_cache()
    gc.collect()
    history.loc[epoch, 'train_loss'] = loss
    history.loc[epoch, 'train_acc'] = acc
    history.loc[epoch, 'train_recall'] = total_train_recall
    return total_train_recall

def evaluate(epoch,history):
    """Evaluate the model on ``valid_loader`` and log its metrics.

    Uses the module-level ``model``, ``criterion`` and ``device``. Runs with
    the model in eval mode and gradients disabled.

    Args:
        epoch: epoch number, used as the row label in ``history``.
        history: DataFrame that receives ``valid_loss``, ``valid_acc`` and
            ``valid_recall`` for this epoch.

    Returns:
        The recall score averaged over the validation batches.
    """
    model.eval()
    n_batches = len(valid_loader)
    running_loss = 0.0
    running_acc = 0.0
    running_recall = 0.0
    with torch.no_grad():
        for inputs, labels1, labels2, labels3 in tqdm(valid_loader, total=n_batches):
            inputs = inputs.float().to(device)
            labels1 = labels1.long().to(device)
            labels2 = labels2.long().to(device)
            labels3 = labels3.long().to(device)

            outputs1, outputs2, outputs3 = model(inputs.unsqueeze(1))
            loss1 = 2 * criterion(outputs1, labels1)
            loss2 = criterion(outputs2, labels2)
            loss3 = criterion(outputs3, labels3)

            running_loss += loss1.item() + loss2.item() + loss3.item()
            running_recall += macro_recall_multi(outputs1, labels1, outputs2, labels2, outputs3, labels3)
            running_acc += (outputs1.argmax(1) == labels1).float().mean().item()
            running_acc += (outputs2.argmax(1) == labels2).float().mean().item()
            running_acc += (outputs3.argmax(1) == labels3).float().mean().item()

    loss = running_loss / n_batches
    acc = running_acc / (n_batches * 3)  # mean accuracy over the three heads, in [0, 1]
    total_recall = running_recall / n_batches
    # Accuracy is a fraction, so scale it before printing it with a '%' sign.
    print('val epoch: {} \tval acc : {:.2f}%'.format(epoch, acc * 100))
    print('loss : {:.4f}'.format(loss))
    print('recall: {:.4f}'.format(total_recall))
    history.loc[epoch, 'valid_loss'] = loss
    history.loc[epoch, 'valid_acc'] = acc
    history.loc[epoch, 'valid_recall'] = total_recall
    return total_recall

In [None]:
# Training

In [None]:
if TRAIN:
    # NOTE: TRAIN ends up bound to the list of training shard paths in the
    # config cell, so this guard is true whenever that list is non-empty.

    # Outputs go to the working directory; the checkpoint input path is only
    # used to derive the file-name prefix.
    LOG_FNAME = 'history.csv'
    checkpoint_prefix = os.path.basename(CHECKPOINT_FNAME)

    # Per-epoch metrics, filled in by train() and evaluate().
    history = pd.DataFrame()

    valid_recall = 0.0
    best_valid_recall = 0.0
    torch.cuda.empty_cache()
    gc.collect()
    for epoch in range(last_epoch+1, N_EPOCHS):
        torch.cuda.empty_cache()
        gc.collect()
        train_recall = train(epoch, history)
        valid_recall = evaluate(epoch, history)
        if valid_recall > best_valid_recall:
            print(f'Validation recall has increased from:  {best_valid_recall:.4f} to: {valid_recall:.4f}. Saving checkpoint')
            # Save the weights whenever validation recall improves. The
            # checkpoint holds state dicts only: the former 'model' entry
            # stored a freshly constructed, untrained network and was part of
            # a statement that did not parse.
            torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, checkpoint_prefix + '_' + str(epoch))
            history.to_csv(LOG_FNAME)
            best_valid_recall = valid_recall

In [None]:
class BengaliDatasetTest(Dataset):
    """Dataset over one test parquet file; each item is ``(image, name)``.

    ``image`` is a float32 array scaled to [0, 1] after per-image max
    normalisation and ``crop_resize``; ``name`` is the value of the first
    column of the corresponding row.
    """

    def __init__(self, fname):
        print(fname)
        self.df = pd.read_parquet(fname)
        # Every column after the first holds pixels; invert them (255 - value).
        pixels = self.df.iloc[:, 1:].values
        self.data = 255 - pixels.reshape(-1, HEIGHT, WIDTH).astype(np.uint8)

    def __str__(self):
        return '\tlen = %d\n' % len(self) + '\n'

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name = self.df.iloc[idx, 0]
        raw = self.data[idx]
        # Stretch each image so its brightest pixel becomes 255.
        stretched = (raw * (255.0 / raw.max())).astype(np.uint8)
        # NOTE(review): BengaliDatasetTrain.__getitem__ applies no /255
        # scaling - confirm the stored training pixels are already in [0, 1].
        img = crop_resize(stretched).astype(np.float32) / 255.0
        return img, name

In [None]:
# Load the test-set CSV and report its shape as a sanity check.
# This reuses the name `labels`, which earlier held the training labels.
labels = pd.read_csv(TEST_LABELS)
print("labels shape = ", labels.shape)

In [None]:
# Make sure dropout and batch norm are in inference mode whatever ran before.
model.eval()

row_id, target = [], []
for fname in TEST:
    test_image = BengaliDatasetTest(fname)
    dl = torch.utils.data.DataLoader(test_image, batch_size=128, num_workers=4, shuffle=False)
    with torch.no_grad():
        for x, y in tqdm(dl):
            # Same device as the model rather than a hard-coded .cuda().
            x = x.unsqueeze(1).float().to(device)
            # The model returns its outputs in (root, vowel, consonant) order.
            root, vowel, consonant = model(x)
            root = root.argmax(-1).view(-1).cpu()
            vowel = vowel.argmax(-1).view(-1).cpu()
            consonant = consonant.argmax(-1).view(-1).cpu()
            for idx, name in enumerate(y):
                # Each row id gets the prediction of the matching head; the
                # root and vowel predictions were previously written under
                # each other's row ids.
                row_id += [f'{name}_vowel_diacritic', f'{name}_grapheme_root',
                           f'{name}_consonant_diacritic']
                target += [vowel[idx].item(), root[idx].item(), consonant[idx].item()]

sub_df = pd.DataFrame({'row_id': row_id, 'target': target})
sub_df.to_csv('submission.csv', index=False)
sub_df.head(20)