In [None]:
import math
import random
from collections import Counter
from pathlib import Path

from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset, TensorDataset
from sklearn.model_selection import train_test_split
from tqdm import notebook
import torchvision
from torchvision import transforms
from torchvision import models
from sklearn.metrics import confusion_matrix, recall_score
import seaborn as sn
import pyarrow.parquet as pq
from skimage.filters import threshold_otsu
from skimage.transform import AffineTransform, SimilarityTransform, warp, resize

In [None]:
# Run on the first CUDA GPU when one is available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [None]:
# Load the competition's test.csv into a DataFrame.
# NOTE(review): `df_test` is not referenced again in the visible cells - confirm it is still needed.
df_test = pd.read_csv('/kaggle/input/bengaliai-cv19/test.csv')

In [None]:
def thresh(t):
    """Return a copy of `t` in which every value >= 200 is raised to 255.

    The work is done on a detached clone, so `t` itself is left untouched.
    The clamp is implemented by negating, applying `threshold_` (entries that
    are not greater than -200 become -255) and negating back.

    NOTE(review): a later cell defines another function that is also called
    `thresh`; once that cell has run, this definition is shadowed.
    """
    flipped = t.clone().detach().mul_(-1)
    # In the negated domain "x >= 200" reads "-x <= -200".
    nn.functional.threshold_(flipped, -200, -255)
    return flipped.mul_(-1)
def make_tensordataset_from_dfs(parquet_locs, label_loc=None):
    """Build a `TensorDataset` of images (and optionally labels) from parquet files.

    Args:
        parquet_locs: iterable of parquet paths. Each file must expose an
            `image_id` column; every column after the first one is read as
            uint8 pixel data and each row is reshaped to (1, 137, 236).
        label_loc: optional CSV path with `image_id`, `grapheme_root`,
            `vowel_diacritic` and `consonant_diacritic` columns.

    Returns:
        `TensorDataset(X)` when `label_loc` is None, otherwise
        `TensorDataset(X, graphemes, vowel_diacs, consonant_diacs)` where the
        three label tensors are of dtype long. Images that have no row in the
        label CSV keep label 0; label rows whose id was not loaded are ignored.
    """
    image_ids = []
    pixel_chunks = []
    for loc in parquet_locs:
        frame = pd.read_parquet(loc)
        image_ids += frame.image_id.tolist()
        # Column 0 is skipped; the remaining columns hold the pixel values.
        pixel_chunks.append(frame.iloc[:, 1:].to_numpy(dtype=np.uint8))
        del frame  # drop the DataFrame before the next file is read
    X = torch.from_numpy(np.vstack(pixel_chunks).reshape(-1, 1, 137, 236))

    if label_loc is None:
        return TensorDataset(X)

    # image_id -> row position inside X
    position_of = {image_id: pos for pos, image_id in enumerate(image_ids)}
    n_images = X.shape[0]
    graphemes = torch.zeros(n_images, dtype=torch.long)
    vowel_diacs = torch.zeros(n_images, dtype=torch.long)
    consonant_diacs = torch.zeros(n_images, dtype=torch.long)
    for row in pd.read_csv(label_loc).itertuples():
        pos = position_of.get(row.image_id)
        if pos is None:
            continue
        graphemes[pos] = row.grapheme_root
        vowel_diacs[pos] = row.vowel_diacritic
        consonant_diacs[pos] = row.consonant_diacritic
    return TensorDataset(X, graphemes, vowel_diacs, consonant_diacs)

In [None]:
class DatasetWithImageTransforms(Dataset):
    """Dataset wrapper that runs a transform on the first field of every sample.

    Each sample of the wrapped dataset is unpacked as (image, *others); only
    the image goes through the transform, the other fields pass through.
    """

    def __init__(self, ds, transforms):
        super().__init__()
        self.ds = ds
        self.tr = transforms
        self.nt = len(ds[0])  # number of fields in one sample
        self.ln = len(ds)     # cached dataset length

    def __len__(self):
        return self.ln

    def __getitem__(self, index):
        image, *others = self.ds[index]
        return (self.tr(image), *others)

In [None]:
# Returns binary image
def thresh(img):
    """Binarise `img` with Otsu's method.

    The value returned by `threshold_otsu` is truncated to an int; the result
    is a boolean array that is True where a pixel is strictly above it.

    NOTE(review): this reuses the name of the tensor-based `thresh` defined in
    an earlier cell and replaces it.
    """
    cutoff = int(threshold_otsu(img))
    return img > cutoff

# For binary image
def bounding_box(img):
    """Locate the extent of the zero-valued (character) pixels of an image.

    The image is binarised with `thresh` first. A row or column is part of the
    box when it contains at least one zero pixel.

    Returns:
        (top, bottom, left, right) as inclusive row/column indices.
    """
    binary = thresh(img).astype(np.uint8)
    # A row/column whose minimum is 0 holds at least one character pixel, so
    # `1 - min` is non-zero exactly for those rows/columns.
    ink_cols = np.flatnonzero(1 - binary.min(axis=0))
    ink_rows = np.flatnonzero(1 - binary.min(axis=1))
    return ink_rows.min(), ink_rows.max(), ink_cols.min(), ink_cols.max()

def scale_to_bb(img):
    """Crop `img` to its bounding box plus a 10-pixel margin, then resize to 256x256.

    The margin is clipped at the image border. `resize` is called with
    order=3, preserve_range=True and cval=1.0.
    """
    n_rows, n_cols = img.shape[0], img.shape[1]
    top, bottom, left, right = bounding_box(img)
    margin = 10
    top = max(0, top - margin)
    left = max(0, left - margin)
    bottom = min(n_rows, bottom + margin)
    right = min(n_cols, right + margin)
    crop = img[top:bottom, left:right]
    return resize(crop, output_shape=(256, 256), preserve_range=True, order=3, cval=1.0)

def random_scale(img):
    """Rescale the glyph in `img` by a random factor, anchored at its bounding-box centre.

    The factor is drawn uniformly between `min_scale` and `max_scale`:
    `max_scale` is limited by the smallest free margin around the bounding box
    and `min_scale` is capped at 1.0. Pixels that fall outside the source image
    are filled with 1.0; interpolation uses order=3.

    Changes relative to the previous revision:
      * scaling is performed around the bounding-box centre instead of the
        image origin, which is what the margin-based `max_scale` assumes;
      * a one-pixel-wide or one-pixel-tall box no longer produces a zero divisor;
      * margins account for `bounding_box` returning inclusive indices;
      * `scale` is passed to `SimilarityTransform` as a scalar.

    Args:
        img: 2-D array; `bounding_box` is used to find the glyph.

    Returns:
        The warped image as returned by `skimage.transform.warp`.
    """
    height, width = img.shape[0], img.shape[1]
    t, b, l, r = bounding_box(img)
    # Clamp to 1 so a degenerate (single row/column) box cannot divide by zero.
    box_width = max(r - l, 1)
    box_height = max(b - t, 1)
    # b and r are inclusive indices, hence the "- 1" for the far-side margins.
    max_width_scale = (box_width + min(l, width - 1 - r)) / box_width
    max_height_scale = (box_height + min(t, height - 1 - b)) / box_height
    max_scale = min(max_width_scale, max_height_scale)
    min_scale = min(1.0, 0.25 * max((height / box_height), (width / box_width)))
    scale = random.uniform(min_scale, max_scale)
    # x' = scale * x + centre * (1 - scale) keeps the box centre fixed.
    centre_x = (l + r) / 2.0
    centre_y = (t + b) / 2.0
    tfm = SimilarityTransform(
        scale=scale,
        translation=(centre_x * (1 - scale), centre_y * (1 - scale)),
    )
    return warp(img, tfm.inverse, cval=1.0, order=3)

def random_translate(img):
    """Shift the glyph in `img` by a random offset that keeps its bounding box in frame.

    The vertical and horizontal offsets are drawn independently and uniformly.
    `bounding_box` returns inclusive indices, so the largest shift that keeps
    the last row/column inside the image is `size - 1 - index`; the previous
    revision used `size - index` and could push the box one pixel out.
    Pixels uncovered by the shift are filled with 1.0; interpolation uses order=3.

    Args:
        img: 2-D array; `bounding_box` is used to find the glyph.

    Returns:
        The warped image as returned by `skimage.transform.warp`.
    """
    height, width = img.shape[0], img.shape[1]
    t, b, l, r = bounding_box(img)
    translate_height = random.uniform(-t, height - 1 - b)
    translate_width = random.uniform(-l, width - 1 - r)
    # skimage transforms use (x, y) = (column, row) ordering.
    tfm = SimilarityTransform(
        translation=(translate_width, translate_height),
    )
    return warp(img, tfm.inverse, cval=1.0, order=3)

def random_rotate_and_shear(img, max_theta=math.pi / 16, max_shear_theta=math.pi / 8):
    """Apply a random rotation and shear to `img`.

    Both angles (radians) are drawn uniformly from a symmetric interval around
    zero. Pixels that fall outside the source image are filled with 1.0;
    interpolation uses order=3.

    NOTE(review): the transform has no translation component, so it looks like
    rotation/shear is anchored at the image origin rather than its centre -
    confirm that this is intended before re-enabling this augmentation.

    Args:
        img: 2-D image array.
        max_theta: largest absolute rotation angle (default pi/16, the value
            that used to be hard-coded).
        max_shear_theta: largest absolute shear angle (default pi/8, the value
            that used to be hard-coded).

    Returns:
        The warped image as returned by `skimage.transform.warp`.
    """
    theta = random.uniform(-max_theta, max_theta)
    shear_theta = random.uniform(-max_shear_theta, max_shear_theta)
    tfm = AffineTransform(rotation=theta, shear=shear_theta)
    return warp(img, tfm.inverse, cval=1.0, order=3)

def invert_color(t):
    """Invert tensor `t` in place (each value v becomes 255 - v) and return it."""
    return t.mul_(-1).add_(255)

def affine_transforms(img):
    """Binarise one 137x236 image tensor and apply random scale + translation.

    The tensor is viewed as a (137, 236) NumPy array, thresholded with
    `thresh`, cast to float32, passed through `random_scale` and then
    `random_translate`, and returned as a (1, 137, 236) tensor.
    Rotation/shear (`random_rotate_and_shear`) is currently not applied.
    """
    binary = thresh(img.reshape(137, 236).numpy()).astype(np.float32)
    augmented = random_translate(random_scale(binary))
    return torch.from_numpy(augmented.reshape(1, 137, 236))

def tfms(img):
    """Binarise one 137x236 image tensor and crop/resize it to (1, 256, 256).

    The tensor is viewed as a (137, 236) NumPy array, thresholded with
    `thresh`, cast to float32 and passed through `scale_to_bb`.
    """
    binary = thresh(img.reshape(137, 236).numpy()).astype(np.float32)
    cropped = scale_to_bb(binary)
    return torch.from_numpy(cropped.reshape(1, 256, 256))

def va_tfms(img):
    """Binarise one 137x236 image tensor without any geometric change.

    The tensor is viewed as a (137, 236) NumPy array, thresholded with
    `thresh`, cast to float32 and returned as a (1, 137, 236) tensor.
    """
    binary = thresh(img.reshape(137, 236).numpy()).astype(np.float32)
    return torch.from_numpy(binary.reshape(1, 137, 236))

# Wrap the helper functions as torchvision transforms.
# NOTE: the assignments below rebind `affine_transforms`, `invert_color` and
# `tfms` from plain functions to transform objects, so the order of the
# statements in this cell matters and re-running the cell wraps them again.
affine_transforms = transforms.Lambda(affine_transforms)
mult = transforms.Lambda(lambda img: img * 255)
to_float = transforms.Lambda(lambda img: img.float())
invert_color = transforms.Lambda(invert_color)
# Pipeline used for the test images: the `tfms` function defined above
# followed by a cast to float. The RandomApply/mult variant is disabled.
tfms = transforms.Compose([
#     transforms.RandomApply([
        tfms,
#         mult,
#     ], p=0.90),
    to_float,
])

In [None]:
# Sizes of the three label spaces (presumably the number of classes of each
# prediction target - confirm against the training notebook).
n_graphemes = 168
n_vowel_diacs = 11
n_consonant_diacs = 7

In [None]:
class MultiTaskNN(nn.Module):
    """A shared base module followed by one predictor module per task.

    Args:
        base: module applied to the input to produce the shared features.
        task_predictors: iterable of modules; each one is applied to the
            features produced by `base`.
    """

    def __init__(self, base, task_predictors):
        super().__init__()
        self.base = base
        self.task_predictors = nn.ModuleList(task_predictors)

    def _set_base_requires_grad(self, flag):
        # Shared implementation of freeze() / unfreeze().
        for param in self.base.parameters():
            param.requires_grad = flag

    def freeze(self):
        """Set `requires_grad` to False on every parameter of the base."""
        self._set_base_requires_grad(False)

    def unfreeze(self):
        """Set `requires_grad` to True on every parameter of the base."""
        self._set_base_requires_grad(True)

    def forward(self, x):
        """Return a list with one output per task predictor, in predictor order."""
        features = self.base(x)
        outputs = []
        for head in self.task_predictors:
            outputs.append(head(features))
        return outputs

In [None]:
# List the datasets mounted under /kaggle/input (exploration aid only).
!ls /kaggle/input/

In [None]:
def load_model(name, num_tasks):
    """Reassemble a `MultiTaskNN` from files read with `torch.jit.load`.

    Args:
        name: path prefix; `<name>-base.trace` is loaded as the base and
            `<name>-task-predictor-<i>.trace` as predictor i.
        num_tasks: number of task predictors to load (i in range(num_tasks)).

    Returns:
        `MultiTaskNN(base, task_predictors)` built from the loaded modules.
    """
    base = torch.jit.load(name + '-base.trace')
    heads = []
    for task_idx in range(num_tasks):
        heads.append(torch.jit.load(name + '-task-predictor-{}.trace'.format(task_idx)))
    return MultiTaskNN(base, heads)

In [None]:
# Load the base plus its 3 task predictors from the `r18-post-exp-1` trace files.
model = load_model('/kaggle/input/bhgd-post-exp/r18-post-exp-1', 3)

In [None]:
# # base, feature_size = models.resnet152(pretrained=False), 2048
# # base.fc = nn.Identity()
# # conv1 = nn.Conv2d(3, 96, kernel_size=(3, 3), stride=(2, 2), bias=False)
# # conv1.weight.data = torch.sum(base.conv1.weight.data, dim=1, keepdim=True)
# # base.conv1 = conv1

# base = pretrainedmodels.__dict__['se_resnext101_32x4d'](pretrained=None)
# base.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
# base.last_linear = nn.Identity()
# feature_size = 2048
# conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# conv1.weight.data = torch.sum(base.layer0.conv1.weight.data, dim=1, keepdim=True)
# base.layer0.conv1 = conv1

# # base = pretrainedmodels.__dict__['dpn107']()
# # # base.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
# # base.last_linear = nn.Identity()
# # feature_size = 2688
# # conv = nn.Conv2d(1, 128, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# # conv.weight.data = torch.sum(base.features.conv1_1.conv.weight.data, dim=1, keepdim=True)
# # base.features.conv1_1.conv = conv

# # base = pretrainedmodels.__dict__['pnasnet5large'](num_classes=1000)
# # base.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
# # base.last_linear = nn.Identity()
# # base.dropout = nn.Identity()
# # feature_size = 4320
# # conv = nn.Conv2d(1, 96, kernel_size=(3, 3), stride=(2, 2), bias=False)
# # conv.weight.data = torch.sum(base.conv_0.conv.weight.data, dim=1, keepdim=True)
# # base.conv_0.conv = conv

# n_classes_tasks = [n_graphemes, n_vowel_diacs, n_consonant_diacs]
# # depth_tasks = [2, 1, 1]
# # task_predictors = [
# #     make_ff_predictor(feature_size, 512, n_classes, depth) 
# #     for n_classes, depth in zip(n_classes_tasks, depth_tasks)
# # ]
# task_predictors = [nn.Linear(feature_size, n_classes, bias=False) for n_classes in n_classes_tasks]

# model = MultiTaskNN(base, task_predictors).to(device)

# model.load_state_dict(torch.load('/kaggle/input/bhgd-seresnext-101/seresnext101-itstrat-sgd-12-1e1-12-1e0-16-8.pth', map_location=device))

In [None]:
# Move the model to the selected device and switch it to evaluation mode.
# The trailing `;` keeps the notebook from echoing the module repr.
model = model.to(device)
model.eval();

In [None]:
# Flat list of predicted class indices; it is filled by the inference loop and
# later assigned to the `target` column of the submission.
results = []

In [None]:
# Run the model over the four test parquet shards and collect the predictions.
#
# Fixes relative to the previous revision:
#   * the grapheme-root prediction `g` is now written to the output; before,
#     a literal 0 was emitted in its place and `g` was never used;
#   * `results` is rebuilt here, so re-running this cell cannot append a second
#     copy of the predictions;
#   * the batch tensor is released inside the loop, so an empty shard no longer
#     hits `del imgs` on an undefined name;
#   * every loop has its own index name (the old code reused `i` at three
#     nesting levels and shadowed `results` inside comprehensions).
results = []
num_tta = 1  # number of test-time-augmentation passes that get averaged
with torch.no_grad():
    for shard_idx in range(4):
        te_ds = make_tensordataset_from_dfs(
            ['/kaggle/input/bengaliai-cv19' + '/test_image_data_{}.parquet'.format(shard_idx)]
        )
        # TTA
        te_ds = DatasetWithImageTransforms(te_ds, tfms)
        te_dl = DataLoader(te_ds, batch_size=256, num_workers=2, pin_memory=True)

        # tta_results[pass][batch] == [grapheme, vowel, consonant] model outputs
        tta_results = [[] for _ in range(num_tta)]
        for tta_idx in range(num_tta):
            for imgs, in te_dl:
                imgs = imgs.to(device)
                g_pred, v_pred, c_pred, *rest = model(imgs)
                tta_results[tta_idx].append([g_pred, v_pred, c_pred])
                del imgs

        for batch_idx in range(len(te_dl)):
            # Average the outputs of all TTA passes, then take the best class.
            g_pred = torch.stack([tta_pass[batch_idx][0] for tta_pass in tta_results]).mean(dim=0)
            v_pred = torch.stack([tta_pass[batch_idx][1] for tta_pass in tta_results]).mean(dim=0)
            c_pred = torch.stack([tta_pass[batch_idx][2] for tta_pass in tta_results]).mean(dim=0)
            g_pred = g_pred.argmax(1).tolist()
            v_pred = v_pred.argmax(1).tolist()
            c_pred = c_pred.argmax(1).tolist()

            for g, v, c in zip(g_pred, v_pred, c_pred):
                # NOTE(review): assumes sample_submission.csv lists, per image,
                # consonant_diacritic, grapheme_root, vowel_diacritic - confirm.
                results.extend([c, g, v])

        # Free the shard before the next one is loaded.
        del te_ds
        del te_dl
        del tta_results

In [None]:
# Inference is finished; drop the reference to the model.
del model

In [None]:
# Start from the competition's sample submission file.
submission_df = pd.read_csv('/kaggle/input/bengaliai-cv19' + '/sample_submission.csv')

In [None]:
# Fill the `target` column with the collected predictions.
submission_df['target'] = results

In [None]:
# Write the submission to the working directory, without the index column.
submission_df.to_csv('./submission.csv', index=False)

In [None]:
# Display the final submission table for a visual check.
submission_df