In [None]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [None]:
import pandas as pd
import torch
import matplotlib.pyplot as plt
import os
import random
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torchvision.transforms import transforms
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from torchvision.models import resnext50_32x4d
from torch.optim import lr_scheduler
import pyarrow.parquet as pq
import cv2

In [None]:
# Root directory of the competition input; the test CSV and the parquet
# shards are both read by prefixing their file names with this path.
data_path = '/kaggle/input/bengaliai-cv19/'

In [None]:
# Raw image dimensions: ToImage reshapes each flattened pixel row to
# (HEIGHT, WIDTH), and crop_resize clamps its crop box to these bounds.
HEIGHT = 137
WIDTH = 236
# Side length of the square image produced by crop_resize (passed as `size`).
TARGET_SIZE = 64
# Extra pixels added to the longer crop side before squaring (passed as `pad`).
PADDING = 8

In [None]:
def bbox(img):
    """Return the bounding box of the truthy entries of a 2-D array.

    Args:
        img: 2-D array-like; any non-zero / True element counts as "set".

    Returns:
        Tuple ``(rmin, rmax, cmin, cmax)`` with the first and last row index
        and the first and last column index that contain a set element.
        Both maxima are inclusive.

    Raises:
        IndexError: if ``img`` has no set element at all.
    """
    def _first_and_last(mask):
        # Indices of the True entries along one axis, in ascending order.
        hits = np.nonzero(mask)[0]
        return hits[0], hits[-1]

    top, bottom = _first_and_last(np.any(img, axis=1))
    left, right = _first_and_last(np.any(img, axis=0))
    return top, bottom, left, right


def crop_resize(img0, size=128, pad=8):
    """Crop the bright content of an image, pad it to a square and resize it.

    The bounding box of pixels brighter than 80 is searched on ``img0`` with a
    5-pixel border removed (some images contain a line at the sides). The box
    is then widened by 13 px horizontally and 10 px vertically, clamped to
    ``0`` and to the module constants ``WIDTH`` / ``HEIGHT``. Pixels below 28
    inside the crop are zeroed as noise, the crop is zero-padded to a square
    of side ``max(crop_w, crop_h) + pad``, and the result is resized.

    Unlike the previous version, ``img0`` is never modified: the crop is
    copied before the noise threshold is applied.

    Args:
        img0: 2-D image array indexed as ``[row, column]``. The clamps use
            ``HEIGHT`` and ``WIDTH``, so it is presumably of that shape
            (confirm against callers).
        size: side length of the square output handed to ``cv2.resize``.
        pad: extra pixels added to the longer crop side before squaring.

    Returns:
        The ``size`` x ``size`` array returned by ``cv2.resize``.

    Raises:
        IndexError: propagated from ``bbox`` when no pixel of the inset
            region exceeds 80.
    """
    # crop a box around pixels larger than the threshold
    # some images contain a line at the sides, hence the 5-pixel inset
    # NOTE(review): the returned coordinates are relative to the inset view
    # but are applied to img0 without adding the 5-pixel offset back. This is
    # left unchanged so the output stays identical -- presumably the loaded
    # checkpoint was trained on exactly this preprocessing; confirm before
    # altering.
    ymin, ymax, xmin, xmax = bbox(img0[5:-5, 5:-5] > 80)

    # cropping may cut too much, so we need to add it back
    xmin = xmin - 13 if (xmin > 13) else 0
    ymin = ymin - 10 if (ymin > 10) else 0
    xmax = xmax + 13 if (xmax < WIDTH - 13) else WIDTH
    ymax = ymax + 10 if (ymax < HEIGHT - 10) else HEIGHT

    # A basic slice is a view of img0; copy it so that zeroing the noise
    # below does not overwrite the caller's array as a side effect.
    img = img0[ymin:ymax, xmin:xmax].copy()

    # remove low intensity pixels as noise
    img[img < 28] = 0
    lx, ly = xmax - xmin, ymax - ymin
    ls = max(lx, ly) + pad

    # make sure that the aspect ratio is kept in rescaling
    img = np.pad(img, [((ls - ly) // 2,), ((ls - lx) // 2,)], mode='constant')

    return cv2.resize(img, (size, size))

In [None]:
class ToImage:
    """Callable that turns one row of a pixel table into a model-ready tensor.

    Args:
        df: table whose rows are flattened images; every row must hold
            ``HEIGHT * WIDTH`` values, as required by the reshape below.
    """

    def __init__(self, df):
        self.df = df

    def __call__(self, image_id):
        """Return the preprocessed image at positional row ``image_id``.

        The row is reshaped to ``(HEIGHT, WIDTH)``, inverted, stretched so its
        brightest pixel becomes 255, then cropped and resized by
        ``crop_resize`` to ``TARGET_SIZE`` x ``TARGET_SIZE``.

        Returns:
            A float tensor built from the array ``crop_resize`` returns.
        """
        flat_pixels = self.df.iloc[image_id, :].values
        gray = flat_pixels.reshape(HEIGHT, WIDTH).astype(np.uint8)

        # Invert intensities (255 - value).
        inverted = 255 - gray

        # Rescale so the maximum value maps to 255, then truncate to uint8.
        gain = 255.0 / inverted.max()
        stretched = (inverted * gain).astype(np.uint8)

        cropped = crop_resize(stretched, size=TARGET_SIZE, pad=PADDING)
        return torch.from_numpy(cropped).float()

In [None]:
class BengaliDataset(Dataset):
    """Dataset yielding preprocessed images together with their identifiers.

    Args:
        df: per-image identifiers, one entry per image, in the same order as
            the rows of ``data`` (the notebook passes the ``image_id`` Series).
        data: pixel table handed to ``ToImage``; row ``i`` is image ``i``.
        transform: optional callable applied to the image tensor.
    """

    def __init__(self, df, data, transform=None):
        self.transform = transform
        self.df = df
        self.to_image = ToImage(data)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the sample at positional index ``idx``.

        Returns:
            dict with
              * ``'image'``: the image tensor with a leading channel axis,
              * ``'output'``: an empty tensor (no labels are available here),
              * ``'df_idx'``: the identifier stored at position ``idx``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        image = self.to_image(idx)

        if self.transform:
            image = self.transform(image)

        # ``idx`` is a position (the image itself is fetched with .iloc), so
        # the identifier must be looked up by position too. Plain ``df[idx]``
        # is a label lookup on a Series and breaks or mismatches as soon as
        # the index is not 0..n-1. Objects without ``.iloc`` (lists, arrays)
        # keep using ordinary indexing.
        if hasattr(self.df, 'iloc'):
            identifier = self.df.iloc[idx]
        else:
            identifier = self.df[idx]

        return {
            'image': image.unsqueeze(0),
            'output': torch.tensor([]),
            'df_idx': identifier,
        }

In [None]:
class ModifiedResnext(nn.Module):
    """ResNeXt-50 (32x4d) trunk for 1-channel images with three linear heads.

    The torchvision network lives under ``self.arch``; its first convolution
    is replaced by a single-input-channel one and its own ``fc`` layer is not
    used by ``forward``. The pooled trunk features feed three classifiers
    with 168, 11 and 7 outputs. The inference loop treats these as the
    grapheme root, vowel diacritic and consonant diacritic predictions, in
    that order.
    """

    def __init__(self):
        super().__init__()
        self.arch = resnext50_32x4d()
        trunk_width = self.arch.fc.in_features

        # Dropout probability applied to the pooled features before the heads.
        self.head_dropout = 0.1

        # Swap the stem for a 1-input-channel convolution whose kernel is the
        # first input-channel slice of the stem it replaces.
        stock_stem = self.arch.conv1
        self.arch.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        with torch.no_grad():
            first_channel_kernel = stock_stem.weight.data[:, 0, :, :].unsqueeze(1)
            self.arch.conv1.weight = nn.Parameter(first_channel_kernel)

        # Extra normalisation + activation applied after the last trunk stage.
        self.last = nn.Sequential(
            nn.BatchNorm2d(trunk_width),
            nn.ReLU()
        )

        self.fc_graph = nn.Linear(trunk_width, 168)
        self.fc_vowel = nn.Linear(trunk_width, 11)
        self.fc_conso = nn.Linear(trunk_width, 7)

    def forward_init_layers(self, x):
        """Run ``x`` through the stem, the four trunk stages and ``self.last``."""
        trunk = self.arch
        pipeline = (
            trunk.conv1, trunk.bn1, trunk.relu, trunk.maxpool,
            trunk.layer1, trunk.layer2, trunk.layer3, trunk.layer4,
            self.last,
        )
        for stage in pipeline:
            x = stage(x)
        return x

    def forward(self, x):
        """Compute the three sets of class scores for a batch of images.

        Args:
            x: 4-D tensor; its shape is unpacked as (batch, C, H, W).

        Returns:
            Tuple ``(fc_graph, fc_vowel, fc_conso)`` of score tensors with
            168, 11 and 7 columns respectively.
        """
        batch_size, _channels, _height, _width = x.shape

        feature_map = self.forward_init_layers(x)

        # Global average pool to one vector per sample, then head dropout
        # (only active while the module is in training mode).
        pooled = F.adaptive_avg_pool2d(feature_map, 1).reshape(batch_size, -1)
        pooled = F.dropout(pooled, self.head_dropout, self.training)

        return self.fc_graph(pooled), self.fc_vowel(pooled), self.fc_conso(pooled)

In [None]:
# Instantiate the model; its weights are replaced by the checkpoint that the
# following cell loads.
net = ModifiedResnext()

In [None]:
# Load the trained weights, mapping every tensor to the CPU so the cell also
# works without a GPU; the model is moved to CUDA later if it is available.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
net.load_state_dict(torch.load('../input/rexnet50-trained/network_29.pth', map_location=torch.device('cpu')))

In [None]:
# File-name stem of the test parquet shards; the inference loop appends the
# shard number and '.parquet'.
prefix = 'test_image_data_'
# Name of the CSV (inside data_path) that is read into test_df.
df_file = 'test.csv'

In [None]:
# Read the test CSV from the competition input directory.
# NOTE(review): no later cell visible in this notebook reads the contents of
# this frame -- confirm whether the cell is still needed.
test_df = pd.read_csv(data_path + df_file)

In [None]:
# Show whether PyTorch can see a CUDA device (cell output only; the result is
# not stored).
torch.cuda.is_available()

In [None]:
# Run the model over every test parquet shard and collect the submission rows.
# For each image three rows are produced, in this order: consonant_diacritic,
# grapheme_root, vowel_diacritic.
row_id = []
target = []

# Pick the device once instead of re-querying CUDA for every sample.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net.to(device)
net.eval()

NUM_TEST_SHARDS = 4        # shards are named f'{prefix}0.parquet' .. f'{prefix}3.parquet'
INFERENCE_BATCH_SIZE = 64  # images per forward pass

for shard_idx in range(NUM_TEST_SHARDS):
    shard_df = pq.read_pandas(data_path + f'{prefix}{shard_idx}.parquet').to_pandas()
    # Every column after the first holds pixel values; the ids come from the
    # 'image_id' column.
    shard_pixels = shard_df.iloc[:, 1:]
    shard_ids = shard_df['image_id']

    bd_test = BengaliDataset(shard_ids, shard_pixels)
    # shuffle=False keeps the rows in file order.
    test_batch = DataLoader(bd_test, batch_size=INFERENCE_BATCH_SIZE, shuffle=False)

    with torch.no_grad():
        for inp in test_batch:
            out_gr, out_vd, out_cd = net(inp['image'].to(device))

            # One predicted class per image for each head, as plain ints.
            pred_gr = out_gr.argmax(dim=1).tolist()
            pred_vd = out_vd.argmax(dim=1).tolist()
            pred_cd = out_cd.argmax(dim=1).tolist()

            # The default collate function turns the per-sample id strings
            # into a list, aligned with the batch dimension.
            for t_idx, cd, gr, vd in zip(inp['df_idx'], pred_cd, pred_gr, pred_vd):
                row_id.extend([f'{t_idx}_consonant_diacritic', f'{t_idx}_grapheme_root', f'{t_idx}_vowel_diacritic'])
                target.extend([cd, gr, vd])

    print(f'Finished {prefix}{shard_idx}.parquet')

    # Release the shard before the next one is loaded to limit peak memory.
    del shard_df
    del shard_pixels
    del shard_ids
    del bd_test
    del test_batch

In [None]:
# Assemble the collected predictions into a two-column frame: the row_id
# strings and the integer class predicted for each of them.
submission_df = pd.DataFrame({'row_id': row_id, 'target': target})

In [None]:
# Write the submission file to the working directory, without the DataFrame
# index column.
submission_df.to_csv('submission.csv', index=False)

In [None]:
# Display the submission frame as the cell output for a visual check.
submission_df