In [None]:
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from torchvision import models
from torch.utils.data import Dataset, DataLoader


class APINet(nn.Module):
    """DenseNet-121 based classifier for single-channel images.

    Only ``conv`` (the DenseNet feature extractor) and ``fc`` take part in
    the forward pass.  The other layers created in ``__init__`` are unused
    here; they are kept so that loading the saved weights does not fail on
    missing or unexpected entries.

    Args:
        out_channel: Number of classes produced by the final linear layer.
    """

    def __init__(self, out_channel):
        super(APINet, self).__init__()
        # use densenet121 as back bone
        backbone = models.densenet121(pretrained=False)
        # Swap the stem convolution so the network accepts 1-channel input.
        backbone.features.conv0 = nn.Conv2d(1, 64, kernel_size=(7, 7),
                                            stride=(2, 2), padding=(3, 3))
        num_feature = backbone.classifier.in_features
        # Drop the last child (the stock classifier) and keep the rest.
        layers = list(backbone.children())[:-1]
        self.conv = nn.Sequential(*layers)

        self.fc = nn.Linear(num_feature, out_channel)

        # layers below are unused
        embedding = 512
        self.map = nn.Sequential(
            nn.Linear(num_feature * 2, embedding),
            nn.LeakyReLU(inplace=True),
            nn.Linear(embedding, embedding),
            nn.LeakyReLU(inplace=True),
            nn.Linear(embedding, num_feature),
        )
        self.sigmoid = nn.Sigmoid()

        self.criterion = nn.CrossEntropyLoss()
        self.rank_criterion = nn.MarginRankingLoss(margin=0)
        self.softmax_layer = nn.Softmax(dim=1)

    def forward(self, images):
        """Return class logits of shape ``(batch, out_channel)``.

        Args:
            images: Batch of images laid out as ``(batch, 1, H, W)``.
        """
        conv_out = self.conv(images)
        pool_out = F.adaptive_avg_pool2d(conv_out, (1, 1))
        # Flatten from dim 1 onwards only.  A bare ``squeeze()`` would also
        # remove the batch dimension when the batch holds a single sample,
        # producing 1-D logits that break ``argmax(1)`` in the caller.
        pool_out = torch.flatten(pool_out, 1)

        return self.fc(pool_out)


# Row and column index vectors for a 137 x 236 image.  ``Allign`` uses them
# as weights when locating the intensity centroid.
mat1 = np.arange(137)
mat2 = np.arange(236)


def Allign(image):
    """Align the glyph to the centre of the image.

    The intensity-weighted centroid of ``image`` is moved to position
    (68, 118) of a zero-filled 137 x 236 canvas; parts that would fall
    outside the canvas are dropped.  The central 128 x 192 window of that
    canvas is returned.

    Args:
        image: 2-D array of shape (137, 236).  Intensities are expected to
            be non-negative so that the centroid lies inside the image.

    Returns:
        A float array of shape (128, 192).  A completely blank input
        (zero total intensity) yields an all-zero result.
    """
    img1 = image.mean(axis=1)   # mean intensity of every row
    img2 = image.mean(axis=0)   # mean intensity of every column
    imgall = img1.mean()        # overall mean intensity

    if imgall != 0:
        # Centroid coordinates, truncated towards zero.
        mean_x = int((img1 * mat1).mean() / imgall)
        mean_y = int((img2 * mat2).mean() / imgall)
    else:
        # Blank image: the centroid would be 0/0 = NaN and int(NaN) raises
        # ValueError, so fall back to the geometric centre (no shift).
        mean_x, mean_y = 68, 118

    # How far we can copy above / below / left / right of the centroid
    # without leaving either the source image or the destination canvas.
    _t = min(68, mean_x)
    _b = min(69, 137 - mean_x)
    _l = min(118, mean_y)
    _r = min(118, 236 - mean_y)

    zeros = np.zeros((137, 236))
    zeros[68 - _t:68 + _b, 118 - _l:118 + _r] = \
        image[mean_x - _t:mean_x + _b, mean_y - _l:mean_y + _r]

    # Crop 64 rows / 96 columns either side of the canvas centre.
    return zeros[68 - 64: 68 + 64, 118 - 96: 118 + 96]


class GraphemeDataset(Dataset):
    """Dataset wrapping flattened 137 x 236 grapheme images.

    Pixel values are inverted (``255 - value``) once at construction time.
    Each item is then scaled by 1/255, centred with ``Allign`` and mapped
    linearly via ``x * 2 - 1``.
    """

    def __init__(self, images):
        # Restore the 2-D layout of every row, then invert the intensities.
        frames = images.reshape(-1, 137, 236)
        self.images = 255 - frames

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        scaled = self.images[idx] / 255
        centred = Allign(scaled)
        return centred * 2 - 1


def _load_model(out_channel, weight_path):
    """Build an APINet on the GPU, load its weights and set eval mode."""
    model = nn.DataParallel(APINet(out_channel).cuda())
    # The state dict is loaded into the DataParallel wrapper, exactly as
    # the checkpoints are consumed elsewhere in this script.
    model.load_state_dict(torch.load(weight_path))
    model.eval()
    return model


def _predict_classes(model, batch):
    """Return the arg-max class index of every sample as a NumPy array."""
    return model(batch).argmax(1).cpu().numpy()


def test():
    """Run inference on the four test parquet files and write the result.

    Three separate models predict the grapheme root (168 classes), the
    vowel (11 classes) and the consonant (7 classes).  The predictions are
    interleaved per sample as consonant, root, vowel and stored in the
    ``target`` column of ``submission.csv``.

    Requires a CUDA device and the Kaggle input files referenced below.
    """
    model_root = _load_model(
        168, '/kaggle/input/models/DenseNet121root_3.pth')
    model_vowel = _load_model(
        11, '/kaggle/input/models/DenseNet121vowel_3.pth')
    model_consonant = _load_model(
        7, '/kaggle/input/models/DenseNet121consonant.pth')

    predictions = []

    for i in range(4):
        print(i)
        frame = pd.read_parquet(
            '/kaggle/input/bengaliai-cv19/test_image_data_%d.parquet' % i)
        # Skip the first column (presumably the image id) and keep pixels.
        test_data = frame.iloc[:, 1:].values
        del frame  # free the DataFrame before batches are materialised
        test_loader = DataLoader(GraphemeDataset(test_data),
                                 batch_size=256,
                                 shuffle=False,
                                 drop_last=False)

        with torch.no_grad():
            for data in test_loader:
                # (batch, H, W) -> (batch, 1, H, W) float32 on the GPU.
                data = data.cuda().unsqueeze(1).float()

                # ``np.int`` was removed in NumPy 1.24; the builtin ``int``
                # is the dtype it used to alias.
                labels = np.zeros((data.shape[0] * 3, ), dtype=int)

                # Three consecutive slots per sample:
                # [consonant, root, vowel].
                labels[2::3] = _predict_classes(model_vowel, data)
                labels[1::3] = _predict_classes(model_root, data)
                labels[::3] = _predict_classes(model_consonant, data)

                predictions.append(labels)

        # Release this file's data before the next parquet is read.
        del test_data, test_loader

    submission = pd.read_csv(
        '/kaggle/input/bengaliai-cv19/sample_submission.csv')
    submission['target'] = np.hstack(predictions)
    submission.to_csv('submission.csv', index=False)


# Run inference and write submission.csv as soon as this cell/script runs.
test()
