In [23]:
import sys

# Put the package sources bundled under ../input at the front of the import
# search path so the imports in the next cell can resolve them. The list is in
# final sys.path order (first entry is searched first).
sys.path[:0] = [
    "../input/iterstat2/iterative-stratification-master",
    "../input/efficientnet-pytorch/EfficientNet-PyTorch-master",
    "../input/pretrained-models/pretrained-models.pytorch-master",
]

In [24]:
import argparse
import ast
import glob
import os
import shutil

import albumentations
import joblib
import numpy as np
import pandas as pd
import pretrainedmodels
import sklearn
import torch
import torch.nn as nn
from PIL import Image
from efficientnet_pytorch import EfficientNet
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from torch.nn import functional as F
from tqdm import tqdm

# --- File locations ---------------------------------------------------------
INPUT_FOLDER = "../input/bengaliai-cv19/"
TRAINING_FOLDS_CSV = "train_folds.csv"
TESTING_CSV = "test.csv"
PICKLE_FOLDER = 'image_pickles/'

# --- Column names -----------------------------------------------------------
CONSONANT_DIACRITIC = "consonant_diacritic"
VOWEL_DIACRITIC = "vowel_diacritic"
GRAPHEME_ROOT = "grapheme_root"
KFOLD = "kfold"
IMAGE_ID = "image_id"
COLUMNS = [GRAPHEME_ROOT, VOWEL_DIACRITIC, CONSONANT_DIACRITIC, IMAGE_ID, KFOLD]

# --- Backbones --------------------------------------------------------------
# These strings are also used to build the checkpoint file names in test().
RESNET = 'resnet34'
EFFNET = 'efficientnet-b3'
BASE_MODELS = [RESNET, EFFNET]

# Per-channel mean / std handed to albumentations.Normalize in TestDataset.
# (They look like the usual ImageNet statistics -- confirm against training.)
MODEL_MEAN = (0.485, 0.456, 0.406)
MODEL_STD = (0.229, 0.224, 0.225)

# --- Run configuration ------------------------------------------------------
# CUDA_VISIBLE_DEVICES, EPOCHS and TRAIN_BATCH_SIZE are not referenced by the
# inference cells visible in this notebook.
CUDA_VISIBLE_DEVICES = 1
IMG_HEIGHT = 137
IMG_WIDTH = 236
EPOCHS = 50
TRAIN_BATCH_SIZE = 128
TEST_BATCH_SIZE = 32


In [25]:
class ResNet34(nn.Module):
    """ResNet-34 feature extractor followed by three independent linear heads.

    The heads emit 168, 11 and 7 scores respectively; the inference code in
    this notebook unpacks them as grapheme root, vowel diacritic and
    consonant diacritic.
    """

    def __init__(self, pretrained):
        """Build the backbone and heads.

        Args:
            pretrained: truthy to ask `pretrainedmodels` for the "imagenet"
                weights, falsy to build the backbone without them.
        """
        super().__init__()
        weights = "imagenet" if pretrained else None
        self.model = pretrainedmodels.__dict__["resnet34"](pretrained=weights)

        # Attribute names are part of the checkpoint's state_dict keys.
        self.l0 = nn.Linear(512, 168)
        self.l1 = nn.Linear(512, 11)
        self.l2 = nn.Linear(512, 7)

    def forward(self, x):
        """Return a `(l0, l1, l2)` tuple of head outputs for a 4-D batch `x`."""
        batch_size, _channels, _height, _width = x.shape
        feature_map = self.model.features(x)
        # Global average pool to 1x1, then flatten to (batch, features).
        embedding = F.adaptive_avg_pool2d(feature_map, 1).reshape(batch_size, -1)
        return self.l0(embedding), self.l1(embedding), self.l2(embedding)


class EfficientNetB3(nn.Module):
    """EfficientNet-B3 backbone followed by three independent linear heads.

    The backbone's 1000-wide output is fed to heads producing 168 (root),
    11 (vowel) and 7 (consonant) scores.
    """

    def __init__(self, pretrained):
        """Build the backbone and heads.

        Args:
            pretrained: truthy to build the backbone with
                `EfficientNet.from_pretrained` (loads published weights, which
                may require the weights to be downloadable); falsy to build
                the architecture only with `EfficientNet.from_name`.
        """
        super().__init__()

        # Honour the `pretrained` flag instead of always building an
        # uninitialised backbone.
        if pretrained:
            self.effNet = EfficientNet.from_pretrained('efficientnet-b3')
        else:
            self.effNet = EfficientNet.from_name('efficientnet-b3')

        # Append output layers based on our data.
        # Attribute names are part of the checkpoint's state_dict keys.
        self.fc_root = nn.Linear(in_features=1000, out_features=168)
        self.fc_vowel = nn.Linear(in_features=1000, out_features=11)
        self.fc_consonant = nn.Linear(in_features=1000, out_features=7)

    def forward(self, X):
        """Return `(root, vowel, consonant)` head outputs for the batch `X`."""
        output = self.effNet(X)
        output_root = self.fc_root(output)
        output_vowel = self.fc_vowel(output)
        output_consonant = self.fc_consonant(output)

        return output_root, output_vowel, output_consonant

class TestDataset:
    """Map-style dataset over one test DataFrame of flattened images.

    The `image_id` column supplies the ids; every column after the first is
    treated as pixel data for a 137x236 image.
    """

    def __init__(self, df, img_height, img_width, mean, std):
        """Store ids / pixels and build the resize + normalise pipeline.

        Args:
            df: DataFrame with an `image_id` column; columns from index 1
                onward are taken as the flattened pixels.
            img_height, img_width: target size passed to albumentations.Resize.
            mean, std: statistics passed to albumentations.Normalize.
        """
        self.image_ids = df.image_id.values
        self.img_arr = df.iloc[:, 1:].values

        resize = albumentations.Resize(img_height, img_width, always_apply=True)
        normalize = albumentations.Normalize(mean, std, always_apply=True)
        self.aug = albumentations.Compose([resize, normalize])

    def __len__(self):
        """Number of images in the DataFrame."""
        return len(self.image_ids)

    def __getitem__(self, item):
        """Return `{"image": float tensor in C,H,W order, "image_id": id}`."""
        img_id = self.image_ids[item]

        # Flat row -> 2-D image -> 3-channel array via PIL.
        pixels = self.img_arr[item, :].reshape(137, 236).astype(float)
        rgb = np.array(Image.fromarray(pixels).convert("RGB"))

        augmented = self.aug(image=rgb)["image"]
        # The pipeline yields H,W,C; move channels first for the model.
        channels_first = np.transpose(augmented, (2, 0, 1)).astype(np.float32)

        sample = {
            "image": torch.tensor(channels_first, dtype=torch.float),
            "image_id": img_id,
        }
        return sample

def test(base_model):
    """Run 5-fold inference for one backbone and write the submission file.

    For each fold 0..4 the checkpoint
    `../input/trained-models/{base_model}_fold{fold}.pth` is loaded into a
    single model instance, predictions are collected with
    `predict_using_model`, and the per-fold results are handed to
    `create_submission_csv`.

    Args:
        base_model: key selecting the architecture; must be RESNET or EFFNET
            (any other value raises KeyError from the dispatch lookup).
    """
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    print(f"using device: {device}")

    architectures = {RESNET: ResNet34, EFFNET: EfficientNetB3}
    model = architectures[base_model](pretrained=False)

    fold_g_preds, fold_v_preds, fold_c_preds = [], [], []
    image_ids = []
    for fold in range(5):
        model_path = f"../input/trained-models/{base_model}_fold{fold}.pth"
        state_dict = torch.load(model_path, map_location=device)
        model.load_state_dict(state_dict)
        print(f"Loaded model: {model_path}")
        model.to(device)
        model.eval()

        c_pred, g_pred, fold_img_ids, v_pred = predict_using_model(device, model)

        fold_g_preds.append(g_pred)
        fold_v_preds.append(v_pred)
        fold_c_preds.append(c_pred)
        # Every fold sees the same images in the same order, so the ids are
        # only recorded once.
        if fold == 0:
            image_ids.extend(fold_img_ids)

    create_submission_csv(fold_c_preds, fold_g_preds, image_ids, fold_v_preds)


def predict_using_model(device, model):
    """Run `model` over the four test parquet files and collect its raw outputs.

    Reads `test_image_data_0..3.parquet` from INPUT_FOLDER, wraps each in a
    `TestDataset`, and feeds it through `model` in batches of TEST_BATCH_SIZE
    without shuffling. The function does not switch the model to eval mode;
    the caller is expected to have done so.

    Args:
        device: torch device the image batches are moved to.
        model: module returning a `(grapheme, vowel, consonant)` tuple of
            per-sample score tensors.

    Returns:
        `(c_pred, g_pred, img_ids_list, v_pred)`: three lists holding one
        numpy score vector per image (consonant, grapheme, vowel) and the list
        of image ids, all in dataset order.
    """
    g_pred, v_pred, c_pred = [], [], []
    img_ids_list = []
    for file_idx in range(4):
        parquet_path = "%stest_image_data_%s.parquet" % (INPUT_FOLDER, file_idx)
        df = pd.read_parquet(parquet_path)
        print(f"Loaded file: {parquet_path}")

        dataset = TestDataset(df=df,
                              img_height=IMG_HEIGHT,
                              img_width=IMG_WIDTH,
                              mean=MODEL_MEAN,
                              std=MODEL_STD)

        data_loader = torch.utils.data.DataLoader(
            dataset=dataset,
            batch_size=TEST_BATCH_SIZE,
            shuffle=False,
            num_workers=4
        )

        # Inference only: disable autograd so no graph is recorded per batch.
        with torch.no_grad():
            for d in data_loader:
                image = d["image"].to(device, dtype=torch.float)

                grapheme_pred, vowel_pred, consonant_pred = model(image)

                # One device->host copy per head per batch; extending with the
                # 2-D array appends one score vector per sample.
                g_pred.extend(grapheme_pred.cpu().numpy())
                v_pred.extend(vowel_pred.cpu().numpy())
                c_pred.extend(consonant_pred.cpu().numpy())
                img_ids_list.extend(d["image_id"])
    return c_pred, g_pred, img_ids_list, v_pred


def create_submission_csv(final_c_pred, final_g_pred, final_img_ids, final_v_pred):
    """Average per-fold scores, pick the top class, and write `submission.csv`.

    Args:
        final_c_pred, final_g_pred, final_v_pred: one entry per fold, each a
            sequence of per-image score vectors (consonant, grapheme, vowel).
        final_img_ids: image ids, in the same order as the score vectors.

    Writes `submission.csv` in the working directory with columns `row_id` and
    `target`, three rows per image (grapheme_root, vowel_diacritic,
    consonant_diacritic, in that order).
    """

    def _top_class_after_fold_mean(per_fold_scores):
        # Stack to (folds, images, classes), average folds, take the argmax.
        stacked = np.array(per_fold_scores)
        return np.argmax(np.mean(stacked, axis=0), axis=1)

    final_g = _top_class_after_fold_mean(final_g_pred)
    final_v = _top_class_after_fold_mean(final_v_pred)
    final_c = _top_class_after_fold_mean(final_c_pred)

    predictions = []
    for ii, imid in enumerate(final_img_ids):
        predictions.extend([
            (f"{imid}_grapheme_root", final_g[ii]),
            (f"{imid}_vowel_diacritic", final_v[ii]),
            (f"{imid}_consonant_diacritic", final_c[ii]),
        ])

    submission = pd.DataFrame(predictions, columns=["row_id", "target"])
    submission.to_csv("submission.csv", index=False)


In [26]:
# Run 5-fold inference for each backbone in turn.
# NOTE(review): test() ends in create_submission_csv(), which always writes to
# the fixed path "submission.csv", so the file left on disk after this cell
# holds only the efficientnet-b3 predictions; the resnet34 output is
# overwritten. Confirm whether an ensemble of both was intended.
test("resnet34")
test("efficientnet-b3")

using device: cuda:0
Loaded model: ../input/trained-models/resnet34_fold0.pth
Loaded file: ../input/bengaliai-cv19/test_image_data_0.parquet
Loaded file: ../input/bengaliai-cv19/test_image_data_1.parquet
Loaded file: ../input/bengaliai-cv19/test_image_data_2.parquet
Loaded file: ../input/bengaliai-cv19/test_image_data_3.parquet
Loaded model: ../input/trained-models/resnet34_fold1.pth
Loaded file: ../input/bengaliai-cv19/test_image_data_0.parquet
Loaded file: ../input/bengaliai-cv19/test_image_data_1.parquet
Loaded file: ../input/bengaliai-cv19/test_image_data_2.parquet
Loaded file: ../input/bengaliai-cv19/test_image_data_3.parquet
Loaded model: ../input/trained-models/resnet34_fold2.pth
Loaded file: ../input/bengaliai-cv19/test_image_data_0.parquet
Loaded file: ../input/bengaliai-cv19/test_image_data_1.parquet
Loaded file: ../input/bengaliai-cv19/test_image_data_2.parquet
Loaded file: ../input/bengaliai-cv19/test_image_data_3.parquet
Loaded model: ../input/trained-models/resnet34_fold3