In [None]:
import sys
# Location of the `pretrainedmodels` sources; the path lives under ../input,
# so this is presumably a dataset attached to the notebook (TODO confirm).
pt_models = "../input/pretrained-models/pretrained-models.pytorch-master/"
# Put that directory first on the module search path so the import below
# resolves to this copy of the package.
sys.path.insert(0, pt_models)
import pretrainedmodels

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import albumentations
import gc
from tqdm import tqdm
import cv2
from PIL import Image
# Input data files are available in the "../input/" directory.
# Height and width (in pixels) that load_data uses to reshape every flattened
# image row read from the parquet files.
ORIGINAL_HEIGHT = 137
ORIGINAL_WIDTH = 236
# Target size passed to cv2.resize at the end of cut_and_denoise_image.
PROCESSED_HEIGHT = 128
PROCESSED_WIDTH = 128
import os
# Print the path of every file below /kaggle/input so the available inputs
# show up in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

In [None]:
def normalize_image(img):
    """Stretch the pixel values of ``img`` so that its maximum becomes 255.

    Parameters
    ----------
    img : numpy.ndarray
        Image whose brightest value is used as the scaling reference.

    Returns
    -------
    numpy.ndarray
        New ``uint8`` array with the same shape as ``img``. The scaled values
        are truncated (not rounded) by the cast to ``uint8``.

    Notes
    -----
    An image whose maximum is 0 has nothing to stretch; it is returned as an
    all-zero ``uint8`` array instead of dividing by zero (which would yield
    NaN values and an undefined cast).
    """
    peak = img.max()
    if peak == 0:
        # Blank image: avoid 255.0 / 0 -> inf and 0 * inf -> NaN.
        return np.zeros_like(img, dtype=np.uint8)
    return (img * (255.0 / peak)).astype(np.uint8)

def get_min_indices(img, min_writing_value=80):
    """Return the bounding box of all pixels brighter than ``min_writing_value``.

    Parameters
    ----------
    img : numpy.ndarray
        Two-dimensional image (rows x columns).
    min_writing_value : int, optional
        Pixels strictly greater than this value count as writing.

    Returns
    -------
    tuple
        ``(h_min, h_max, v_min, v_max)``: first and last column index and
        first and last row index that contain at least one such pixel. All
        four indices are inclusive.

    Notes
    -----
    When no pixel exceeds the threshold there is no bounding box; the full
    extent of the image is returned instead of raising ``IndexError``, so a
    blank image does not abort the whole run.
    """
    writing_mask = img > min_writing_value
    # axis=0 collapses the rows -> one flag per column (horizontal position);
    # axis=1 collapses the columns -> one flag per row (vertical position).
    columns_with_writing = np.flatnonzero(np.any(writing_mask, axis=0))
    rows_with_writing = np.flatnonzero(np.any(writing_mask, axis=1))
    if columns_with_writing.size == 0:
        # Nothing above the threshold: fall back to the whole image.
        height, width = writing_mask.shape[0], writing_mask.shape[1]
        return (0, max(width - 1, 0), 0, max(height - 1, 0))
    h_min, h_max = columns_with_writing[0], columns_with_writing[-1]
    v_min, v_max = rows_with_writing[0], rows_with_writing[-1]
    return (h_min, h_max, v_min, v_max)

def get_min_indices_with_border(img, min_writing_value=80, border=20):
    """Find the writing's bounding box while ignoring a frame around the image.

    The outer ``border`` pixels on every side are excluded from the search;
    the bounding box is then computed with ``get_min_indices`` on the
    remaining inner region.

    Parameters
    ----------
    img : numpy.ndarray
        Two-dimensional image (rows x columns).
    min_writing_value : int, optional
        Threshold forwarded to ``get_min_indices``.
    border : int, optional
        Width of the ignored frame in pixels. ``0`` searches the whole image.

    Returns
    -------
    tuple
        ``(h_min, h_max, v_min, v_max)`` expressed in coordinates of the full
        image (the border offset is added back).
    """
    # Explicit end indices instead of ``-border``: with border == 0 the slice
    # ``img[0:-0]`` would be empty rather than the whole image.
    inner_region = img[border:img.shape[0] - border, border:img.shape[1] - border]
    h_min, h_max, v_min, v_max = get_min_indices(inner_region, min_writing_value=min_writing_value)
    # Indices are relative to the inner region; shift them back to the full image.
    return (h_min + border, h_max + border, v_min + border, v_max + border)

def cut_and_denoise_image(img, border=20, min_writing_value=80, max_noise=28):
    """Crop an image to its writing, remove faint noise, pad and resize it.

    Steps:
      1. locate the bounding box of the writing (ignoring a ``border`` frame),
      2. widen the box by ``border`` pixels on each side, clamped to the image,
      3. crop, then set every pixel below ``max_noise`` to 0,
      4. zero-pad the shorter side symmetrically,
      5. resize to ``PROCESSED_WIDTH`` x ``PROCESSED_HEIGHT``.

    Parameters
    ----------
    img : numpy.ndarray
        Two-dimensional image (rows x columns). It is not modified.
    border : int, optional
        Frame ignored during the search and tolerance added around the box.
    min_writing_value : int, optional
        Threshold above which a pixel counts as writing.
    max_noise : int, optional
        Pixels strictly below this value are set to 0.

    Returns
    -------
    numpy.ndarray
        The resized image as returned by ``cv2.resize``.
    """
    image_height, image_width = img.shape[0], img.shape[1]
    # Minimal box that encloses the writing.
    h_min, h_max, v_min, v_max = get_min_indices_with_border(img, border=border, min_writing_value=min_writing_value)
    # Add tolerance around the minimal box; tying it to `border` prevents
    # cutting off parts of the character that lie inside the ignored frame.
    # Bounds come from the image itself rather than module-level constants.
    h_min = (h_min - border) if h_min > border else 0
    v_min = (v_min - border) if v_min > border else 0
    h_max = (h_max + border) if image_width - h_max > border else image_width
    v_max = (v_max + border) if image_height - v_max > border else image_height
    # Crop. Copy so that the denoising below does not write into the caller's array.
    img = img[v_min:v_max, h_min:h_max].copy()
    # Denoise.
    img[img < max_noise] = 0
    # Pad the shorter side. The same floor-divided amount is added on both
    # sides, so the result can be one pixel short of square when the size
    # difference is odd; the resize below still yields the fixed output size.
    longer_side_length = max(img.shape[0], img.shape[1])
    padding = [((longer_side_length - img.shape[0]) // 2,),
               ((longer_side_length - img.shape[1]) // 2,)]
    img = np.pad(img, padding, mode="constant")
    # cv2.resize expects dsize as (width, height).
    return cv2.resize(img, (PROCESSED_WIDTH, PROCESSED_HEIGHT))

In [None]:
def load_data(file_name):
    """Load one parquet file of images and preprocess every image.

    The first column of the file is returned as the image names; the
    remaining columns are interpreted as flattened
    ``ORIGINAL_HEIGHT`` x ``ORIGINAL_WIDTH`` images. Every image is inverted
    (``255 - value``), normalized with ``normalize_image`` and cropped/resized
    with ``cut_and_denoise_image``.

    Parameters
    ----------
    file_name : str
        Path of the parquet file to read.

    Returns
    -------
    tuple
        ``(names, images)`` where ``images`` has shape
        ``(n_images, PROCESSED_HEIGHT, PROCESSED_WIDTH)`` and keeps the dtype
        of the loaded pixel data.
    """
    print("Dealing with ", file_name)
    df = pd.read_parquet(file_name)
    names = df.iloc[:, 0].values
    images = 255 - df.iloc[:, 1:].values.reshape(-1, ORIGINAL_HEIGHT, ORIGINAL_WIDTH)
    del df
    print("Freed after loading: ", gc.collect())
    for image_index, img in enumerate(images):
        img = normalize_image(img)
        img = cut_and_denoise_image(img)
        # Store the processed image in the top-left corner of the original
        # slot so that no second full-size array is needed while processing.
        images[image_index, 0:PROCESSED_HEIGHT, 0:PROCESSED_WIDTH] = img
    # A plain slice would be a view that keeps the whole oversized buffer
    # alive; copying into a contiguous array lets that buffer be released.
    images = np.ascontiguousarray(images[:, 0:PROCESSED_HEIGHT, 0:PROCESSED_WIDTH])
    print("Freed after processing: ", gc.collect())
    return names, images

In [None]:
# Start a fresh submission file containing only the header. Opening with "w"
# truncates any file left over from a previous run; append mode would stack a
# second header (and keep stale rows) every time this cell is re-executed.
with open("submission.csv", "w") as submission_file:
    submission_file.write("row_id,target\n")
    
def write_predictions(names, prediction):
    """Append the predicted classes of one batch to ``submission.csv``.

    Parameters
    ----------
    names : iterable of str
        Row-id prefixes, one per sample in the batch.
    prediction : sequence of torch.Tensor
        Scores for grapheme root, vowel diacritic and consonant diacritic
        (in that order), each indexed as (sample, class).

    For every name three rows are written, in the order consonant diacritic,
    grapheme root, vowel diacritic; the class with the highest score along
    axis 1 is used.
    """
    root_scores = prediction[0].detach().numpy()
    best_root = np.argmax(root_scores, axis=1)
    vowel_scores = prediction[1].detach().numpy()
    best_vowel = np.argmax(vowel_scores, axis=1)
    consonant_scores = prediction[2].detach().numpy()
    best_consonant = np.argmax(consonant_scores, axis=1)

    with open("submission.csv", "a") as out_file:
        for position, row_prefix in enumerate(names):
            out_file.write(f"{row_prefix}_consonant_diacritic,{best_consonant[position]}\n")
            out_file.write(f"{row_prefix}_grapheme_root,{best_root[position]}\n")
            out_file.write(f"{row_prefix}_vowel_diacritic,{best_vowel[position]}\n")

In [None]:
class SubmissionDataSet():
    """Index-based dataset that pairs each image name with a normalized tensor.

    It offers ``__len__`` and ``__getitem__`` and is handed to
    ``torch.utils.data.DataLoader`` in the inference loop.
    """

    # One normalization pipeline shared by all instances.
    transform = albumentations.Compose([albumentations.Normalize(always_apply=True)])

    def __init__(self, names, images):
        """Keep references to the image names and the image array."""
        self.names, self.images = names, images

    def __len__(self):
        """Number of samples, taken from the number of names."""
        return len(self.names)

    def __getitem__(self, index):
        """Return ``(name, tensor)`` for the sample at ``index``.

        The image is converted to RGB via PIL, normalized by ``transform``
        and reordered from (H, W, C) to (C, H, W) before becoming a tensor.
        """
        rgb_image = Image.fromarray(self.images[index]).convert("RGB")
        normalized = self.transform(image=np.array(rgb_image))["image"]
        channels_first = np.transpose(normalized, (2, 0, 1))
        return (self.names[index], torch.tensor(channels_first))

In [None]:
class Net(torch.nn.Module):
    """ResNet-101 feature extractor followed by a shared trunk and three heads.

    The backbone is created through ``pretrainedmodels`` with
    ``pretrained=None``; the weights are expected to be supplied afterwards
    via ``load_state_dict``. The pooled 2048-dimensional features pass
    through two dropout + linear stages (2048 -> 1024 -> 512, without a
    non-linearity in between) and are then classified by three independent
    linear heads.
    """

    def __init__(self):
        super().__init__()
        self.transfer_model = pretrainedmodels.__dict__["resnet101"](pretrained=None)

        # Shared trunk on top of the pooled backbone features.
        self.drop1 = torch.nn.Dropout(p=0.3)
        self.l1 = torch.nn.Linear(2048, 1024)
        self.drop2 = torch.nn.Dropout(p=0.3)
        self.l2 = torch.nn.Linear(1024, 512)

        # One output head per target: 168 / 11 / 7 classes.
        self.root = torch.nn.Linear(512, 168)
        self.vowel = torch.nn.Linear(512, 11)
        self.consonant = torch.nn.Linear(512, 7)

    def forward(self, x):
        """Compute the scores of all three heads for a batch.

        Parameters
        ----------
        x : torch.Tensor
            Batch of images; the first dimension is the batch size.

        Returns
        -------
        tuple of torch.Tensor
            ``(root, vowel, consonant)`` with shapes ``(batch, 168)``,
            ``(batch, 11)`` and ``(batch, 7)``.
        """
        batch_size = x.shape[0]
        x = self.transfer_model.features(x)
        # Global average pooling to 1x1, then flatten to (batch, features).
        x = torch.nn.functional.adaptive_avg_pool2d(x, 1).reshape(batch_size, -1)

        x = self.drop1(x)
        x = self.l1(x)
        x = self.drop2(x)
        x = self.l2(x)

        root = self.root(x)
        vowel = self.vowel(x)
        consonant = self.consonant(x)
        return (root, vowel, consonant)
    
# Build the network, then fill it with the stored weights.
model = Net()
# map_location remaps the stored tensors to the CPU while loading.
# NOTE(review): torch.load unpickles the file, so the checkpoint must come from a trusted source.
model.load_state_dict(torch.load("/kaggle/input/bengalipytorchmodels/ResNet101_30Epochs.pt", map_location=torch.device('cpu')))
# Switch to evaluation mode (affects the Dropout layers defined in Net).
model.eval()

In [None]:
# Run inference over the four test parquet files, one file at a time, and
# append the predictions of every batch to submission.csv.
for file_index in range(0, 4):
    names, images = load_data(f"/kaggle/input/bengaliai-cv19/test_image_data_{file_index}.parquet")
    batch_data_set = SubmissionDataSet(names, images)
    data_loader = torch.utils.data.DataLoader(batch_data_set,
                                              batch_size=8,
                                              shuffle=False,
                                              num_workers=4)
    # Inference only: disabling autograd avoids building a graph for every
    # batch, which saves memory and time.
    with torch.no_grad():
        # len(data_loader) counts the final partial batch too, so the progress
        # bar total is correct even when the file size is not a multiple of 8.
        for batch_names, batch_images in tqdm(data_loader, total=len(data_loader)):
            prediction = model(batch_images)
            write_predictions(batch_names, prediction)

    print("Collected after writing predictions: ", gc.collect())