In [1]:
import numpy as np
import pandas as pd
import cv2

import time
import os
import gc
from tqdm import tqdm

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
HEIGHT = 137
WIDTH = 236

CROP_SIZE = 128

In [3]:
OUT_PATH = './images/'
os.mkdir(OUT_PATH)

* Load images from parquet/feather depending on submit
* crop and save images

In [4]:
preproc_time = time.time()

In [5]:
def crop_image(img, crop_size=CROP_SIZE):
    return cv2.resize(img, (crop_size, crop_size))

In [6]:
def reshape_data(data):
    return 255 - data.iloc[:, 1:].values.reshape(-1, HEIGHT, WIDTH).astype(np.uint8)

def prepare_image(data_type='train', indices=[0, 1, 2, 3]):
    assert data_type in ['train', 'test']
    
    datadir = '/kaggle/input/bengaliai-cv19'
    featherdir = '/kaggle/input/bengaliaicv19feather'
    
    img_names = list()
    
    for i in indices:
        print(f'Preprocess part #{i + 1}/{len(indices)}')
        fname = os.path.join(datadir, f'{data_type}_image_data_{i}.parquet')
        image_df = pd.read_parquet(fname)

        img_names_part = image_df.iloc[:, 0].to_numpy()
        img_names.append(img_names_part)

        image_df = reshape_data(image_df)
        
        for idx in tqdm(range(len(image_df))):
            name = img_names_part[idx]
            img_path = os.path.join(OUT_PATH, f'{name}.png')
            img = crop_image(image_df[idx])
            cv2.imwrite(img_path, img)

        del image_df
        gc.collect()
    
    img_names = np.concatenate(img_names, axis=0)
    print(f'Preprocess complete. Number of images: {len(img_names)}')
    return img_names

In [7]:
img_names = prepare_image(data_type='test', indices=[0, 1, 2, 3])  # indices=[0, 1, 2, 3]

Preprocess part #1/4


100%|██████████| 3/3 [00:00<00:00, 170.00it/s]


Preprocess part #2/4


100%|██████████| 3/3 [00:00<00:00, 1155.99it/s]


Preprocess part #3/4


100%|██████████| 3/3 [00:00<00:00, 946.08it/s]


Preprocess part #4/4


100%|██████████| 3/3 [00:00<00:00, 1485.59it/s]

Preprocess complete. Number of images: 12





In [8]:
preproc_time = (time.time() - preproc_time)

In [9]:
preproc_time

10.769626140594482

## Prediction

In [10]:
import numpy as np
import pandas as pd
import cv2
from PIL import Image
import six

import random
import time
import os
import gc
from tqdm import tqdm

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torchvision

In [11]:
!pip install ../input/pytorch-pretrained-models/repository/pretrained-models.pytorch-master/ > /dev/null # no output

In [12]:
BATCH_SIZE = 128
MODEL_NAME ='se_resnext50_32x4d'

In [13]:
test_time = time.time()

In [14]:
class BengaliAIDataset(torch.utils.data.Dataset):
    def __init__(self, images_path, labels=None, transform=None):
        self.images_path = images_path
        self.transform = transform
        self.labels = labels
        
        if len(labels.shape) > 1:  
            # train
            self.image_names = self.labels[:, 0] + '.png'
            self.targets = self.labels[:, 1:4].astype(np.uint8)
        else:  
            # test
            self.image_names = labels + '.png'
            self.targets = None
    
    def __len__(self):
        """return length of this dataset"""
        return len(self.labels)        
    
    def get_image(self, image_name):
        """
        gets a image by a name gathered from file list text file
        :param name: name of targeted image
        :return: an image
        """
        image_path = os.path.join(self.images_path, image_name)
        image = Image.open(image_path)
        return image
    
    def __getitem__(self, index):
        image_name = self.image_names[index]
        image = self.get_image(image_name)
        
        if self.transform:
            image = self.transform(image)
        
        if self.targets is None:
            # test
            return image
        
        # train
        target = torch.from_numpy(self.targets[index])
        return image, target

In [15]:
train_transform = torchvision.transforms.Compose([
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize((0.0528,), (0.1629,))]) 

In [16]:
test_dataset = BengaliAIDataset(images_path=OUT_PATH, labels=img_names, transform=train_transform)

In [17]:
loader_test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=0)

## Model

In [18]:
import pretrainedmodels
from torch import nn
import torch.nn.functional as F

import copy

from sklearn.metrics import recall_score

In [19]:
def pred_from_model(model, dataloader):
    y_pred = list()
    
    for inputs in tqdm(dataloader):
        inputs = inputs.to(device)

        outputs = model(inputs)

        outputs = torch.split(outputs, [n_grapheme, n_vowel, n_consonant], dim=1)
        y_pred_batch = [torch.argmax(yp, dim=1, keepdim=True).cpu().numpy() for yp in outputs]
        y_pred_batch = np.concatenate(y_pred_batch, axis=1)

        y_pred.append(y_pred_batch)
        
    y_pred = np.concatenate(y_pred, axis=0)
        
    return y_pred

In [20]:
def get_model_test(model_name='se_resnext101_32x4d', n_out=186, pretrained='imagenet', device=None):
    if model_name.startswith('se_resnext'):
        model = pretrainedmodels.__dict__[model_name](pretrained=None)

        model.layer0.conv1.in_channels = 1
        model.layer0.conv1.weight.data = model.layer0.conv1.weight.mean(dim=1, keepdim=True)

        model.avg_pool = nn.AvgPool2d(kernel_size=4, stride=1)
        model.last_linear = nn.Linear(in_features=2048, out_features=n_out, bias=True)
        
        trained_path = '../input/trained-seresnext/seresnext_v0.1.pt'
        model.load_state_dict(torch.load(trained_path, map_location=device))

    else:
        raise Exception
    return model

In [21]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
n_grapheme = 168
n_vowel = 11
n_consonant = 7
n_total = n_grapheme + n_vowel + n_consonant

model = get_model_test(model_name=MODEL_NAME, n_out=n_total, device=device)
model = model.to(device)

In [22]:
y_pred = pred_from_model(model, loader_test)

100%|██████████| 1/1 [00:00<00:00,  1.04it/s]


In [23]:
test_time = (time.time() - test_time)

In [24]:
import shutil

del_time = time.time()
shutil.rmtree(OUT_PATH)
del_time = time.time() - del_time

In [25]:
row_id = []
target = []

for name in tqdm(img_names):
    row_id += [f'{name}_grapheme_root', f'{name}_vowel_diacritic', f'{name}_consonant_diacritic']

submission_df = pd.DataFrame({'row_id': row_id, 'target': y_pred.flatten()})  # np.roll(y_pred, axis=1, shift=1).flatten()
submission_df.to_csv('submission.csv', index=False)

100%|██████████| 12/12 [00:00<00:00, 53888.27it/s]


In [26]:
submission_df.head()

Unnamed: 0,row_id,target
0,Test_0_grapheme_root,3
1,Test_0_vowel_diacritic,0
2,Test_0_consonant_diacritic,0
3,Test_1_grapheme_root,93
4,Test_1_vowel_diacritic,2


In [27]:
print(f"Preparing images time: {preproc_time:.0f} sec == {preproc_time / 60:.1f} min")
print(f"Prediction time: {test_time:.0f} sec == {test_time / 60:.1f} min")
print(f"Clean folder time: {del_time:.0f} sec == {del_time / 60:.1f} min")

Preparing images time: 11 sec == 0.2 min
Prediction time: 8 sec == 0.1 min
Clean folder time: 0 sec == 0.0 min
