# Description
This kernel creates an image dataset for training based on the competition data. Using images avoids loading the entire dataset into memory, which may be important for running experiments on Kaggle. Meanwhile, inference can be done by loading the dataset part by part, without saving it as images, to improve speed.

The original images are cropped to keep only the characters and resized to 128x128, with padding added to maintain the aspect ratio (see the image plots in the kernel). The statistics of the produced images are also computed.

In [None]:
!pip install /kaggle/input/efficientnet-pytorch -f ./ --no-index

In [None]:
!pip install /kaggle/input/pretrainedmodels -f ./ --no-index

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# Any results you write to the current directory are saved as output.

In [None]:
import cv2
from tqdm import tqdm_notebook as tqdm
import zipfile
import io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Dimensions used to reshape each flattened parquet row into a 2-D image.
HEIGHT, WIDTH = 137, 236
# Side length (in pixels) of the square images that are written out.
SIZE = 128

# Parquet files to process. The names say TRAIN, but the paths point at the
# four test_image_data_*.parquet files.
TRAIN = [f'../input/bengaliai-cv19/test_image_data_{part}.parquet'
         for part in range(4)]

# Zip archive that receives the processed PNG images.
OUT_TRAIN = 'test_128.zip'

In [None]:
# Load the first parquet file listed in TRAIN into a DataFrame.
df = pd.read_parquet(TRAIN[0])
# df.head()

In [None]:
def bbox(img):
    """Return the bounding box of the truthy entries of a 2-D array.

    The result is ``(rmin, rmax, cmin, cmax)``: the first and last row index
    and the first and last column index that contain at least one truthy
    value. An array without any truthy value raises ``IndexError``.
    """
    row_hits = np.nonzero(np.any(img, axis=1))[0]
    col_hits = np.nonzero(np.any(img, axis=0))[0]
    return row_hits[0], row_hits[-1], col_hits[0], col_hits[-1]

def crop_resize(img0, size=SIZE, pad=16):
    """Crop an image to its content, pad it towards a square and resize it.

    Args:
        img0: 2-D image array; the clamping below uses the module-level
            HEIGHT and WIDTH, so it is presumably a HEIGHT x WIDTH image.
            It is no longer modified by this function.
        size: side length of the returned square image.
        pad: extra pixels added to the longer side of the crop before the
            crop is padded towards a square.

    Returns:
        The result of ``cv2.resize`` to ``(size, size)``.
    """
    # Find the box around pixels brighter than the threshold. A 5 px border
    # is ignored because some images contain a line at the sides.
    # NOTE(review): the indices returned here are relative to the trimmed
    # image and are not shifted back by 5. This is kept as is because the
    # checkpoint used later was presumably trained with this exact
    # preprocessing - confirm before changing.
    ymin, ymax, xmin, xmax = bbox(img0[5:-5, 5:-5] > 80)
    # The box may cut too much, so widen it by a margin, clamped to the image.
    xmin = xmin - 13 if (xmin > 13) else 0
    ymin = ymin - 10 if (ymin > 10) else 0
    xmax = xmax + 13 if (xmax < WIDTH - 13) else WIDTH
    ymax = ymax + 10 if (ymax < HEIGHT - 10) else HEIGHT
    # Basic slicing yields a view; copy it so the in-place noise removal
    # below does not overwrite the caller's array.
    img = img0[ymin:ymax, xmin:xmax].copy()
    # Remove low-intensity pixels as noise.
    img[img < 28] = 0
    lx, ly = xmax - xmin, ymax - ymin
    l = max(lx, ly) + pad
    # Pad both axes symmetrically so the aspect ratio is kept when rescaling.
    img = np.pad(img, [((l - ly) // 2,), ((l - lx) // 2,)], mode='constant')
    return cv2.resize(img, (size, size))

In [None]:
# # df = pd.read_parquet(TRAIN[0])
# n_imgs = 2
# fig, axs = plt.subplots(n_imgs, 2, figsize=(10, 5*n_imgs))

# for idx in range(n_imgs):
#     #somehow the original input is inverted
#     img0 = 255 - df.iloc[idx, 1:].values.reshape(HEIGHT, WIDTH).astype(np.uint8)
# #     img0 = df.iloc[idx, 1:].values.reshape(HEIGHT, WIDTH).astype(np.uint8)
#     #normalize each image by its max val
#     img = (img0*(255.0/img0.max())).astype(np.uint8)
#     img = crop_resize(img)
# #     print(img[100])
# #     axs[idx,0].imshow(img0)
#     axs[idx,0].imshow(img0, cmap='gray')
#     axs[idx,0].set_title('Original image')
#     axs[idx,0].axis('off')
#     axs[idx,1].imshow(img, cmap='gray')
#     axs[idx,1].set_title('Crop & resize')
#     axs[idx,1].axis('off')
# plt.show()

In [None]:
# Per-image mean and mean-of-squares (pixels scaled to [0, 1]); they are
# combined into dataset-wide statistics afterwards.
x_tot, x2_tot = [], []
with zipfile.ZipFile(OUT_TRAIN, 'w') as img_out:
    for fname in TRAIN:
        df = pd.read_parquet(fname)
        # Column 0 holds the image name, the remaining columns the pixels.
        pixels = df.iloc[:, 1:].values.reshape(-1, HEIGHT, WIDTH).astype(np.uint8)
        # The input is inverted, so flip it back.
        data = 255 - pixels
        for idx in tqdm(range(len(df))):
            raw = data[idx]
            # Normalize each image by its max value.
            scaled = (raw * (255.0 / raw.max())).astype(np.uint8)
            img = crop_resize(scaled)

            unit = img / 255.0
            x_tot.append(unit.mean())
            x2_tot.append((unit ** 2).mean())

            # Store the processed image as "<name>.png" inside the archive.
            encoded = cv2.imencode('.png', img)[1]
            img_out.writestr(df.iloc[idx, 0] + '.png', encoded)

In [None]:
# Image statistics: the mean is the average of the per-image means, and the
# std follows from sqrt(E[x^2] - E[x]^2) using the per-image mean squares.
img_avr = np.mean(x_tot)
img_std = np.sqrt(np.mean(x2_tot) - img_avr ** 2)
print('mean:',img_avr, ', std:', img_std)

In [None]:
import torch.nn as nn
import pretrainedmodels
import pretrainedmodels.utils
import torchvision.models as models
from efficientnet_pytorch import EfficientNet

def bn_drop_lin(n_in, n_out, bn=True, p = 0., actn = None):
    """Return the layer list [BatchNorm1d] -> [Dropout] -> Linear -> [actn].

    BatchNorm1d(n_in) is included when `bn` is truthy, Dropout(p) when `p`
    is non-zero, and `actn` is appended when it is not None. The
    Linear(n_in, n_out) layer is always present.
    """
    layers = []
    if bn:
        layers.append(nn.BatchNorm1d(n_in))
    if p != 0:
        layers.append(nn.Dropout(p))
    layers.append(nn.Linear(n_in, n_out))
    if actn is not None:
        layers.append(actn)
    return layers

class Head(nn.Module):
    """Classification head: two stacked `bn_drop_lin` stages.

    The first stage maps `in_channels` to 512 features followed by ReLU,
    the second maps 512 features to `out_channels`. Both stages use batch
    norm and dropout with probability `drop_rate`.
    """

    def __init__(self, in_channels, out_channels, drop_rate = 0.5):
        super().__init__()
        hidden_stage = bn_drop_lin(in_channels, 512, bn=True, p=drop_rate,
                                   actn=nn.ReLU(inplace=True))
        output_stage = bn_drop_lin(512, out_channels, bn=True, p=drop_rate)
        self.fc = nn.Sequential(*hidden_stage, *output_stage)

    def forward(self, x):
        """Apply the stacked layers to `x` and return the result."""
        return self.fc(x)
        

class MultiHeadNet(nn.Module):
    """Backbone network feeding three classification heads.

    The heads produce 168, 11 and 7 outputs and are stored as `head_graph`,
    `head_vowel` and `head_conso`. The backbone is the chosen architecture
    with its last child module removed.
    """

    def __init__(self, arch, pretrained, input_space = 'gray'):
        super().__init__()
        print("=> creating model '{}'".format(arch))

        # Any of these spellings of `pretrained` disables pre-trained weights.
        use_pretrained = pretrained.lower() not in ['false', 'none', 'not', 'no', '0']

        if arch.startswith('efficientnet'):
            if use_pretrained:
                print("=> using pre-trained parameters '{}'".format(pretrained))
                model = EfficientNet.from_pretrained(arch)
            else:
                model = EfficientNet.from_name(arch)
            in_features = model._fc.in_features
        else:
            if use_pretrained:
                print("=> using pre-trained parameters '{}'".format(pretrained))
            weights = pretrained if use_pretrained else None
            model = pretrainedmodels.__dict__[arch](num_classes=1000,
                                                    pretrained=weights)
            in_features = model.last_linear.in_features

        if input_space == 'gray':
            if arch.startswith('resnet'):
                # Swap the stem convolution for a single-channel one.
                model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2,
                                        padding=3, bias=False)
            else:
                print('Modify the input space.')

        # Keep every child module except the last one as the backbone.
        backbone_layers = list(model.children())[:-1]
        self.model = nn.Sequential(*backbone_layers)

        self.head_graph = Head(in_features, 168)
        self.head_vowel = Head(in_features, 11)
        self.head_conso = Head(in_features, 7)

    def forward(self, x):
        """Return the (graph, vowel, conso) head outputs for the batch `x`."""
        features = self.model(x)
        # Flatten everything but the batch dimension before the heads.
        flat = features.view(features.size(0), -1)
        output_graph = self.head_graph(flat)
        output_vowel = self.head_vowel(flat)
        output_conso = self.head_conso(flat)
        return output_graph, output_vowel, output_conso






In [None]:
# Unpack the generated archive into the 'test_128' directory. The context
# manager closes the archive handle once extraction is done.
file_dir = './test_128.zip'
with zipfile.ZipFile(file_dir) as zipFile:
    zipFile.extractall('test_128')

In [None]:
from torchvision import transforms
import os
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from torch.utils.data.sampler import *
from torchvision import transforms
from PIL import Image

class MyDataset(Dataset):
    """Dataset over the 'Test_<index>.png' images stored in a directory.

    The length is the number of entries in `img_dir`, while item `index` is
    always read from the file named 'Test_<index>.png' in that directory.
    """

    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        self.transform = transform
        self.files = os.listdir(img_dir)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, index):
        """Return `(image, path)`; the image is transformed when a transform is set."""
        img_path = os.path.join(self.img_dir, 'Test_' + str(index) + '.png')
        img = Image.open(img_path)
        if self.transform is None:
            return img, img_path
        return self.transform(img), img_path


In [None]:
# Evaluation-time preprocessing: convert to a tensor, then normalize with the
# dataset mean/std computed in the image-stats cell.
val_augment = transforms.Compose([
    # transforms.Resize((input_size,input_size)),
    transforms.ToTensor(),
    transforms.Normalize((img_avr,), (img_std,)),
    ])
# Directory the processed test images were extracted to.
IMAGE_DIR = './test_128'
dataset  = MyDataset(IMAGE_DIR, val_augment)
# No shuffling is requested, so batches follow the dataset's index order.
test_loader = DataLoader(dataset,
                        batch_size  = 32,
                        drop_last   = False,
                        num_workers = 4,
                        pin_memory  = True)

# plt.figure("Image") # image window name
# plt.imshow(image)
# 'none' is one of the values MultiHeadNet treats as "no pre-trained weights".
model = MultiHeadNet('resnet18', 'none')
# Wrap in DataParallel and move to the GPU before loading the checkpoint.
model = torch.nn.DataParallel(model).cuda()
print(model)
checkpoint = torch.load("/kaggle/input/checkpoint/checkpoint_epoch_006_macro_avg_recall_0.8878.pth")
# print(checkpoint)
# The weights are stored under the 'state_dict' key of the checkpoint.
model.load_state_dict(checkpoint['state_dict'])
# Switch to evaluation mode for inference.
model.eval()

In [None]:
# Run the model over the test loader and collect, for every image, its path
# and the arg-max class index predicted by each of the three heads.
allpathes=[]
allpreds_root = []
allpreds_vowel = []
allpreds_consonant = []
# Inference only: disable autograd so no graph is built or kept per batch.
with torch.no_grad():
    for image, img_path in test_loader:
        output_graph, output_vowel, output_conso = model(image)
        # axis=1 takes the arg-max over the class dimension of each row.
        preds_root = np.argmax(output_graph.cpu().numpy(), axis=1)
        preds_vowel = np.argmax(output_vowel.cpu().numpy(), axis=1)
        preds_consonant = np.argmax(output_conso.cpu().numpy(), axis=1)
        allpathes.extend(img_path)
        allpreds_root.extend(preds_root.tolist())
        allpreds_vowel.extend(preds_vowel.tolist())
        allpreds_consonant.extend(preds_consonant.tolist())
print(allpathes)
print(allpreds_root)
print(allpreds_vowel)
print(allpreds_consonant)

In [None]:
# Build the submission: three rows per image, in the order
# consonant_diacritic, grapheme_root, vowel_diacritic.
row_id = []
target = []
components = (
    ('_consonant_diacritic', allpreds_consonant),
    ('_grapheme_root', allpreds_root),
    ('_vowel_diacritic', allpreds_vowel),
)
for idx in range(len(allpathes)):
    # Row ids are built from the running index, not from the image path.
    prefix = 'Test_' + str(idx)
    target.extend(preds[idx] for _, preds in components)
    row_id.extend(prefix + suffix for suffix, _ in components)

print(row_id)
print(target)
df = pd.DataFrame(zip(row_id, target), columns=['row_id', 'target'])
print(df.head(10))
df.to_csv('submission.csv', index=False)