In [None]:
%%time
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import matplotlib.pyplot as  py
import cv2
import pandas as pd
from fastai.vision import *
import os
import glob
import imageio
import warnings
warnings.filterwarnings("ignore")

 <h1>Preprocessing to obtain 128x128 images</h1>

In [None]:
%%time
# Each parquet row holds one flattened image; rows are reshaped to
# HEIGHT x WIDTH in the preprocessing loop below.
HEIGHT = 137
WIDTH = 236
# Default side length of the square images returned by crop_resize.
SIZE = 128
# (mean, std) pair handed to .normalize() when the test databunch is built.
stats = (0.0692, 0.2051)
#check https://www.kaggle.com/iafoss/image-preprocessing-128x128
# Parquet shards to run inference on. These must be the *test* shards: the
# images produced from this list are written to /data/test, attached as the
# test set and their predictions are written into sample_submission.csv.
# (The list previously pointed at the train_image_data_* shards.)
TEST = ['/kaggle/input/bengaliai-cv19/test_image_data_0.parquet',
        '/kaggle/input/bengaliai-cv19/test_image_data_1.parquet',
        '/kaggle/input/bengaliai-cv19/test_image_data_2.parquet',
        '/kaggle/input/bengaliai-cv19/test_image_data_3.parquet']
def bbox(img):
    """Return the bounding box of the truthy entries of a 2-D array.

    Parameters
    ----------
    img : 2-D array-like (typically a boolean mask)

    Returns
    -------
    (rmin, rmax, cmin, cmax) : inclusive indices of the first/last row and
        first/last column that contain at least one truthy value.

    If the array has no truthy entry at all, the full extent
    ``(0, n_rows - 1, 0, n_cols - 1)`` is returned instead of raising
    ``IndexError``, so a blank image does not abort a whole batch.
    """
    row_hits = np.flatnonzero(np.any(img, axis=1))
    col_hits = np.flatnonzero(np.any(img, axis=0))
    if row_hits.size == 0:
        # No truthy row implies no truthy column either: fall back to the
        # whole array.
        n_rows, n_cols = np.shape(img)[:2]
        return 0, n_rows - 1, 0, n_cols - 1
    return row_hits[0], row_hits[-1], col_hits[0], col_hits[-1]

def crop_resize(img0, size=SIZE, pad=16):
    """Crop an image around its bright content, pad it to a square and resize.

    Parameters
    ----------
    img0 : 2-D uint8 array (rows x columns). It is not modified.
    size : side length of the returned square image.
    pad  : extra pixels added to the longer side before squaring.

    Returns
    -------
    The result of ``cv2.resize`` on the squared crop, with shape (size, size).

    The image bounds are taken from ``img0.shape`` rather than from the
    module-level WIDTH/HEIGHT constants, so the function keeps working after
    those names are deleted and for inputs of other sizes; for 137x236 inputs
    the result is unchanged.
    """
    height, width = img0.shape[:2]
    #crop a box around pixels large than the threshold 
    #some images contain line at the sides
    # NOTE: the box is computed on a view trimmed by 5 px per side, so its
    # coordinates are relative to that view; the margins below are applied to
    # those coordinates as-is. NOTE(review): presumably this must stay identical
    # to the preprocessing used for the training images - confirm before changing.
    ymin,ymax,xmin,xmax = bbox(img0[5:-5,5:-5] > 80)
    #cropping may cut too much, so we need to add it back
    xmin = xmin - 13 if (xmin > 13) else 0
    ymin = ymin - 10 if (ymin > 10) else 0
    xmax = xmax + 13 if (xmax < width - 13) else width
    ymax = ymax + 10 if (ymax < height - 10) else height
    # Copy the crop: a plain slice is a view, and the thresholding below would
    # otherwise write zeros into the caller's array.
    img = img0[ymin:ymax,xmin:xmax].copy()
    #remove lo intensity pixels as noise
    img[img < 28] = 0
    lx, ly = xmax-xmin,ymax-ymin
    l = max(lx,ly) + pad
    #make sure that the aspect ratio is kept in rescaling
    img = np.pad(img, [((l-ly)//2,), ((l-lx)//2,)], mode='constant')
    return cv2.resize(img,(size,size))
# Preprocessed images, in the order they appear across the parquet shards.
ima = []
for fname in TEST:
    df = pd.read_parquet(fname)
    # Column 0 is skipped; the remaining columns are reshaped into one
    # HEIGHT x WIDTH image per row. The input is inverted, hence 255 - x.
    data = 255 - df.iloc[:, 1:].values.reshape(-1, HEIGHT, WIDTH).astype(np.uint8)
    for raw in data:
        # Stretch each image so that its brightest pixel becomes 255.
        scale = 255.0 / raw.max()
        img = (raw * scale).astype(np.uint8)
        img = crop_resize(img)
        ima.append(img)

In [None]:
# Drop the preprocessing inputs and intermediates (presumably to release
# memory); only the `ima` list is needed from here on.
del TEST, HEIGHT, WIDTH, SIZE, img, data, df
       

# Saving the images in a directory


In [None]:
%%time
# Stack the list of preprocessed images into a single array.
im128 = np.array(ima)

def save_imgs(path:Path, data):
    """Write every image in `data` to `path` as `<position>.png`.

    `path` (and any missing parents) is created first; the file name is the
    image's position in `data`.
    """
    path.mkdir(parents=True, exist_ok=True)
    for position, image in enumerate(data):
        imageio.imsave(path/'{}.png'.format(position), image)

save_imgs(Path('/data/test'), im128)


In [None]:
#!cp /kaggle/input/grapheme-imgs-128x128 -r /data/train
# The images are on disk now; drop both in-memory copies.
del ima, im128

# Databunch creation for test images

In [None]:
%time
ptrain = pd.read_csv('/kaggle/input/bengaliai-cv19/train.csv')
ptrain['Image_path'] = ptrain.apply(lambda row: '/kaggle/input/grapheme-imgs-128x128/' + row.image_id + '.png', axis = 1)
ptrain['grapheme_root'] = ptrain.apply(lambda row: str(row.grapheme_root), axis = 1)
ptrain['vowel_diacritic'] = ptrain.apply(lambda row: str(row.vowel_diacritic), axis = 1)
ptrain['consonant_diacritic'] = ptrain.apply(lambda row: str(row.consonant_diacritic), axis = 1)


In [None]:
%time
ptra=glob.glob('/kaggle/input/grapheme-imgs-128x128/*')
p1=pd.DataFrame(ptra,columns=['Image_path'])
def process(s):
    """Return the file-name component of the path `s`.

    `s` is converted with ``str`` first, so path objects are accepted. Unlike
    picking a fixed '/'-separated component, this works for any directory
    depth and is a no-op on a value that already is a bare file name, so
    re-running the cell does not fail.
    """
    return os.path.basename(str(s))
# Reduce both path columns to what `process` returns so they share a join key.
p1['Image_path'] = p1['Image_path'].map(process)
ptrain['Image_path'] = ptrain['Image_path'].map(process)
# Default (inner) merge with the file listing on the left, so only images
# that exist on disk are kept and Image_path stays the first column.
p3 = pd.merge(p1, ptrain, on='Image_path')

In [None]:
# Only the merged frame p3 is used from here on.
del ptrain, p1, ptra

In [None]:
%time
tfms = get_transforms(do_flip=False,)
data = ImageDataBunch.from_folder('../input', 
                                  train="grapheme-imgs-128x128",
                                  size=128,bs=128).normalize(stats)
test=ImageList.from_folder('/data/test')

In [None]:
# Attach the preprocessed images as the test set of `data`; the prediction
# loop further down iterates over them through data.test_ds.
data.add_test(test,tfm_y=False)

# Databunch creation for training images

In [None]:
%%time
def _label_databunch(label_col):
    """Build the normalized databunch over p3 whose target is `label_col`."""
    bunch = ImageDataBunch.from_df(path='/kaggle/input/',
                                   folder='grapheme-imgs-128x128',
                                   df=p3,
                                   bs=128,
                                   size=128,
                                   label_col=label_col,
                                   tfm_y=False)
    return bunch.normalize(imagenet_stats)

# One databunch per target component, created in the same order as before.
data_cd = _label_databunch('consonant_diacritic')
data_gr = _label_databunch('grapheme_root')
data_vd = _label_databunch('vowel_diacritic')

# Model loading 

In [None]:
%%time
if not os.path.exists('/root/.cache/torch/checkpoints'):
        os.makedirs('/root/.cache/torch/checkpoints')
!cp /kaggle/input/fastai-pretrained-models/densenet121-a639ec97.pth /root/.cache/torch/checkpoints/densenet121-a639ec97.pth

learn_cd = cnn_learner(data_cd, models.densenet121, metrics=[error_rate, accuracy],model_dir = Path('../kaggle/working'),).to_fp16()
learn_vd = cnn_learner(data_vd, models.densenet121, metrics=[error_rate, accuracy],model_dir = Path('../kaggle/working'),).to_fp16()
learn_gr = cnn_learner(data_gr, models.densenet121, metrics=[error_rate, accuracy], model_dir = Path('../kaggle/working'),).to_fp16()

In [None]:
# The learners were built from these; the separate names are no longer used.
del data_cd, data_vd, data_gr

In [None]:
%%capture
# Load the saved weights into each learner (grapheme root first, then the two
# diacritic models - same order as before).
for _learner, _weights in ((learn_gr, '/kaggle/input/modelgr/best_gr_model'),
                           (learn_cd, '/kaggle/input/models/best_cd_model'),
                           (learn_vd, '/kaggle/input/models/best_vd_model')):
    _learner.load(_weights)
# Do not keep an extra reference to the last learner alive.
del _learner, _weights

In [None]:
%%capture
# Per-image predictions, one list per target component. The list names are
# read by the submission cell, so they are kept:
#   m1_pred1 -> grapheme_root       (learn_gr)
#   m2_pred2 -> consonant_diacritic (learn_cd)
#   m3_pred3 -> vowel_diacritic     (learn_vd)
m1_pred1=[]
m2_pred2=[]
m3_pred3=[]

def _predicted_label(learner, image):
    """Return the label predicted by `learner` for `image`, as an int.

    predict() returns (category, class index, probabilities). The class index
    is a position in the learner's class list, not the label itself: the
    labels were cast to strings before the databunches were built, so the
    class list is ordered as text ('10' sorts before '2') and index != label
    as soon as there are more than ten classes. The label is therefore decoded
    from the category. Note that int(category) would give the index again,
    hence the detour through str().
    NOTE(review): this assumes the loaded checkpoints were trained on
    databunches built the same way (string labels) - confirm.
    """
    category = learner.predict(image)[0]
    return int(str(category))

for sample in data.test_ds:
    image = sample[0]
    m2_pred2.append(_predicted_label(learn_cd, image))
    m3_pred3.append(_predicted_label(learn_vd, image))
    m1_pred1.append(_predicted_label(learn_gr, image))

In [None]:
# All predictions are collected; the learners are not used again.
del learn_gr, learn_vd, learn_cd

# Writing the predictions into the submission file

In [None]:
# Converting data to submission format

# The 'target' column is filled in groups of three consecutive rows, one group
# per image (assumes sample_submission.csv lists the rows in this order):
#   row 3k     <- m2_pred2[k]  (consonant_diacritic)
#   row 3k + 1 <- m1_pred1[k]  (grapheme_root)
#   row 3k + 2 <- m3_pred3[k]  (vowel_diacritic)
sample_sub = pd.read_csv('/kaggle/input/bengaliai-cv19/sample_submission.csv')
preds_by_slot = (m2_pred2, m1_pred1, m3_pred3)
n_rows = sample_sub['target'].shape[0]
for row in range(n_rows):
    image_no, slot = divmod(row, 3)
    sample_sub.at[row, 'target'] = preds_by_slot[slot][image_no]
del preds_by_slot, n_rows

In [None]:
# Writing to submission csv file
# index=False keeps the DataFrame's row index out of the written file.
sample_sub.to_csv('submission.csv', index=False)

In [None]:
# Display the first rows as a quick check of the filled-in targets.
sample_sub.head()

In [None]:
# The submission has been written to disk; drop the in-memory frame.
del sample_sub

In [None]:
# List the current directory (presumably to confirm submission.csv was written).
!ls