In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import os
import matplotlib.pyplot as plt
import pylab
import librosa
import librosa.display
import numpy as np
import pandas as pd
import io
import tarfile
import PIL
import boto3
from fastai import *
from fastai.vision import *
import cv2
from tqdm import tqdm_notebook as tqdm
import zipfile
import io
import warnings
warnings.filterwarnings("ignore")

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
    #for filename in filenames:
        #print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

In [None]:
# Run configuration shared by the cells below.
SEED = 2019
fold, nfolds = 0, 4  # keep the same split as the initial dataset
sz = bs = 128        # sz: size passed to .transform(); bs: batch size passed to .databunch()
LABELS = '../input/bengaliai-cv19/train.csv'
TRAIN = '../input/grapheme-imgs-128x128/'
# NOTE(review): `arch` is not referenced by any other cell visible here.
arch = models.resnet34

def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: integer seed applied to `random`, `np.random`, `torch` (CPU and
            the current CUDA device) and exported as PYTHONHASHSEED.

    NOTE(review): `random` and `torch` are not imported explicitly in this
    notebook; presumably they arrive through the fastai star imports - confirm.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # subprocesses); the running kernel's hash seed was fixed at startup.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick kernels per run, which can
    # change results between runs; it must be off for reproducibility.
    torch.backends.cudnn.benchmark = False

# Fix all RNG seeds once, before any data or model object is created.
seed_everything(seed=SEED)

In [None]:
# Load the label table and show the distinct-value count of every column
# except the first and the last one.
df = pd.read_csv(LABELS)
nunique = list(df.nunique().iloc[1:-1])
print(nunique)
df.tail()

In [None]:
# Test rows are reshaped to HEIGHT x WIDTH; SIZE is the default output side of crop_resize.
HEIGHT, WIDTH = 137, 236
SIZE = 128

# The four test parquet shards, in index order.
TEST = [f'/kaggle/input/bengaliai-cv19/test_image_data_{shard}.parquet'
        for shard in range(4)]

OUT_TEST = 'test.zip'

In [None]:
def bbox(img):
    """Return the inclusive bounding box of the truthy entries of a 2-D array.

    Returns:
        (rmin, rmax, cmin, cmax): indices of the first and last row, then the
        first and last column, that contain at least one truthy value.

    Raises:
        IndexError: if `img` has no truthy entry at all.
    """
    occupied_rows = np.flatnonzero(np.any(img, axis=1))
    occupied_cols = np.flatnonzero(np.any(img, axis=0))
    return occupied_rows[0], occupied_rows[-1], occupied_cols[0], occupied_cols[-1]

def crop_resize(img0, size=SIZE, pad=16):
    """Crop an image to its bright content, pad it to a square and resize it.

    Args:
        img0: 2-D intensity array. It is no longer modified by this function.
        size: side length of the square output passed to cv2.resize.
        pad: extra pixels added to the longer crop side before squaring.

    Returns:
        The (size, size) array produced by cv2.resize.

    Raises:
        IndexError: propagated from bbox() when no pixel of the trimmed
            interior exceeds the threshold (e.g. a blank image).
    """
    # Clamp against the actual input shape rather than module-level constants,
    # so the function also works for inputs that are not HEIGHT x WIDTH.
    height, width = img0.shape[:2]
    #crop a box around pixels larger than the threshold
    #some images contain a line at the sides, so a 5px border is ignored
    # NOTE(review): the indices returned here are relative to the trimmed
    # view and are applied to img0 below without adding the 5px offset back.
    # Left as is because changing it would alter the produced crops - confirm
    # against how the training images were generated before touching it.
    ymin,ymax,xmin,xmax = bbox(img0[5:-5,5:-5] > 80) #try 60
    #cropping may cut too much, so we need to add it back
    xmin = xmin - 13 if (xmin > 13) else 0
    ymin = ymin - 10 if (ymin > 10) else 0
    xmax = xmax + 13 if (xmax < width - 13) else width
    ymax = ymax + 10 if (ymax < height - 10) else height
    # Copy: a plain slice is a view, and the thresholding below would
    # otherwise write zeros into the caller's array.
    img = img0[ymin:ymax,xmin:xmax].copy()
    #remove low intensity pixels as noise
    img[img < 28] = 0
    lx, ly = xmax-xmin,ymax-ymin
    l = max(lx,ly) + pad
    #make sure that the aspect ratio is kept in rescaling
    img = np.pad(img, [((l-ly)//2,), ((l-lx)//2,)], mode='constant')
    return cv2.resize(img,(size,size))

In [None]:
# Visual sanity check of crop_resize on the first rows of the first test shard.
# A dedicated name is used so the labels frame `df` is not overwritten with
# pixel data.
test_preview_df = pd.read_parquet(TEST[0])
n_imgs = 3
fig, axs = plt.subplots(n_imgs, 2, figsize=(10, 5*n_imgs))

for idx in range(n_imgs):
    #somehow the original input is inverted
    img0 = 255 - test_preview_df.iloc[idx, 1:].values.reshape(HEIGHT, WIDTH).astype(np.uint8)
    #normalize each image by its max val
    img = (img0*(255.0/img0.max())).astype(np.uint8)
    img = crop_resize(img)

    # Left panel: inverted raw image; right panel: processed model input.
    for ax, picture, title in ((axs[idx,0], img0, 'Original image'),
                               (axs[idx,1], img, 'Crop & resize')):
        ax.imshow(picture)
        ax.set_title(title)
        ax.axis('off')
plt.show()

# The preview frame is not needed afterwards; release it.
del test_preview_df

In [None]:
# (Re)load the label table so `df` is the labels frame for the databunch cells below.
df = pd.read_csv(LABELS)
nunique = list(df.nunique().iloc[1:-1])
print(nunique)
df.tail()

In [None]:
# NOTE(review): `pathimg` is not referenced by any other cell visible in this
# notebook - confirm whether this lookup is still needed.
pathimg = datapath4file('../input/test-bengali')

In [None]:
# Databunch labelled with 'grapheme_root'; it is handed to cnn_learner to build learn1.
stats = ([0.0692], [0.2051])  # values passed to .normalize()
# Validation rows are the contiguous slice belonging to the selected fold.
fold_start, fold_stop = fold*len(df)//nfolds, (fold+1)*len(df)//nfolds
aug_tfms = get_transforms(do_flip=False, max_warp=0.1)
root_items = ImageList.from_df(df, path='.', folder=TRAIN, suffix='.png', cols='image_id')
root_labelled = (root_items
                 .split_by_idx(range(fold_start, fold_stop))
                 .label_from_df(cols=['grapheme_root']))
data1 = (root_labelled
         .transform(aug_tfms, size=sz, padding_mode='zeros')
         .databunch(bs=bs)
         .normalize(stats))

#data1.show_batch()

In [None]:
# Databunch labelled with 'vowel_diacritic'; it is handed to cnn_learner to build learn2.
stats = ([0.0692], [0.2051])  # values passed to .normalize()
# Validation rows are the contiguous slice belonging to the selected fold.
fold_start, fold_stop = fold*len(df)//nfolds, (fold+1)*len(df)//nfolds
aug_tfms = get_transforms(do_flip=False, max_warp=0.1)
vowel_items = ImageList.from_df(df, path='.', folder=TRAIN, suffix='.png', cols='image_id')
vowel_labelled = (vowel_items
                  .split_by_idx(range(fold_start, fold_stop))
                  .label_from_df(cols=['vowel_diacritic']))
data2 = (vowel_labelled
         .transform(aug_tfms, size=sz, padding_mode='zeros')
         .databunch(bs=bs)
         .normalize(stats))

#data2.show_batch()

In [None]:
# Databunch labelled with 'consonant_diacritic'; it is handed to cnn_learner to build learn3.
stats = ([0.0692], [0.2051])  # values passed to .normalize()
# Validation rows are the contiguous slice belonging to the selected fold.
fold_start, fold_stop = fold*len(df)//nfolds, (fold+1)*len(df)//nfolds
aug_tfms = get_transforms(do_flip=False, max_warp=0.1)
consonant_items = ImageList.from_df(df, path='.', folder=TRAIN, suffix='.png', cols='image_id')
consonant_labelled = (consonant_items
                      .split_by_idx(range(fold_start, fold_stop))
                      .label_from_df(cols=['consonant_diacritic']))
data3 = (consonant_labelled
         .transform(aug_tfms, size=sz, padding_mode='zeros')
         .databunch(bs=bs)
         .normalize(stats))

#data3.show_batch()

In [None]:
from torchvision.models import resnet34

In [None]:
def my_resnet(pretrained=False, progress=True, **kwargs):
    """Build a torchvision ResNet-34 and load its weights from a local file.

    Args:
        pretrained: accepted so the function has the signature of a
            torchvision model constructor, but deliberately not forwarded:
            the weights always come from the local checkpoint, never from a
            download.
        progress: forwarded to `resnet34`.
        **kwargs: forwarded to `resnet34`.

    Returns:
        The ResNet-34 module with the checkpoint's state dict loaded.
    """
    m = resnet34(pretrained=False, progress=progress, **kwargs)
    # map_location='cpu' lets the checkpoint load on a machine without a GPU
    # even if it was saved from CUDA tensors; load_state_dict copies the
    # values into the model's own parameters.
    state_dict = torch.load("/kaggle/input/resnet34/resnet34.pth", map_location='cpu')
    m.load_state_dict(state_dict)
    return m

In [None]:
# One learner per databunch, created in order; my_resnet supplies the backbone for each.
learn1, learn2, learn3 = [cnn_learner(bunch, my_resnet)
                          for bunch in (data1, data2, data3)]

In [None]:
# Load the saved weights into each learner.
# Done in a loop so the cell does not end on a bare `learnN.load(...)`
# expression, which would make the notebook echo whatever load() returns.
for learner, stage in ((learn1, 'stage-1'), (learn2, 'stage-2'), (learn3, 'stage-3')):
    learner.load(f'/kaggle/input/saved-models/{stage}')

In [None]:
# Predict the three targets for every test image.
# `file_name` collects the submission row ids and `pred` the matching class indices.
file_name, pred = [], []

# (row-id suffix, learner) pairs in the order the rows are emitted per image.
heads = (('consonant_diacritic', learn3),
         ('grapheme_root', learn1),
         ('vowel_diacritic', learn2))

for parquet_path in TEST:
    test_df = pd.read_parquet(parquet_path)
    #the input is inverted
    pixels = 255 - test_df.iloc[:, 1:].values.reshape(-1, HEIGHT, WIDTH).astype(np.uint8)
    for row in tqdm(range(len(test_df))):
        image_id = test_df.iloc[row, 0]
        raw = pixels[row]
        #normalize each image by its max val
        scaled = (raw*(255.0/raw.max())).astype(np.uint8)
        # replicate the processed grayscale image into three channels
        rgb = np.stack((crop_resize(scaled),)*3, axis=-1)
        # to make prediction with {model}.predict converting to tensor is important
        tensor = pil2tensor(rgb, np.float32).div_(255)
        for component, learner in heads:
            pred.append(int(learner.predict(Image(tensor))[0]))
            file_name.append(str(image_id)+'_'+component)

In [None]:
# Disabled draft kept as a bare string literal, so nothing inside it executes.
# It predicted row by row over TEST[0] only, wrapping a uint8 tensor in Image
# without the division by 255 that the active inference cell applies.
"""x_tot,x2_tot = [],[]
row_id1, catag1 = [], []

for i in range(1):
    df = pd.read_parquet(TEST[i])
    n_imgs=np.size(df,0)
    for idx in range(n_imgs):
        #somehow the original input is inverted
        img0 = 255 - df.iloc[idx, 1:].values.reshape(HEIGHT, WIDTH).astype(np.uint8)
        #normalize each image by its max val
        img = (img0*(255.0/img0.max())).astype(np.uint8)
        img = crop_resize(img)
        img = np.stack((img,)*3, axis=-1)
        #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_fastai = Image(pil2tensor(img, dtype=np.uint8))
        row_id1.append(df.image_id[idx]+"_consonant_diacritic")
        row_id1.append(df.image_id[idx]+"_grapheme_root")
        row_id1.append(df.image_id[idx]+"_vowel_diacritic")
        catag1.append(int(learn3.predict(img_fastai)[0]))
        catag1.append(int(learn1.predict(img_fastai)[0]))
        catag1.append(int(learn2.predict(img_fastai)[0]))"""

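Related Kaggle notebooks:
- [Bengali graphemes model inference (gaur128)](https://www.kaggle.com/gaur128/bengali-graphemems-model-inference?scriptVersionId=29481219)
- [fastai inference 128x128 v2 (venky2506)](https://www.kaggle.com/venky2506/fastai-inference-128x128-v2)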

In [None]:
# Disabled draft kept as a bare string literal, so nothing inside it executes.
# It looped over TEST[3], building a fastai Image per row via cv2.cvtColor,
# and made no predictions.
'''df = pd.read_parquet(TEST[3])
n_imgs=np.size(df,0)
for idx in range(n_imgs):
    #somehow the original input is inverted
    img0 = 255 - df.iloc[idx, 1:].values.reshape(HEIGHT, WIDTH).astype(np.uint8)
    #normalize each image by its max val
    img = (img0*(255.0/img0.max())).astype(np.uint8)
    img = crop_resize(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_fastai = Image(pil2tensor(img, dtype=np.uint8))'''

In [None]:
# Disabled draft kept as a bare string literal, so nothing inside it executes.
# It sketched a Dataset that reads one parquet file and yields
# (normalized image, name) pairs.
# NOTE: the literal opens with four quote characters, so the string's text
# begins with a stray "'" - it would need removing before re-enabling the code.
''''class GraphemeDataset(Dataset):
    def __init__(self, fname):
        self.df = pd.read_parquet(fname)
        self.data = 255 - self.df.iloc[:, 1:].values.reshape(-1, HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        name = self.df.iloc[idx,0]
        #normalize each image by its max val
        img = (self.data[idx]*(255.0/self.data[idx].max())).astype(np.uint8)
        img = crop_resize(img)
        img = (img.astype(np.float32)/255.0 - stats[0])/stats[1]
        return img, name'''


In [None]:
#nworkers = 2

In [None]:
# Disabled fragment kept as a bare string literal, so nothing inside it executes.
# It predicted once per batch `x` and repeated that triple for every name in `y`.
# The same text appears verbatim in an adjacent cell.
'''p1,p2,p3 = int(learn1.predict(Image(x.float()))[0]), int(learn2.predict(Image(x.float()))[0]), int(learn3.predict(Image(x.float()))[0])
            for idx,name in enumerate(y):
                row_id += [f'{name}_grapheme_root',f'{name}_vowel_diacritic',
                           f'{name}_consonant_diacritic']
                target += [p1,p2,p3]'''

In [None]:
# Disabled fragment kept as a bare string literal, so nothing inside it executes.
# It predicted once per batch `x` and repeated that triple for every name in `y`.
# The same text appears verbatim in an adjacent cell.
'''p1,p2,p3 = int(learn1.predict(Image(x.float()))[0]), int(learn2.predict(Image(x.float()))[0]), int(learn3.predict(Image(x.float()))[0])
            for idx,name in enumerate(y):
                row_id += [f'{name}_grapheme_root',f'{name}_vowel_diacritic',
                           f'{name}_consonant_diacritic']
                target += [p1,p2,p3]'''

In [None]:
# Disabled draft kept as a bare string literal, so nothing inside it executes.
# It iterated a DataLoader over GraphemeDataset for each test file and only
# printed the batches.
# NOTE: the literal opens with four quote characters, so the string's text
# begins with a stray "'" - it would need removing before re-enabling the code.
''''row_id,target = [],[]
for fname in TEST:
    ds = GraphemeDataset(fname)
    dl = DataLoader(ds, batch_size=bs, num_workers=nworkers, shuffle=False)
    with torch.no_grad():
        for x,y in tqdm(dl):
            print(x)'''
 

In [None]:
# Disabled draft kept as a bare string literal, so nothing inside it executes.
# It wrote submission.csv from `row_id`/`target` lists.
'''sub_df = pd.DataFrame({'row_id': row_id, 'target': target})
sub_df.to_csv('submission.csv', index=False)
sub_df.head(100)'''

In [None]:
# Disabled earlier inference path kept as a bare string literal, so nothing
# inside it executes. It wrote every processed image to 'output.png' and
# re-read it with open_image before predicting with the three learners.
'''#real
x_tot,x2_tot = [],[]
row_id, catag = [], []

for i in range(4):
    df = pd.read_parquet(TEST[i])
    n_imgs=np.size(df,0)
    for idx in range(n_imgs):
        #somehow the original input is inverted
        img0 = 255 - df.iloc[idx, 1:].values.reshape(HEIGHT, WIDTH).astype(np.uint8)
        #normalize each image by its max val
        img = (img0*(255.0/img0.max())).astype(np.uint8)
        img = crop_resize(img)
        cv2.imwrite('output.png',img)
        A=open_image('output.png') 
        row_id.append(df.image_id[idx]+"_consonant_diacritic")
        row_id.append(df.image_id[idx]+"_grapheme_root")
        row_id.append(df.image_id[idx]+"_vowel_diacritic")
        catag.append(int(learn3.predict(A)[0]))
        catag.append(int(learn1.predict(A)[0]))
        catag.append(int(learn2.predict(A)[0]))
    del df
    #print(int(learn1.predict(A)[0]))
    #img2 = cv2.cvtColor(np.float32(img), cv2.COLOR_BGR2RGB)
    #img_fastai = Image(pil2tensor(img2, dtype=np.float32))
    #print(int(learn1.predict(img_fastai)[0]))'''

In [None]:
# Assemble the two submission columns and write submission.csv without the index.
# The intermediate names are kept because the cleanup cell deletes them by name.
row_id, catag = pd.Series(file_name), pd.Series(pred)
frame = dict(row_id=row_id, target=catag)
result = pd.DataFrame(frame)
result.to_csv('submission.csv', index=False)

In [None]:
# Drop the inference intermediates and the labels frame; `result` is kept.
del file_name, pred, row_id, catag, df, frame

In [None]:
#!rm -rf  /kaggle/working/output.png/*