In [12]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import cv2
from tqdm import tqdm_notebook as tqdm
from fastai2.vision.all import xresnet50, LinBnDrop
from fastai.vision import *
import os
import warnings
warnings.filterwarnings("ignore")

In [5]:
# Raw image geometry (rows x columns) and the square side produced by crop_resize
HEIGHT, WIDTH = 137, 236
SIZE = 128

# Inference settings
NUMENS = 5          # divisor used for the per-model ensemble weight
bs = 128            # DataLoader batch size
sz = 128            # side of the dummy input used to probe the model body
nworkers = 8        # DataLoader worker processes
stats = (0.0728, 0.2065)   # subtracted from / divided into the 0..1 pixel values in GraphemeDataset

# Backbone constructors
dnet_arch = models.densenet121
xres_arch = xresnet50

# Input locations
inp = '../input'
md = inp + '/bengaliai-cv19'
MODELS = inp + '/bengali-weights'
LABELS = inp + '/folded_data/train_with_fold.csv'

# The four test shards, in order
TEST = [md + f'/test_image_data_{i}.parquet' for i in range(4)]

submission_df = pd.read_csv(md + '/sample_submission.csv')
test_df = pd.read_csv(md + '/test.csv')

df = pd.read_csv(LABELS)
# distinct-value count of every column except the first and the last
nunique = list(df.nunique())[1:-1]
# checkpoint file names, sorted so the model order is deterministic
mdir = sorted(os.listdir(MODELS))

# Model

In [None]:
class MishFunction(torch.autograd.Function):
    """Mish activation, x * tanh(softplus(x)), with a hand-written backward pass.

    Only the input tensor is saved in forward; tanh(softplus(x)) is recomputed
    in backward rather than being stored.
    """
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return x * torch.tanh(F.softplus(x))   # x * tanh(ln(1 + exp(x)))

    @staticmethod
    def backward(ctx, grad_output):
        # saved_tensors is the supported accessor; saved_variables is a deprecated alias
        x, = ctx.saved_tensors
        sigmoid = torch.sigmoid(x)              # derivative of softplus
        tanh_sp = torch.tanh(F.softplus(x))
        # d/dx [x * tanh(sp(x))] = tanh(sp) + x * sigmoid(x) * (1 - tanh(sp)^2)
        return grad_output * (tanh_sp + x * sigmoid * (1 - tanh_sp * tanh_sp))

class Mish(nn.Module):
    """Layer form of MishFunction, so the activation can be placed inside a model."""
    def forward(self, x):
        # custom autograd Functions are invoked through .apply
        activated = MishFunction.apply(x)
        return activated

def to_Mish(model):
    """Replace every nn.ReLU found anywhere under `model` with a Mish layer.

    The model is modified in place (attribute by attribute) and nothing is returned.
    """
    for name, module in model.named_children():
        if not isinstance(module, nn.ReLU):
            # not an activation to swap: look for ReLUs further down
            to_Mish(module)
            continue
        setattr(model, name, Mish())

In [None]:
class Head(Module):
    """Classification head applied to the body's feature map.

    nc: channel count of the incoming feature map (the first linear stage takes nc*2 inputs)
    n:  number of outputs of the last linear stage
    ps: value forwarded to both LinBnDrop blocks (presumably the dropout probability)
    """
    def __init__(self, nc, n, ps=0.5):
        # the position of each layer fixes its state-dict key (fc.0 ... fc.4), so keep the order
        layers = [
            AdaptiveConcatPool2d(),
            Mish(),
            Flatten(),
            LinBnDrop(nc*2, 512, True, ps, Mish()),
            LinBnDrop(512, n, True, ps),
        ]
        self.fc = nn.Sequential(*layers)
        self._init_weight()

    def _init_weight(self):
        # NOTE(review): only Conv2d and BatchNorm2d modules are touched here; if the
        # layers built in __init__ contain neither, this loop is a no-op -- confirm.
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.weight.data.fill_(1.0)
                layer.bias.data.zero_()
            elif isinstance(layer, nn.Conv2d):
                torch.nn.init.kaiming_normal_(layer.weight)

    def forward(self, x):
        logits = self.fc(x)
        return logits

class CascadeModel(Module):
    """Shared convolutional body followed by one Head per entry of `n`.

    arch: backbone constructor, called as arch(pre)
    n:    output size of each head; forward returns the head outputs in this order
    pre:  forwarded positionally to `arch` (presumably the "pretrained" flag)
    """
    def __init__(self, arch=xres_arch, n=[168,11,7], pre=True):
        m = arch(pre)
        # keep everything except the last four children of the backbone
        m = nn.Sequential(*children_and_parameters(m)[:-4])
        # Replacement stem conv for single-channel input. It is declared with
        # in_channels=1 so the module's metadata agrees with the weight assigned below.
        conv = nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1, bias=False)
        # collapse the original kernel over its input-channel axis, keeping a size-1 channel dim
        w = (m[0][0].weight.sum(1)).unsqueeze(1)
        conv.weight = nn.Parameter(w)
        m[0][0] = conv
        # run a dummy batch through the body to read off its output channel count
        nc = m(torch.zeros(2, 1, sz, sz)).detach().shape[1]
        self.body = m
        self.heads = nn.ModuleList([Head(nc, c) for c in n])
        
    def forward(self, x):    
        x = self.body(x)
        # every head sees the same feature map
        return [f(x) for f in self.heads]

# Data

In [8]:
#check https://www.kaggle.com/iafoss/image-preprocessing-128x128

def bbox(img):
    """Return (rmin, rmax, cmin, cmax), the inclusive bounds of the non-zero region of `img`.

    `img` is reduced with np.any along each axis, so any truthy element counts.
    If `img` has no truthy element at all, the bounds of the whole array are
    returned instead of failing on an empty index array.
    """
    rows = np.where(np.any(img, axis=1))[0]
    cols = np.where(np.any(img, axis=0))[0]
    if rows.size == 0 or cols.size == 0:
        # nothing to box in: fall back to the full extent
        return 0, img.shape[0] - 1, 0, img.shape[1] - 1
    rmin, rmax = rows[[0, -1]]
    cmin, cmax = cols[[0, -1]]
    return rmin, rmax, cmin, cmax

def crop_resize(img0, size=SIZE, pad=16):
    """Crop `img0` around its bright pixels, zero-pad to a square and resize to (size, size).

    img0: 2-D image array; it is not modified
    size: side length of the returned square image
    pad:  extra length added to the longer crop side before padding to a square
    """
    #crop a box around pixels larger than the threshold
    #some images contain lines at the sides, so a 5-pixel border is ignored
    ymin,ymax,xmin,xmax = bbox(img0[5:-5,5:-5] > 80)
    # NOTE(review): these bounds are relative to the border-trimmed view but are applied
    # to img0 below without adding the 5-pixel offset back -- confirm against the
    # preprocessing used at training time before changing it.
    #cropping may cut too much, so we need to add it back
    xmin = xmin - 13 if (xmin > 13) else 0
    ymin = ymin - 10 if (ymin > 10) else 0
    xmax = xmax + 13 if (xmax < WIDTH - 13) else WIDTH
    ymax = ymax + 10 if (ymax < HEIGHT - 10) else HEIGHT
    # copy the crop: a plain slice is a view, and the thresholding below would
    # otherwise write zeros into the caller's array
    img = img0[ymin:ymax,xmin:xmax].copy()
    #remove low-intensity pixels as noise
    img[img < 28] = 0
    lx, ly = xmax-xmin,ymax-ymin
    l = max(lx,ly) + pad
    #make sure that the aspect ratio is kept in rescaling
    img = np.pad(img, [((l-ly)//2,), ((l-lx)//2,)], mode='constant')
    return cv2.resize(img,(size,size))

In [9]:
class GraphemeDataset(Dataset):
    """Dataset over one parquet shard; each item is (preprocessed image, value of the first column).

    fname: path of the parquet file; its first column is returned as the item
           name and the remaining columns are reshaped to HEIGHT x WIDTH images
    """
    def __init__(self, fname):
        self.df = pd.read_parquet(fname)
        # invert intensities (255 - value) and lay the flat pixel columns out as images
        self.data = 255 - self.df.iloc[:, 1:].values.reshape(-1, HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        name = self.df.iloc[idx,0]
        raw = self.data[idx]
        peak = raw.max()
        #normalize each image by its max val; a blank image (max 0) stays all
        #zeros instead of dividing by zero
        scale = 255.0/peak if peak > 0 else 0.0
        img = (raw*scale).astype(np.uint8)
        img = crop_resize(img)
        # scale to 0..1, then standardize with the dataset statistics
        img = (img.astype(np.float32)/255.0 - stats[0])/stats[1]
        return img, name

In [10]:
def check_size(pqt_folder):
    """Return the total number of rows over all parquet files in `pqt_folder`.

    pqt_folder: iterable of parquet file paths. Only column '1' of each file is
    read, which is enough to count rows.
    """
    return sum(
        pd.read_parquet(pqt_fn, columns=['1']).shape[0]
        for pqt_fn in pqt_folder
    )

# Prediction

In [12]:
# Predictions
size = check_size(TEST)   # total number of test rows over all shards
# One (model, sample, class) buffer per target. np.float64 is the dtype the removed
# np.float alias used to resolve to.
results = {
    'grapheme_root': np.zeros((len(mdir), size, 168), dtype=np.float64),
    'vowel_diacritic': np.zeros((len(mdir), size, 11), dtype=np.float64),
    'consonant_diacritic': np.zeros((len(mdir), size, 7), dtype=np.float64),
}
# NOTE(review): the weight uses NUMENS while the buffers are sized by len(mdir).
# The argmax of the summed scores does not depend on this scale, but the sums are
# only true averages when the two agree.
ensemble_ratio = 1 / NUMENS

In [13]:
# Run every checkpoint over every test shard, storing weighted class
# probabilities in `results` and the submission row ids in `row_id`.
# Use the GPU when one is present, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
head_names = ('grapheme_root', 'vowel_diacritic', 'consonant_diacritic')  # order of the model outputs
row_id = []
for model_idx, model_dct in enumerate(mdir):
    model = CascadeModel(pre=False).to(device)
    # the checkpoint is deserialized on the CPU; load_state_dict copies it into the model
    model.load_state_dict(torch.load(MODELS + '/' + model_dct, map_location=torch.device('cpu')))
    model.eval()
    offset = 0   # write position along the sample axis; restarts for every checkpoint
    for fname in TEST:
        ds = GraphemeDataset(fname)
        dl = DataLoader(ds, batch_size=bs, num_workers=nworkers, shuffle=False)
        with torch.no_grad():
            for x, y in tqdm(dl):
                x = x.unsqueeze(1).to(device)   # add the channel axis expected by the model
                end = offset + len(y)
                for head_name, logits in zip(head_names, model(x)):
                    probs = F.softmax(logits, dim=1).cpu().numpy() * ensemble_ratio
                    results[head_name][model_idx, offset:end, :] = probs
                offset = end

                # the ids are the same for every checkpoint, so collect them only once
                if model_idx == 0:
                    for name in y: row_id += [f'{name}_grapheme_root',f'{name}_vowel_diacritic', f'{name}_consonant_diacritic']
        # release the shard before the next one is read
        del ds, dl

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




In [14]:
# Row ids as a Series (for the string matching below) and one target slot per row id
row_series = pd.Series(row_id)
target = np.zeros(len(row_series))

In [15]:
# For each component: sum the per-model scores, take the best class per sample and
# write it into the rows whose id contains the component name.
for component in ['grapheme_root', 'vowel_diacritic', 'consonant_diacritic']:
    is_component = row_series.str.contains(component).to_numpy()
    summed_scores = results[component].sum(axis=0)
    target[is_component] = summed_scores.argmax(axis=1)
target = target.astype('uint')

In [16]:
# Assemble the submission table: one row per row id with its predicted class
sub_df = pd.DataFrame({'row_id': row_id, 'target': target})

In [17]:
# Write the submission file without the DataFrame index column
sub_df.to_csv('submission.csv', index=False)