In [2]:
import sys
sys.path.append('/home/mod/Workspace/bengali-2020')

In [3]:
from pathlib import Path
# from bengali.dataset import BengaliDataset, get_transform
# from torch.utils.data import DataLoader
import pandas as pd
import os
import torchvision
from bengali.models import build_model
# from bengali.utils.data_utils import load_train_valid_df, DATA_ROOT
import torch
from torch.nn import Module
from PIL import Image
import cv2
from torch.utils.data import Dataset, DataLoader
import numpy as np

from tqdm import tqdm_notebook as tqdm
from albumentations import Compose
from albumentations.pytorch import ToTensorV2

In [3]:
BASE = 'resnet50'  # 'resnext101_32x16d_wsl'
HEAD_DROPOUT = 0.5
FROZEN_START = 1
CLASSES = [168, 11, 7]
FP16 = True
SIZE=128
HEIGHT = 137
WIDTH = 236
stats = (0.0692, 0.2051)
WEIGHTS_FILE= os.path.abspath('/home/mod/Workspace/bengali-2020/bengali/models_10epochs/model_best.pth')

In [4]:
def bbox(img):
    rows = np.any(img, axis=1)
    cols = np.any(img, axis=0)
    rmin, rmax = np.where(rows)[0][[0, -1]]
    cmin, cmax = np.where(cols)[0][[0, -1]]
    return rmin, rmax, cmin, cmax

def crop_resize(img0, size=SIZE, pad=16):
    #crop a box around pixels large than the threshold 
    #some images contain line at the sides
    ymin,ymax,xmin,xmax = bbox(img0[5:-5,5:-5] > 80)
    #cropping may cut too much, so we need to add it back
    xmin = xmin - 13 if (xmin > 13) else 0
    ymin = ymin - 10 if (ymin > 10) else 0
    xmax = xmax + 13 if (xmax < WIDTH - 13) else WIDTH
    ymax = ymax + 10 if (ymax < HEIGHT - 10) else HEIGHT
    img = img0[ymin:ymax,xmin:xmax]
    #remove lo intensity pixels as noise
    img[img < 28] = 0
    lx, ly = xmax-xmin,ymax-ymin
    l = max(lx,ly) + pad
    #make sure that the aspect ratio is kept in rescaling
    img = np.pad(img, [((l-ly)//2,), ((l-lx)//2,)], mode='constant')
    img = cv2.resize(img,(size,size))
    img = cv2.cvtColor(img,cv2.COLOR_GRAY2RGB)
    return img

In [5]:
class GraphemeDataset(Dataset):
    def __init__(self, fname, transform=None):
        self.df = pd.read_parquet(fname)
        self.data = 255 - self.df.iloc[:, 1:].values.reshape(-1, HEIGHT, WIDTH).astype(np.uint8)
        self.transform = transform
        
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        name = self.df.iloc[idx,0]
        #normalize each image by its max val
        img = (self.data[idx]*(255.0/self.data[idx].max())).astype(np.uint8)
        img = crop_resize(img)
        img = (img.astype(np.float32)/255.0 - stats[0])/stats[1]
        
        if self.transform:
            transformed = self.transform(image=img)
            img = transformed['image']
            
        return img, name

In [6]:
TEST = ['/home/mod/Workspace/kaggle/Bengali/test_image_data_0.parquet',
        '/home/mod/Workspace/kaggle/Bengali/test_image_data_1.parquet',
        '/home/mod/Workspace/kaggle/Bengali/test_image_data_2.parquet',
        '/home/mod/Workspace/kaggle/Bengali/test_image_data_3.parquet']

In [7]:
transform_test = Compose([
    ToTensorV2()
])

In [8]:
# ds = GraphemeDataset(TEST[0])
# dl = DataLoader(ds, batch_size=1)

In [9]:
# import matplotlib.pyplot as plt
# plt.imshow(dl.dataset[2][0], cmap='gray')

In [10]:
# to_tensor(dl.dataset[1][0]).shape

In [11]:
# dl.dataset[1][1]

In [12]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model: Module = build_model(
    base=BASE,
    frozen_start=FROZEN_START,
    fp16=FP16,
    n_classes=CLASSES,
    head_dropout=HEAD_DROPOUT,
)
print('Creating model ...')
model.load_state_dict(torch.load(WEIGHTS_FILE))
model = model.to(device)
model.eval()

Creating model ...


Model(
  (base): ResNetBase(
    (base): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)

In [13]:
DATA_ROOT

'/home/mod/Workspace/kaggle/Bengali/grapheme-imgs-128x128'

In [14]:
# path = os.path.join(DATA_ROOT, 'Train_9.png')

In [15]:
# img = Image.open(path).convert("RGB"); img

In [16]:
trans = torchvision.transforms.ToPILImage()

In [17]:
to_tensor = torchvision.transforms.ToTensor()

In [18]:
# inputs = to_tensor(img)
# inputs = inputs.to(device, dtype=torch.float)
# inputs = inputs.unsqueeze(0)
# pred_g, pred_v, pred_c = model(inputs)
# pred_g, pred_v, pred_c

In [19]:
import torch.nn.functional as F

In [20]:
# pred_g = F.softmax(pred_g, dim=1).data.cpu().numpy().argmax(axis=1); pred_g

In [21]:
# pred_v = F.softmax(pred_v, dim=1).data.cpu().numpy().argmax(axis=1); pred_v

In [22]:
# pred_c = F.softmax(pred_c, dim=1).data.cpu().numpy().argmax(axis=1); pred_c

In [23]:
row_id,target = [],[]
for fname in TEST:
    ds = GraphemeDataset(fname, transform=transform_test)
    dl = DataLoader(ds, batch_size=1, shuffle=False)
    with torch.no_grad():
        for x,y in tqdm(dl):
#             x = to_tensor(x)
#             x = x.to(device, dtype=torch.float)
            print(x.shape)
            x = x.to(device, dtype=torch.float)
            pred_g, pred_v, pred_c = model(x)
            
            pred_g = F.softmax(pred_g, dim=1).data.cpu().numpy().argmax(axis=1)
            pred_v = F.softmax(pred_v, dim=1).data.cpu().numpy().argmax(axis=1)
            pred_c = F.softmax(pred_c, dim=1).data.cpu().numpy().argmax(axis=1)
            
            for idx,name in enumerate(y):
                row_id += [f'{name}_grapheme_root',f'{name}_vowel_diacritic',
                           f'{name}_consonant_diacritic']
                target += [pred_g[idx].item(),pred_v[idx].item(),pred_c[idx].item()]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

torch.Size([1, 3, 128, 128])
torch.Size([1, 3, 128, 128])
torch.Size([1, 3, 128, 128])



HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

torch.Size([1, 3, 128, 128])
torch.Size([1, 3, 128, 128])
torch.Size([1, 3, 128, 128])



HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

torch.Size([1, 3, 128, 128])
torch.Size([1, 3, 128, 128])
torch.Size([1, 3, 128, 128])



HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

torch.Size([1, 3, 128, 128])
torch.Size([1, 3, 128, 128])
torch.Size([1, 3, 128, 128])



In [24]:
sub_df = pd.DataFrame({'row_id': row_id, 'target': target})

In [25]:
sub_df

Unnamed: 0,row_id,target
0,Test_0_grapheme_root,3
1,Test_0_vowel_diacritic,0
2,Test_0_consonant_diacritic,0
3,Test_1_grapheme_root,93
4,Test_1_vowel_diacritic,2
5,Test_1_consonant_diacritic,0
6,Test_2_grapheme_root,19
7,Test_2_vowel_diacritic,0
8,Test_2_consonant_diacritic,0
9,Test_3_grapheme_root,115


In [None]:
# sub_df = pd.DataFrame({'row_id': row_id, 'target': target})
sub_df.to_csv('./submission_10epochs.csv', index=False)
sub_df.head()