# Import Libraries

In [1]:
#! pip install iterative-stratification

In [1]:
#+---- Basic Libraries ----+#
import sys, os, time, gc, random
from pathlib import Path
import pandas as pd
import numpy as np
import copy
from utils import *

#+---- Utilities Libraries ----+#
import albumentations as albu
import pretrainedmodels
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from PIL import Image

#+---- Pytorch Libraries ----+#
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils import model_zoo
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader

#+---- List the input data ----+#
for dirname, _, filenames in os.walk('../../input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

../../input\.gitkeep
../../input\class_map.csv
../../input\sample_submission.csv
../../input\test.csv
../../input\train.csv
../../input\feather\test_image_data_0.feather
../../input\feather\test_image_data_1.feather
../../input\feather\test_image_data_2.feather
../../input\feather\test_image_data_3.feather
../../input\feather\train_image_data_0.feather
../../input\feather\train_image_data_1.feather
../../input\feather\train_image_data_2.feather
../../input\feather\train_image_data_3.feather
../../input\parquet\test_image_data_0.parquet
../../input\parquet\test_image_data_1.parquet
../../input\parquet\test_image_data_2.parquet
../../input\parquet\test_image_data_3.parquet
../../input\parquet\train_image_data_0.parquet
../../input\parquet\train_image_data_1.parquet
../../input\parquet\train_image_data_2.parquet
../../input\parquet\train_image_data_3.parquet


In [2]:
INPUTDIR  = Path('../../input')
DATADIR = Path('../../input/parquet')
FEATHERDIR = Path('../../input/feather')
OUTDIR = Path('.')

# Learning Parameters

In [3]:
DEBUG =True
SUBMISSION =False
BATCH_SIZE =4
NUM_EPOCH = 5
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
IMAGE_SIZE=256
TRAIN_RATIO = 0.9
WORKER = 0
MODEL_NAME ='se_resnext50_32x4d'

In [4]:
n_grapheme = 168
n_vowel = 11
n_consonant = 7
n_total = n_grapheme + n_vowel + n_consonant

In [5]:
check_var()

|            Variable Name|    Memory|
 ------------------------------------ 


# Preprocessing

In [6]:
def prepare_image(datadir, featherdir, data_type='train',
                  submission=False, indices=[0, 1, 2, 3]):
    assert data_type in ['train', 'test']
    if submission:
        image_df_list = [pd.read_parquet(datadir / f'{data_type}_image_data_{i}.parquet')
                         for i in indices]
    else:
        image_df_list = [pd.read_feather(featherdir / f'{data_type}_image_data_{i}.feather')
                         for i in indices]

    print('image_df_list', len(image_df_list))
    HEIGHT = 137
    WIDTH = 236
    images = [df.iloc[:, 1:].values.reshape(-1, HEIGHT, WIDTH) for df in image_df_list]
    del image_df_list
    gc.collect()
    images = np.concatenate(images, axis=0)
    return images

In [7]:
def crop_char_image(image, threshold=5./255.):
    assert image.ndim == 2
    is_black = image > threshold

    is_black_vertical = np.sum(is_black, axis=0) > 0
    is_black_horizontal = np.sum(is_black, axis=1) > 0
    left = np.argmax(is_black_horizontal)
    right = np.argmax(is_black_horizontal[::-1])
    top = np.argmax(is_black_vertical)
    bottom = np.argmax(is_black_vertical[::-1])
    height, width = image.shape
    cropped_image = image[left:height - right, top:width - bottom]
    return cropped_image

In [8]:
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((IMAGE_SIZE,IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        #albu.Blur(0.2),
        #albu.GaussNoise(0.2),
        #albu.Cutout(0.2),
        #albu.GridDistortion(0.2),
        #albu.RandomBrightness(0.2),
        #albu.ElasticTransform(0.2)
    ]),
    'val': transforms.Compose([
        transforms.Resize((IMAGE_SIZE,IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# DataSet

In [9]:
class BengaliAIDataset(Dataset):
    def __init__(self, images, labels=None, transform=None, indices=None):
        self.transform = transform
        self.images = images
        self.labels = labels
        if indices is None:
            indices = np.arange(len(images))
        self.indices = indices
        self.train = labels is not None

    def __len__(self):
        """return length of this dataset"""
        return len(self.indices)
    
    
    
    def __getitem__(self, i):
        """Return i-th data"""
        i = self.indices[i]
        x = self.images[i]
        # Opposite white and black: background will be white and
        # for future Affine transformation
        x = (255 - x).astype(np.float32) #/ 255.
        x = crop_char_image(x)
        x = Image.fromarray(x).convert("RGB")
        x = self.transform(x)
        #x = np.transpose(x, (2, 0, 1)).astype(np.float32)
        if self.train:
            y = self.labels[i]
            return x, y[0]
        else:
            return x

# Import Data

In [10]:
%%time
train = pd.read_csv(INPUTDIR/'train.csv')
train_labels = train[['grapheme_root', 'vowel_diacritic', 'consonant_diacritic']].values
indices = [0] if DEBUG else [0, 1, 2, 3]
train_images = prepare_image(
    DATADIR, FEATHERDIR, data_type='train', submission=False, indices=indices)

image_df_list 1
Wall time: 3.12 s


In [11]:
n_dataset = len(train_images)
train_data_size = 200 if DEBUG else int(n_dataset * TRAIN_RATIO)
valid_data_size = 100 if DEBUG else int(n_dataset - train_data_size)

perm = np.random.RandomState(777).permutation(n_dataset)
print('perm', perm)

train_dataset = BengaliAIDataset(
    train_images, train_labels, transform=data_transforms['train'],
    indices=perm[:train_data_size])

valid_dataset = BengaliAIDataset(
    train_images, train_labels, transform=data_transforms['val'],
    indices=perm[train_data_size:train_data_size+valid_data_size])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKER)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKER)

dataloaders = {'train':train_loader, 'val': valid_loader}

perm [40434 17857 36804 ... 40535 15931 47919]


In [12]:
image, label = train_dataset[1]
print('image', image.shape, 'label', label)

image torch.Size([3, 256, 256]) label 91


# Modeling

In [13]:
def train_model(model, dataloaders, criterion, optimizer, scheduler, num_epochs, device):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    torch.backends.cudnn.benchmark = True

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            epoch_loss = 0.0
            epoch_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                epoch_loss += loss.item() * inputs.size(0)
                epoch_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = epoch_loss / dataset_sizes[phase]
            epoch_acc = epoch_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [14]:
def macro_recall(pred_y, y, n_grapheme=168, n_vowel=11, n_consonant=7):
    pred_y = torch.split(pred_y, [n_grapheme, n_vowel, n_consonant], dim=1)
    pred_labels = [torch.argmax(py, dim=1).cpu().numpy() for py in pred_y]

    y = y.cpu().numpy()
    # pred_y = [p.cpu().numpy() for p in pred_y]

    recall_grapheme = sklearn.metrics.recall_score(pred_labels[0], y[:, 0], average='macro')
    recall_vowel = sklearn.metrics.recall_score(pred_labels[1], y[:, 1], average='macro')
    recall_consonant = sklearn.metrics.recall_score(pred_labels[2], y[:, 2], average='macro')
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_score = np.average(scores, weights=[2, 1, 1])
    # print(f'recall: grapheme {recall_grapheme}, vowel {recall_vowel}, consonant {recall_consonant}, '
    #       f'total {final_score}, y {y.shape}')
    return final_score


def calc_macro_recall(solution, submission):
    # solution df, submission df
    scores = []
    for component in ['grapheme_root', 'consonant_diacritic', 'vowel_diacritic']:
        y_true_subset = solution[solution[component] == component]['target'].values
        y_pred_subset = submission[submission[component] == component]['target'].values
        scores.append(sklearn.metrics.recall_score(
            y_true_subset, y_pred_subset, average='macro'))
    final_score = np.average(scores, weights=[2, 1, 1])
    return final_score

In [15]:
# --- Model ---
model_ft = pretrainedmodels.__dict__[MODEL_NAME](pretrained='imagenet')

model_ft.last_linear = nn.Linear(model_ft.last_linear.in_features, n_total)

model_ft = model_ft.to(DEVICE)

model_ft.train()

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model_ft.parameters(), lr=0.001)

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.7, patience=5, min_lr=1e-10)

In [16]:
since = time.time()
model=model_ft
num_epochs = NUM_EPOCH
device=DEVICE
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
torch.backends.cudnn.benchmark = True

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch+1, num_epochs))
    print('-' * 10)

    # Each epoch has a training and validation phase
    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()  # Set model to training mode
        else:
            model.eval()   # Set model to evaluate mode

        epoch_loss = 0.0
        epoch_corrects = 0

        # Iterate over data.
        for inputs, labels in dataloaders[phase]:
            print(inputs.shape)
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward
            # track history if only in train
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            # statistics
            epoch_loss += loss.item() * inputs.size(0)
            epoch_corrects += torch.sum(preds == labels.data)
        if phase == 'train':
            scheduler.step()

        epoch_loss = epoch_loss / dataset_sizes[phase]
        epoch_acc = epoch_corrects.double() / dataset_sizes[phase]

        print('{} Loss: {:.4f} Acc: {:.4f}'.format(
            phase, epoch_loss, epoch_acc))

        # deep copy the model
        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    print()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
    time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))

# load best model weights
model.load_state_dict(best_model_wts)

Epoch 1/5
----------
torch.Size([4, 3, 256, 256])


RuntimeError: size mismatch, m1: [4 x 8192], m2: [2048 x 186] at C:/w/1/s/windows/pytorch/aten/src\THC/generic/THCTensorMathBlas.cu:290

In [None]:
model_ft = train_model(model_ft, dataloaders, criterion, optimizer, scheduler, NUM_EPOCH, DEVICE)