In [47]:
import pandas as pd

import torch
from efficientnet_pytorch import EfficientNet

from fastai2.basics import *
from fastai2.data.all import *
from fastai2.callback.all import *
from fastai2.vision.all import *
from sklearn.metrics import recall_score

## Params

In [68]:
# --- Filesystem layout (relative to the notebook's working directory) ---
DATA_PATH = Path('../data')
IMAGE_DATA_PATH = DATA_PATH/'grapheme-imgs-128x128'
OUTPUT_PATH = DATA_PATH/'working'
LABELS_PATH = DATA_PATH/'iterative-stratification'

# --- Data loading ---
VALID_PCT = 0.2   # not referenced by the cells visible in this notebook
SEED = 420        # used as `random_state` when subsampling the label frame
BATCH_SIZE = 64
IMG_SIZE = 64     # output size requested from the affine augmentations

# --- Augmentation strengths / probabilities ---
MAX_WARP = 0.2
P_AFFINE = 0.75
MAX_ROTATE = 10.
MAX_ZOOM = 1.1
P_LIGHTING = 0.75
MAX_LIGHTING = 0.2
MAX_COUNT_RANDOM_ERASING = 3

# --- Per-head multipliers applied to the cross-entropy terms in `loss_func` ---
GRAPHEME_ROOT_WEIGHT = 2
VOWEL_DIACRITIC_WEIGHT = 1
CONSONANT_DIACRITIC_WEIGHT = 1

# Number of rows to subsample from the label frame; None means "use everything".
# NOTE(review): as written, subsampling is requested only when CUDA *is*
# available - this looks inverted for a "quick CPU run" switch; confirm intent.
SAMPLE_SIZE = 10_000 if torch.cuda.is_available() else None

In [14]:
# Keyword arguments shared by the three affine/coordinate transforms below.
aug_kwargs = {
    'size': IMG_SIZE,
    'mode': 'bilinear',
    'pad_mode': PadMode.Reflection,
    'batch': False,
}

# Augmentation pipeline, kept in this order: geometric transforms first,
# then lighting changes, then random erasing.
AUGMENTATIONS = [
    # Geometric
    Warp(magnitude=MAX_WARP, p=P_AFFINE, **aug_kwargs),
    Rotate(max_deg=MAX_ROTATE, p=P_AFFINE, **aug_kwargs),
    Zoom(max_zoom=MAX_ZOOM, p=P_AFFINE, **aug_kwargs),
    # Lighting
    Brightness(max_lighting=MAX_LIGHTING, p=P_LIGHTING, batch=False),
    Contrast(max_lighting=MAX_LIGHTING, p=P_LIGHTING, batch=False),
    # Occlusion
    RandomErasing(max_count=MAX_COUNT_RANDOM_ERASING),
]

## Create datasets and dataloaders

In [30]:
# Load the label table (one row per image, including the precomputed `fold` column).
train_df = pd.read_csv(LABELS_PATH/'train_with_fold.csv')
if SAMPLE_SIZE:
    # `DataFrame.sample` returns a NEW frame, so the result must be assigned
    # back, otherwise the subsampling silently does nothing.
    # The index is reset because the validation split is later built from
    # `train_df.index`, which must line up with row positions.
    # `n` is clamped so a small label file does not raise.
    train_df = (
        train_df
        .sample(n=min(SAMPLE_SIZE, len(train_df)), random_state=SEED)
        .reset_index(drop=True)
    )

In [17]:
# Display the ImageNet per-channel (mean, std) constants for reference.
imagenet_stats

([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

In [31]:
# One greyscale image input followed by three categorical targets, all read
# from columns of the label frame. Rows whose `fold` is 0 form the validation set.
datablock = DataBlock(
    blocks=(ImageBlock(cls=PILImageBW), CategoryBlock, CategoryBlock, CategoryBlock),
    getters=[
        # Image file path: IMAGE_DATA_PATH / <image_id>.png
        ColReader('image_id', pref=IMAGE_DATA_PATH, suff='.png'),
        # Targets, in the order the loss function and metrics expect them.
        *(ColReader(col) for col in ('grapheme_root', 'vowel_diacritic', 'consonant_diacritic')),
    ],
    splitter=IndexSplitter(train_df.index[train_df['fold'] == 0]),
)

In [21]:
# Batch transforms: every augmentation, then normalisation last.
# 0.485 / 0.229 equal the first-channel ImageNet mean / std displayed earlier.
tfms = [*AUGMENTATIONS, Normalize(mean=0.485, std=0.229)]

In [23]:
# Show the assembled transform list to check its contents and order.
tfms

[AffineCoordTfm: True (TensorBBox,object) -> encodes
 (TensorPoint,object) -> encodes
 (TensorImage,object) -> encodes
 (TensorMask,object) -> encodes ,
 AffineCoordTfm: True (TensorBBox,object) -> encodes
 (TensorPoint,object) -> encodes
 (TensorImage,object) -> encodes
 (TensorMask,object) -> encodes ,
 AffineCoordTfm: True (TensorBBox,object) -> encodes
 (TensorPoint,object) -> encodes
 (TensorImage,object) -> encodes
 (TensorMask,object) -> encodes ,
 LightingTfm: True (TensorImage,object) -> encodes ,
 LightingTfm: True (TensorImage,object) -> encodes ,
 RandomErasing: False (TensorImage,object) -> encodes ,
 Normalize: True (TensorImage,object) -> encodes (TensorImage,object) -> decodes]

In [32]:
# Build the train/validation dataloaders from the label frame; `tfms` is passed as batch transforms.
data = datablock.dataloaders(train_df, bs=BATCH_SIZE, batch_tfms=tfms)
# Treat only the first block (the image) as model input; the other three are targets.
data.n_inp = 1 

## Loss and metrics

In [62]:
def loss_func(inp, grapheme_root_targ, vowel_diacritic_targ, consonant_diacritic_targ):
    """Weighted sum of the three per-head cross-entropy losses.

    `inp` is unpacked into three prediction tensors (grapheme root, vowel
    diacritic, consonant diacritic), each scored against its own target.
    The terms are scaled by GRAPHEME_ROOT_WEIGHT, VOWEL_DIACRITIC_WEIGHT and
    CONSONANT_DIACRITIC_WEIGHT respectively and then added together.
    """
    root_logits, vowel_logits, consonant_logits = inp

    root_loss = F.cross_entropy(root_logits, grapheme_root_targ)
    vowel_loss = F.cross_entropy(vowel_logits, vowel_diacritic_targ)
    consonant_loss = F.cross_entropy(consonant_logits, consonant_diacritic_targ)

    weighted_total = (
        root_loss * GRAPHEME_ROOT_WEIGHT +
        vowel_loss * VOWEL_DIACRITIC_WEIGHT +
        consonant_loss * CONSONANT_DIACRITIC_WEIGHT
    )
    return weighted_total

In [63]:
class RecallPartial(Metric):
    """Macro-averaged recall for one output head, selected by index `a`.

    Predictions and targets are collected batch by batch in `accumulate`;
    the score is computed over everything collected when `value` is read.
    """
    def __init__(self, a=0, **kwargs):
        # `a` picks the head in `learn.pred` and the matching target in `learn.y`.
        self.a = a
        self.func = partial(recall_score, average='macro', zero_division=0)

    def reset(self):
        """Drop everything accumulated so far."""
        self.targs = []
        self.preds = []

    def accumulate(self, learn):
        """Store the detached class predictions and targets of the current batch."""
        head_pred = to_detach(learn.pred[self.a].argmax(dim=-1))
        head_targ = to_detach(learn.y[self.a])
        head_pred, head_targ = flatten_check(head_pred, head_targ)
        self.preds.append(head_pred)
        self.targs.append(head_targ)

    @property
    def value(self):
        """Recall over all accumulated batches, or None if nothing was accumulated."""
        if not self.preds:
            return None
        all_targs = torch.cat(self.targs)
        all_preds = torch.cat(self.preds)
        return self.func(all_targs, all_preds)

    @property
    def name(self):
        # Uses the label frame's column name at position a+1.
        # NOTE(review): presumably column 0 is the image id and the three label
        # columns follow it - verify against the CSV layout.
        return train_df.columns[self.a+1]
    

class RecallCombine(Metric):
    """Weighted average of the learner's leading metrics.

    With the default weights it combines `learn.metrics[0..2]` as 2:1:1.
    The component metrics are only queried when `value` is read. Reading them
    in `accumulate` would recompute every recall over all accumulated
    predictions after each batch, which is quadratic in the number of batches.

    Args:
        weights: one weight per component metric; metric `i` of the learner is
            paired with `weights[i]`. Defaults to (2, 1, 1).
    """
    def __init__(self, *args, weights=(2, 1, 1), **kwargs):
        super().__init__(*args, **kwargs)
        self.weights = list(weights)
        self.combine = 0
        self._metrics = None  # set on the first accumulated batch

    def reset(self):
        """Forget the previous result so a stale score is never reported."""
        self.combine = 0
        self._metrics = None

    def accumulate(self, learn):
        """Remember where the component metrics live; no computation happens here."""
        self._metrics = learn.metrics

    @property
    def value(self):
        """Weighted average of the component metric values (0 before any batch)."""
        if self._metrics is not None:
            scores = [self._metrics[i].value for i in range(len(self.weights))]
            self.combine = np.average(scores, weights=self.weights)
        return self.combine

## Model

In [64]:
class BengaliModel(nn.Module):
    """Shared encoder with three linear classification heads.

    Pipeline: 1x1 convolution (1 -> 3 channels) -> `encoder` ->
    global average pooling -> flatten -> three independent linear heads.

    Args:
        encoder: module applied to the 3-channel tensor; its output is
            average-pooled to 1x1, so it must produce a 4-D feature map.
        encoder_output_features: channel count of the encoder output, used as
            the input width of every head.
        n_grapheme_root: number of outputs of the grapheme-root head.
        n_vowel_diacritic: number of outputs of the vowel-diacritic head.
        n_consonant_diacritic: number of outputs of the consonant-diacritic head.
    """
    def __init__(self, encoder, encoder_output_features,
                 n_grapheme_root=168, n_vowel_diacritic=11, n_consonant_diacritic=7):
        super().__init__()
        # Maps the single-channel input to the 3 channels passed to the encoder.
        self.input_conv = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=1)

        self.pooling = nn.AdaptiveAvgPool2d(1)

        self.encoder = encoder

        self.fc_grapheme_root = nn.Linear(in_features=encoder_output_features, out_features=n_grapheme_root)
        self.fc_vowel_diacritic = nn.Linear(in_features=encoder_output_features, out_features=n_vowel_diacritic)
        self.fc_consonant_diacritic = nn.Linear(in_features=encoder_output_features, out_features=n_consonant_diacritic)

    def forward(self, inputs):
        """Return `[grapheme_root, vowel_diacritic, consonant_diacritic]` head outputs."""
        # Convolve to 3 channels
        x = self.input_conv(inputs)

        # Convolution layers
        x = self.encoder(x)

        # Pooling
        x = self.pooling(x)

        # Collapse everything but the batch dimension; unlike `view(bs, -1)`
        # this also works for an empty batch.
        x = x.flatten(start_dim=1)

        return [
            self.fc_grapheme_root(x),
            self.fc_vowel_diacritic(x),
            self.fc_consonant_diacritic(x)
        ]

In [65]:
class EfficientNetEncoder(EfficientNet):
    """EfficientNet subclass whose forward pass returns only the output of `extract_features`."""
    def forward(self, x):
        """Return `self.extract_features(x)`; no pooling or final linear layer is applied here."""
        return self.extract_features(x)

## Training

In [71]:
# Pretrained EfficientNet-B0 backbone; the head width is taken from the input
# size of the backbone's own `_fc` layer.
encoder = EfficientNetEncoder.from_pretrained('efficientnet-b0')
model = BengaliModel(encoder=encoder, encoder_output_features=encoder._fc.in_features)

# Move both the model and the dataloaders to the GPU when one is present.
if torch.cuda.is_available():
    model, data = model.cuda(), data.cuda()

# One RecallPartial per target, followed by the combined score. RecallCombine
# reads the first three metrics, so the per-target metrics must come first.
learner = Learner(
    data,
    model,
    loss_func=loss_func,
    metrics=[*(RecallPartial(a=i) for i in range(len(data.c))), RecallCombine()],
    cbs=CSVLogger(OUTPUT_PATH/'history.csv'),
)

Loaded pretrained weights for efficientnet-b0


For the first epoch, I'll train just the fc layers and the first layer, which start out as random weights.

In [73]:
# Train for a single epoch.
# NOTE(review): no parameters are frozen in the visible cells, so this appears
# to update the whole network (pretrained encoder included), not just the fc
# layers and the first layer as the preceding markdown says - confirm intent.
learner.fit(1)

In [None]:
# Plot the losses recorded during the fit above.
learner.recorder.plot_loss()

## Error analysis