In [None]:
# Install the notebook's dependencies; fastai is pinned to 1.0.45, the other two packages are unpinned.
!pip install -q fastai==1.0.45 torchsummary pretrainedmodels

In [None]:
# Notebook setup: (re)load the autoreload extension in mode 2 and render matplotlib figures inline.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from fastai import *
from fastai.vision import *
from sklearn.metrics import roc_auc_score
from torchvision import models as tvmodels
from pretrainedmodels import models as ptmodels
from torchsummary import summary

In [None]:
class AucScore(Callback):
    """Metric callback that reports ROC AUC over all batches seen in an epoch.

    Scores and targets are collected batch by batch and the AUC is computed
    once over the whole collection at the end of the epoch, rather than per
    batch.

    The score used for each sample is the softmax probability at class index 1,
    so the model output must have its class dimension at `dim=1` with at least
    two entries.
    """

    def on_epoch_begin(self, **kwargs):
        """Reset the per-epoch buffers of scores and targets."""
        self.y_score = []
        self.y_true = []

    def on_batch_end(self, last_output, last_target, **kwargs):
        """Append this batch's class-1 probabilities and its targets to the buffers."""
        positive_prob = torch.softmax(last_output, dim=1)[:, 1]
        self.y_score.extend(to_np(positive_prob))
        self.y_true.extend(to_np(last_target))

    def on_epoch_end(self, **kwargs):
        """Compute ROC AUC over everything collected this epoch and store it in `self.metric`."""
        # float() accepts both NumPy scalars and plain Python floats, whereas
        # `.item()` exists only on the former.
        self.metric = float(roc_auc_score(self.y_true, self.y_score))

In [None]:
# Input directory and the two CSV files that are read from it.
path = Path('../input')
train_csv_path = path.joinpath('train_labels.csv')
submission_csv_path = path.joinpath('sample_submission.csv')

In [None]:
# --- Batching ---
BS = 64         # batch size passed to the DataBunch
VAL_BS = 64     # validation batch size passed to the DataBunch

# --- Image geometry ---
CROP_CENTER = False                     # when True, images are sliced to rows/cols 32:64 on load
SIZE = 96 if not CROP_CENTER else 32    # target size handed to the transform step
RESIZE = ResizeMethod.CROP
PAD = 'reflection'

# --- Data selection ---
DATA_PCT = 1.0      # fraction of the labelled data to use
VALID_PCT = 0.2     # fraction of the used data held out for validation

# --- Model ---
MODEL = 'resnet34'  # name under which checkpoints are saved and loaded
#MODEL_FUNC=lambda p: ptmodels.densenet121(pretrained='imagenet' if p else None)
#MODEL_FUNC=tvmodels.densenet201
MODEL_FUNC = models.resnet34
PRETRAINED = True
DROPOUT = 0.2
METRICS = [accuracy, AucScore()]

# --- Workflow switches ---
LR_FIND = False     # when True, the learning-rate finder cells run

In [None]:
# Load the training labels table and preview its first rows.
train_df = pd.read_csv(train_csv_path)
train_df.head()

In [None]:
# Count rows per value of the `label` column.
train_df.groupby('label').count()

In [None]:
# List the contents of the input directory.
path.ls()

In [None]:
class MyImageItemList(ImageItemList):
    """ImageItemList whose images can be reduced to a fixed 32x32 window when opened."""

    def open(self, fn):
        """Open `fn` with the parent loader; when CROP_CENTER is set, keep only rows/cols 32:64.

        For a 96x96 input that window is the centre of the image.
        """
        img = super().open(fn)
        if not CROP_CENTER:
            return img
        # `pixel` is called for its effect on `img`; its return value is not used.
        img.pixel(lambda px: px[:, 32:64, 32:64])
        return img

# Augmentations: vertical flips enabled, rotation and warp disabled, small zoom and lighting changes.
tfms = get_transforms(flip_vert=True, max_rotate=0.0, max_zoom=0.1, max_lighting=0.05, max_warp=0.0)

# Data pipeline: labelled items from train_labels.csv, a seeded partial sample
# and train/validation split, the test items listed in sample_submission.csv,
# then transforms, batching and normalisation.
#
# Normalisation: a pretrained backbone expects inputs normalised with the
# statistics it was trained with, so ImageNet stats are used when PRETRAINED
# is set. Otherwise `None` is passed, which is the same as calling
# `.normalize()` with no argument.
data = (MyImageItemList.from_csv(path, 'train_labels.csv', folder='train', suffix='.tif')
        .use_partial_data(sample_pct=DATA_PCT, seed=42)
        .random_split_by_pct(valid_pct=VALID_PCT, seed=42)
        .label_from_df(classes=[0, 1])
        .add_test(MyImageItemList.from_csv(path, 'sample_submission.csv', folder='test', suffix='.tif'))
        .transform(tfms, size=SIZE, padding_mode=PAD, resize_method=RESIZE)
        .databunch(bs=BS, val_bs=VAL_BS)
        .normalize(imagenet_stats if PRETRAINED else None))

In [None]:
# Inspect the DataBunch's `classes` and `c` attributes (presumably the class list and class count — confirm).
data.classes, data.c

In [None]:
# Sizes of the training, validation and test datasets.
len(data.train_ds), len(data.valid_ds), len(data.test_ds)

In [None]:
# Number of training items whose label object equals 0.
sum(1 for y in data.train_ds.y if y.obj == 0)

In [None]:
# Display a sample batch as a 5-row grid to check images and labels visually.
data.show_batch(rows=5, figsize=(12, 9))

In [None]:
# Build the CNN learner from the configured architecture, dropout and metrics.
# `path='.'` overrides the learner's path — presumably so checkpoints are written
# to the working directory instead of the input folder; confirm.
learn = create_cnn(data, MODEL_FUNC, pretrained=PRETRAINED, ps=DROPOUT, metrics=METRICS, path='.')

In [None]:
# Optional: uncomment to print a torchsummary overview of the model for the configured input size.
#summary(learn.model, input_size=(3, SIZE, SIZE))

In [None]:
# With pretrained weights, begin with the learner frozen for the first training stage.
if PRETRAINED:
    learn.freeze()

In [None]:
# Optional learning-rate search for the frozen stage (runs only when both flags are set).
if PRETRAINED and LR_FIND:
    # Checkpoint first so the weights can be restored once the search is done.
    learn.save(MODEL)
    learn.lr_find()
    learn.recorder.plot()
    # Restore the checkpoint saved above.
    _ = learn.load(MODEL)

In [None]:
%%time
# Stage 1 (pretrained weights only): 5 one-cycle epochs with learning rate slice(3e-3),
# then save a checkpoint under MODEL and plot the recorded losses.
if PRETRAINED:
    learn.fit_one_cycle(5, slice(3e-3))
    learn.save(MODEL)
    learn.recorder.plot_losses()

In [None]:
# Evaluate on the training data loader after stage 1.
learn.validate(data.train_dl)

In [None]:
# Evaluate on the validation data loader after stage 1.
learn.validate(data.valid_dl)

In [None]:
# Unfreeze the learner ahead of the second training stage.
learn.unfreeze()

In [None]:
# Optional learning-rate search for the unfrozen stage.
if LR_FIND:
    # Checkpoint first so the weights can be restored once the search is done.
    learn.save(MODEL)
    learn.lr_find()
    learn.recorder.plot()
    # Restore the checkpoint saved above.
    _ = learn.load(MODEL)

In [None]:
%%time
# Stage 2: 5 one-cycle epochs with learning rates slice(1e-5, 3e-3 / 5).
# The checkpoint is saved under the same MODEL name used by the stage-1 cell.
learn.fit_one_cycle(5, slice(1e-5, 3e-3 / 5))
learn.save(MODEL)
learn.recorder.plot_losses()

In [None]:
# Evaluate on the training data loader after stage 2.
learn.validate(data.train_dl)

In [None]:
# Evaluate on the validation data loader after stage 2.
learn.validate(data.valid_dl)

In [None]:
%%time
# Predict on the test set; the second element of the returned pair is discarded.
preds, _ = learn.get_preds(ds_type=DatasetType.Test)
# Keep column 1 of the predictions as a NumPy array. Despite the name, no
# thresholding is applied here, so these are per-item scores rather than class ids.
pred_classes = to_np(preds[:, 1])

In [None]:
# Fill the sample submission's `label` column with the test predictions and write the result.
# The test items were built from the same sample_submission.csv, so rows line up by position.
submission_df = pd.read_csv(submission_csv_path).assign(label=pred_classes)
submission_df.to_csv('./submission.csv', index=False)

In [None]:
# Read the written file back and preview its first rows as a sanity check.
submission_df = pd.read_csv('./submission.csv')
submission_df.head()