In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from PIL import Image
import matplotlib.pyplot as plt
from fastai.vision import *
import os

# IPython magics: (re)load the autoreload extension in mode 2 and render
# matplotlib figures inline in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# List the entries of the input directory to see which datasets are mounted.
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

## Preparing the old training dataset

In [None]:
# Load the label table trainLabels15.csv and preview its first rows.
train_df1 = pd.read_csv("../input/resized-2015-2019-blindness-detection-images/labels/trainLabels15.csv")
train_df1.head()

In [None]:
# Read one sample image and show its array shape.
# The path contains no placeholders, so a plain string literal is used.
img = plt.imread('../input/resized-2015-2019-blindness-detection-images/resized train 15/10_left.jpg')
img.shape

In [None]:
def display_samples(df, columns=4, rows=3,
                    image_dir='../input/resized-2015-2019-blindness-detection-images/resized train 15'):
    """Plot the first ``columns * rows`` images listed in ``df`` as a grid.

    Args:
        df: DataFrame with an ``image`` column (file name without the
            ``.jpg`` extension) and a ``level`` column (shown as the title).
        columns: Number of grid columns.
        rows: Number of grid rows.
        image_dir: Directory that contains the ``<image>.jpg`` files.

    Rows are taken by position, so ``df`` does not need a default 0..n-1
    index, and at most ``len(df)`` images are drawn when the frame is
    smaller than the grid.
    """
    fig = plt.figure(figsize=(5 * columns, 4 * rows))

    # Never request more rows than the frame actually holds.
    n_shown = min(columns * rows, len(df))
    sample = df.iloc[:n_shown]

    for position, (image_name, level) in enumerate(zip(sample['image'], sample['level']), start=1):
        img = plt.imread(f'{image_dir}/{image_name}.jpg')
        # Work on the explicit axes instead of pyplot's implicit current axes.
        ax = fig.add_subplot(rows, columns, position)
        ax.set_title(level)
        ax.imshow(img)

    fig.tight_layout()

# Preview a grid of images from the label table using the default grid size.
display_samples(train_df1)


In [None]:
# Row counts per value of the `level` label column (class balance check).
train_df1.groupby('level').count()

In [None]:
# Total number of rows in the label table.
train_df1.shape[0]

In [None]:
# Root directory of the resized 2015/2019 image dataset; list its contents.
path = Path('../input/resized-2015-2019-blindness-detection-images/')
path.ls()

In [None]:
# Augmentations: flips enabled (including vertical), rotation up to 30.0,
# lighting change limited to 0.1, and warping disabled (max_warp=0.).
tfms = get_transforms(do_flip=True, flip_vert=True, max_lighting=0.1, max_warp=0., max_rotate=30.0)

In [None]:
# Seed NumPy's global RNG, then build the labelled image list:
#  - file names come from labels/trainLabels15.csv, with '.jpg' appended,
#    and are looked up in the 'resized train 15' folder;
#  - 20% of the items are held out at random for validation (seed=42);
#  - labels are read from the `level` column.
np.random.seed(42)

src = (ImageList.from_csv(path, 'labels/trainLabels15.csv', folder='resized train 15', suffix='.jpg')
       .split_by_rand_pct(0.2, seed=42)
       .label_from_df(cols='level'))

### Incremental learning: 224 image size

In [None]:
# Apply the augmentations at size 224, build the DataBunch with 4 loader
# workers, and normalize with the ImageNet statistics.
data = (src.transform(tfms, size=224)
        .databunch(num_workers=4).normalize(imagenet_stats))

In [None]:
# Visual sanity check: show a 3-row grid of samples from a batch.
data.show_batch(rows=3, figsize=(12,9))

## Training on the old dataset using ResNet-34

In [None]:
# Copy the ResNet-34 weights from the input dataset into the torch checkpoint
# cache under the file name resnet34-333f7ec4.pth.
# NOTE(review): presumably this lets the learner find the pretrained weights
# without downloading them — confirm.
!mkdir -p /tmp/.cache/torch/checkpoints/
!cp ../input/resnet34/resnet34.pth /tmp/.cache/torch/checkpoints/resnet34-333f7ec4.pth

In [None]:
# Build a CNN learner on ResNet-34 that reports accuracy; model_dir is set to
# /tmp/models for saved checkpoints.
arch = models.resnet34
learn = cnn_learner(data, arch, metrics=[accuracy], model_dir='/tmp/models')

In [None]:
# Learning-rate range test; plot the result with a suggested learning rate.
learn.lr_find()
learn.recorder.plot(suggestion=True)

In [None]:
# Stage 1 at size 224 (before learn.unfreeze() is called): 4 one-cycle epochs
# with a maximum learning rate of 1e-2, then plot recorded losses and metrics.
learn.fit_one_cycle(4, 1e-2)
learn.recorder.plot_losses()
learn.recorder.plot_metrics()

In [None]:
# Checkpoint the stage-1 weights.
learn.save("model1_224_stage1")

In [None]:
# Show the most confused class pairs, keeping only those with a count >= 2.
interp = ClassificationInterpretation.from_learner(learn)
interp.most_confused(min_val=2)

In [None]:
# Stage 2 at size 224: unfreeze the model before fine-tuning.
learn.unfreeze()

In [None]:
# Re-run the learning-rate range test on the unfrozen model.
learn.lr_find()
learn.recorder.plot(suggestion=True)

In [None]:
# 2 one-cycle epochs with learning rates given as the range 1e-5 .. 1e-4.
learn.fit_one_cycle(2, slice(1e-5, 1e-4))


In [None]:
# Checkpoint the stage-2 weights.
learn.save("model1_224_stage2")

In [None]:
# Re-check the most confused class pairs after fine-tuning.
interp = ClassificationInterpretation.from_learner(learn)
interp.most_confused(min_val=2)

### Incremental learning: 552 image size

In [None]:
# Rebuild the DataBunch from the same `src` at size 552 (num_workers=0 here,
# versus 4 for the 224 bunch) and normalize with the ImageNet statistics.
data = (src.transform(tfms, size=552)
        .databunch(num_workers=0).normalize(imagenet_stats))

# Swap the new data into the existing learner so training continues from the
# current weights.
learn.data = data

In [None]:
# Stage 1 at size 552: freeze the model again before training on larger images.
learn.freeze()

In [None]:
# Learning-rate range test on the 552 data.
learn.lr_find()
learn.recorder.plot(suggestion=True)

In [None]:
# 2 one-cycle epochs with learning rates given as the range 1e-3 .. 1e-2.
learn.fit_one_cycle(2, slice(1e-3,1e-2))

In [None]:
# Checkpoint the stage-1 weights for size 552.
learn.save('model1_552_stage1')

In [None]:
# Plot the recorded losses and metrics.
learn.recorder.plot_losses()
learn.recorder.plot_metrics()

In [None]:
# Show the most confused class pairs, keeping only those with a count >= 2.
interp = ClassificationInterpretation.from_learner(learn)
interp.most_confused(min_val=2)

In [None]:
# Stage 2 at size 552: unfreeze the model before fine-tuning.
learn.unfreeze()

In [None]:
# Learning-rate range test on the unfrozen model.
learn.lr_find()
learn.recorder.plot(suggestion=True)

In [None]:
# 2 one-cycle epochs with learning rates given as the range 1e-5 .. 1e-4.
learn.fit_one_cycle(2, slice(1e-5,1e-4))

In [None]:
# Checkpoint the stage-2 weights for size 552.
learn.save('model1_552_stage2')

In [None]:
# Re-check the most confused class pairs after fine-tuning.
interp = ClassificationInterpretation.from_learner(learn)
interp.most_confused(min_val=2)

In [None]:
# Freeze again; the next section continues training this learner on a
# different dataset.
learn.freeze()

## Training on the new training dataset

In [None]:
# Point `path` at the aptos2019-blindness-detection dataset and list its contents.
path = Path('../input/aptos2019-blindness-detection/')
path.ls()

In [None]:
# Seed NumPy's global RNG, then build the labelled image list:
#  - file names come from train.csv, with '.png' appended, and are looked up
#    in the 'train_images' folder;
#  - 20% of the items are held out at random for validation; unlike the
#    earlier split, no explicit seed is passed here, so the np.random.seed(42)
#    call above is the only seeding in this cell;
#  - labels are read from the `diagnosis` column.
np.random.seed(42)

src = (ImageList.from_csv(path, 'train.csv', folder='train_images', suffix='.png')
       .split_by_rand_pct(0.2)
       .label_from_df(cols='diagnosis'))

In [None]:
# data = (src.transform(tfms, size=128)
#         .databunch(num_workers=4).normalize(imagenet_stats))

# learn.data = data

In [None]:
# data.show_batch(rows=3, figsize=(12,9))

In [None]:
# learn.lr_find()
# learn.recorder.plot(suggestion=True)

In [None]:
# learn.fit_one_cycle(4, slice(1e-3))

In [None]:
# learn.save('model2_128_stage1')

In [None]:
# interp = ClassificationInterpretation.from_learner(learn)
# interp.most_confused(min_val=2)

In [None]:
# learn.unfreeze()

In [None]:
# learn.lr_find()
# learn.recorder.plot(suggestion=True)

In [None]:
# learn.fit_one_cycle(4, slice(1e-3))

In [None]:
# learn.save('model2_128_stage2')

In [None]:
# interp = ClassificationInterpretation.from_learner(learn)
# interp.most_confused(min_val=2)

In [None]:
# learn.freeze()

In [None]:
# Build a size-552 DataBunch from the new `src` (num_workers=0), normalized
# with the ImageNet statistics.
data = (src.transform(tfms, size=552)
        .databunch(num_workers=0).normalize(imagenet_stats))

# Attach the new data to the already-trained learner.
learn.data = data

In [None]:
# Learning-rate range test on the new data.
learn.lr_find()
learn.recorder.plot(suggestion=True)

In [None]:
# 4 one-cycle epochs; the learning rate is passed as slice(1e-2).
learn.fit_one_cycle(4, slice(1e-2))

In [None]:
# Checkpoint the weights trained on the new dataset.
learn.save('model2_552_stage1')

In [None]:
# Show the most confused class pairs, keeping only those with a count >= 2.
interp = ClassificationInterpretation.from_learner(learn)
interp.most_confused(min_val=2)

In [None]:
# Predict on the validation set with learn.TTA; keep the predictions and the
# targets it returns. (Plain get_preds alternative left commented out below.)
# valid_preds = learn.get_preds(ds_type=DatasetType.Valid)
pred_valid, y_valid = learn.TTA(ds_type=DatasetType.Valid)

In [None]:
# Number of validation targets.
len(y_valid)

In [None]:
# Number of validation items whose arg-max prediction equals the target.
(np.array(np.argmax(pred_valid, axis=1))==np.array(y_valid)).sum()

In [None]:
# Table of (predicted class, count) pairs.
unique_pred, counts_pred = np.unique(np.argmax(pred_valid, axis=1), return_counts=True)
np.asarray((unique_pred, counts_pred)).T

In [None]:
# Table of (actual class, count) pairs, for comparison with the predictions.
unique_act, counts_act = np.unique(y_valid, return_counts=True)
np.asarray((unique_act, counts_act)).T

In [None]:
# Evaluate with validate()'s default data loader (no loader is passed).
learn.validate()

In [None]:
# Evaluate on the training data loader.
# NOTE(review): this loader presumably still applies the training
# augmentations, so the numbers may not be directly comparable to the
# validation ones — confirm.
learn.validate(learn.data.train_dl)

In [None]:
# Load test.csv from the dataset directory and preview it.
test_df = pd.read_csv(path/'test.csv')
test_df.head() 

In [None]:
# Build the test image list ('.png' appended to the names in test_df, looked
# up under test_images) and register it as the test set of `data`.
test_data = ImageList.from_df(test_df, path/'test_images', suffix='.png')
data.add_test(test_data)
len(test_data)

In [None]:
# Predict on the test set with learn.TTA; the second return value is discarded.
pred, _ = learn.TTA(ds_type=DatasetType.Test)

In [None]:
# preds, _ = learn.get_preds(ds_type=DatasetType.Test)

In [None]:
# Shape of the prediction tensor.
pred.size()

In [None]:
# Display the raw predictions.
pred

In [None]:
# Predicted class = index of the largest score in each row. Convert explicitly
# to a NumPy integer array instead of routing the torch tensor through
# np.argmax, so the column is guaranteed to hold plain integers.
test_df['diagnosis'] = pred.argmax(dim=1).cpu().numpy()

In [None]:
# Sanity checks on the predicted labels: largest and smallest predicted
# value, then the number of rows per predicted value.
print(test_df['diagnosis'].max(), test_df['diagnosis'].min())
count_df = test_df.groupby('diagnosis').count()
print(count_df)

In [None]:
# Preview the frame that will be written out.
test_df.head()

In [None]:
# Write the submission file to the working directory, without the index column.
test_df.to_csv('submission.csv', index=False)

In [None]:
# List the working directory to confirm the file was written.
os.listdir()