In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from pathlib import Path
import fastai
from fastai.vision import *
import cv2
import torch
from torch.nn import Conv2d
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from fastai.callbacks import *
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# print(os.listdir("../input"))
# !ls ../input
# Any results you write to the current directory are saved as output.

In [None]:
# Root directory of the competition data (relative to the notebook's working directory).
path = Path('..', 'input', 'recursion-cellular-image-classification')

In [None]:
# Per-image, per-channel pixel statistics shipped with the competition data.
pixel_stats = pd.read_csv(path.joinpath('pixel_stats.csv'))

In [None]:
# Preview the first rows of the raw pixel statistics.
pixel_stats.head()

In [None]:
# Build a grouping key by concatenating experiment, plate and channel with no
# separator. The last character is the channel, which a later cell strips off
# to recover the experiment+plate key.
plate_str = pixel_stats['plate'].astype(str)
channel_str = pixel_stats['channel'].astype(str)
pixel_stats['exp_plate_channel'] = pixel_stats['experiment'] + plate_str + channel_str

In [None]:
# Average every numeric statistic over all images sharing the same
# experiment/plate/channel key. `numeric_only=True` makes the handling of the
# string columns explicit: older pandas dropped them silently under
# `agg(np.mean)`, newer pandas raises a TypeError instead.
pixel_stats_channel_mean = (
    pixel_stats
    .groupby('exp_plate_channel')
    .mean(numeric_only=True)
    .reset_index()
)

In [None]:
# Preview the per experiment/plate/channel averages.
pixel_stats_channel_mean.head()

In [None]:
# Drop the trailing channel character to obtain the experiment+plate key.
pixel_stats_channel_mean['exp_plate'] = pixel_stats_channel_mean['exp_plate_channel'].str[:-1]

In [None]:
# Map each experiment+plate key to its channel means as a float32 tensor of
# shape (6, 1, 1). The reshape fails if a key does not have exactly six rows.
exp_plate_mean = {
    exp_plate: torch.from_numpy(
        channel_rows['mean'].to_numpy(dtype=np.float32).reshape(6, 1, 1)
    )
    for exp_plate, channel_rows in pixel_stats_channel_mean.groupby('exp_plate')
}

In [None]:
# Map each experiment+plate key to its channel standard deviations as a
# float32 tensor of shape (6, 1, 1), mirroring `exp_plate_mean`.
exp_plate_std = {
    exp_plate: torch.from_numpy(
        channel_rows['std'].to_numpy(dtype=np.float32).reshape(6, 1, 1)
    )
    for exp_plate, channel_rows in pixel_stats_channel_mean.groupby('exp_plate')
}

In [None]:
# Load the training metadata and duplicate every row so that each well appears
# once per imaging site (site 1 rows first, then site 2 rows).
tr_df = pd.read_csv(path/'train.csv')
tr_df['site'] = 1
tr_df_copy = tr_df.assign(site=2)
# `DataFrame.append` is deprecated and removed in pandas 2.0; `pd.concat`
# produces the same frame on every pandas version.
tr_df = pd.concat([tr_df, tr_df_copy], ignore_index=True)

In [None]:
# The cell line is the experiment name without its last three characters
# (e.g. 'HUVEC-01' -> 'HUVEC').
tr_df['cellline'] = tr_df['experiment'].str[:-3]

In [None]:
def get_img_path(suffix, experiment, plate, well, site=1):
    """Return the relative path of the .npy image for one well and site.

    Despite its name, `suffix` is placed at the front of the path, directly
    before the lower-cased experiment name. The result has the form
    `<suffix><experiment>/plate<plate>/<well>_s<site>.npy`.
    """
    dataset_dir = suffix + experiment.lower()
    plate_dir = 'plate' + str(plate)
    file_name = well + '_s{}.npy'.format(site)
    return '/'.join([dataset_dir, plate_dir, file_name])

In [None]:
# Relative image path for every training row (one per well and site).
tr_df['path'] = [
    get_img_path('recursion-npy-train-', experiment, plate, well, site)
    for experiment, plate, well, site in zip(
        tr_df['experiment'], tr_df['plate'], tr_df['well'], tr_df['site']
    )
]

In [None]:
# List the distinct experiment names present in the training metadata.
tr_df.experiment.unique()

In [None]:
# Experiments used for training (HUVEC-02 .. HUVEC-13) and for validation.
tr_exps = ['HUVEC-{:02d}'.format(number) for number in range(2, 14)]
va_exps = ['HUVEC-{:02d}'.format(number) for number in (1, 14, 15, 16)]#, 'HEPG2-01', 'RPE-01', 'U2OS-01']

In [None]:
# tr_exps = ['U2OS-02', 'U2OS-03']
# va_exps = ['U2OS-01']

In [None]:
# tr_exps = ['RPE-02', 'RPE-03']
# va_exps = ['RPE-01']

In [None]:
# tr_exps = ['HEPG2-02', 'U2OS-02', 'RPE-02', 'HUVEC-02']
# va_exps = ['HEPG2-01', 'U2OS-01', 'RPE-01', 'HUVEC-01']

In [None]:
# Keep only the rows belonging to the selected experiments and flag the
# validation ones. `isin` matches experiment names exactly (the previous
# `str.contains` did regex substring matching), and `.copy()` makes the
# filtered frame independent so that adding a column does not trigger
# pandas' SettingWithCopyWarning.
selected_exps = tr_exps + va_exps
tr_df = tr_df[tr_df.experiment.isin(selected_exps)].copy()
tr_df['is_valid'] = tr_df.experiment.isin(va_exps)

In [None]:
class RecursionImageList(ImageList):
    """ImageList whose items are images stored as NumPy `.npy` files.

    Item paths are expected to follow the layout produced by `get_img_path`:
    `<root>/recursion-npy-<split>-<experiment>/plate<N>/<well>_s<site>.npy`.
    """

    @staticmethod
    def _exp_plate_key(fn:PathOrStr) -> str:
        """Return the upper-cased experiment name followed by the plate number.

        The key is derived from the path components rather than from fixed
        character offsets, so it is correct for both the
        `recursion-npy-train-` and the shorter `recursion-npy-test-` prefix
        and for `str` as well as `Path` inputs.
        """
        plate_dir = Path(fn).parent
        plate = plate_dir.name[len('plate'):]
        # 'recursion-npy-train-huvec-01' -> 'huvec-01'
        exp = plate_dir.parent.name.split('-', 3)[-1]
        return exp.upper() + plate

    def open(self, fn:PathOrStr) -> Image:
        """Load the array stored at `fn` and wrap it as a float32 fastai `Image`."""
        img = np.load(fn)
#         img = img / 255.0
        img = pil2tensor(img, dtype=np.float32)
        # Per-plate normalisation is currently disabled; to re-enable it, look
        # up the statistics with the key from `_exp_plate_key`.
#         exp_plate = self._exp_plate_key(fn)
#         img.sub_(exp_plate_mean[exp_plate])
#         img.div_(exp_plate_std[exp_plate])
#         img = F.avg_pool2d(img, kernel_size=2)
        return Image(img)

In [None]:
# Item list over every selected training image; the values in the 'path'
# column are resolved relative to the input root.
input_root = Path('../input')
src = RecursionImageList.from_df(tr_df, path=input_root, cols=['path'])

In [None]:
# Split into train/validation using the boolean 'is_valid' column.
# Note: this rebinds `src`, so the cell is not meant to be re-run on its own.
src = src.split_from_df(col='is_valid')

In [None]:
# Use the 'sirna' column as the classification target.
# Note: this rebinds `src`, so the cell is not meant to be re-run on its own.
src = src.label_from_df(cols=['sirna'])

In [None]:
# Side length of the crops fed to the network.
img_size = 256
# NOTE(review): 512 is presumably the side length of the stored images - confirm.
source_size = 512
# Fraction used to bound the random crop position in the training transforms.
r = img_size / (2 * source_size)
# r = 0.5

In [None]:
# Training: crop with the position drawn from [r, 1 - r] on both axes, plus
# dihedral augmentation.
train_tfms = [
    crop(size=img_size, row_pct=(r, 1 - r), col_pct=(r, 1 - r)),
    dihedral(),
]
# Validation: fixed crop at row_pct = col_pct = 0.5.
valid_tfms = [
    crop(size=img_size, row_pct=0.5, col_pct=0.5),
]
transforms = (train_tfms, valid_tfms)
# transforms = ([], [])

In [None]:
# Attach the train/valid transforms at `img_size` and build the DataBunch
# with a batch size of 16 and pinned host memory.
data = src.transform(size=img_size, tfms=transforms).databunch(bs=16, pin_memory=True)

In [None]:
# Display a summary of the DataBunch.
data

In [None]:
# Macro-averaged precision/recall metric objects and the training loss.
# Note: the cell that creates the learner passes only `accuracy` and
# `error_rate` as metrics, so `precision` and `recall` are not used there.
precision = Precision(average="macro")
recall = Recall(average="macro")
loss_func = CrossEntropyFlat()

In [None]:
# Build a DenseNet-201 learner with ImageNet-pretrained weights. Saved models
# go to /kaggle/working and the CSVLogger callback is attached.
learn = cnn_learner(data, models.densenet201, metrics=[accuracy, error_rate],
                    loss_func=loss_func,
                    model_dir='/kaggle/working', pretrained=True, 
                    callback_fns=[CSVLogger])

In [None]:
# Replace the pretrained first convolution with a 6-input-channel version so
# the network accepts the 6-channel images.
trained_kernel = learn.model[0][0][0].weight
new_conv = Conv2d(6, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
with torch.no_grad():
    # Average the pretrained kernel over its input channels and reuse that
    # mean for each of the 6 new input channels.
    new_conv.weight[:,:] = torch.stack([torch.mean(trained_kernel, 1)] * 6, dim=1)
learn.model[0][0][0] = new_conv
# Gradients are controlled per parameter: assigning `requires_grad` on the
# module itself only creates an unused attribute, so set it on the weight.
new_conv.weight.requires_grad_(True)
# NOTE(review): the learner's layer groups were built before this swap and may
# still reference the old conv; confirm the new weight reaches the optimizer.

In [None]:
# learn.load('../input/recursion-fast-ai-channel-all/huvec-25', with_opt=True);

In [None]:
# Move the model (including the newly inserted conv layer) to the GPU.
learn.model.cuda();

In [None]:
# learn.unfreeze()

In [None]:
# Run fastai's learning-rate finder.
learn.lr_find()

In [None]:
# Plot the recorded learning-rate sweep, skipping 35 points at the start and
# 18 at the end, and mark a suggested learning rate.
learn.recorder.plot(skip_start=35, skip_end=18, suggestion=True)

In [None]:
# Train for 25 epochs with the one-cycle policy at a maximum learning rate of 3e-3.
learn.fit_one_cycle(25, 3e-3)

In [None]:
# Show the per-epoch training log written by the CSVLogger callback.
learn.csv_logger.read_logged_file()

In [None]:
# learn.fit_one_cycle(20, 3e-3)

In [None]:
# learn.fit_one_cycle(20, 3e-3)

In [None]:
# learn.fit_one_cycle(20, 3e-3)

In [None]:
# learn.fit_one_cycle(20, 3e-3)

In [None]:
# learn.fit_one_cycle(20, 3e-3)

In [None]:
# learn.fit_one_cycle(20, 3e-3)

In [None]:
# Plot the recorded losses and metrics.
learn.recorder.plot_losses()
learn.recorder.plot_metrics()

In [None]:
# Save the trained model under the name 'huvec-25' in the learner's model_dir.
learn.save('huvec-25')

# Prediction

In [None]:
# Load the test metadata and build one image path column per imaging site.
te_df = pd.read_csv(path/'test.csv')
for site_no in (1, 2):
    te_df['path{}'.format(site_no)] = [
        get_img_path('recursion-npy-test-', experiment, plate, well, site=site_no)
        for experiment, plate, well in zip(te_df['experiment'], te_df['plate'], te_df['well'])
    ]
# One item list per site; predictions for the two sites are averaged later.
test_root = Path('../input')
test_src1 = RecursionImageList.from_df(te_df, path=test_root, cols=['path1'])
test_src2 = RecursionImageList.from_df(te_df, path=test_root, cols=['path2'])

In [None]:
# Predict on the site-1 test images. `y` holds the second value returned by
# get_preds and is not used afterwards.
learn.data.add_test(test_src1)
preds1, y = learn.get_preds(DatasetType.Test)

In [None]:
# Replace the test set with the site-2 images and predict again.
learn.data.add_test(test_src2)
preds2,y = learn.get_preds(DatasetType.Test)

In [None]:
# Average the predictions of the two sites of each well.
preds = (preds1 + preds2) * 0.5

In [None]:
# argmax yields an index into the learner's class list, not necessarily the
# siRNA label itself, so map each index back through `learn.data.classes`.
# Converting to a plain list also avoids assigning a raw tensor to a column.
pred_class_idx = preds.argmax(1).tolist()
te_df['sirna'] = [learn.data.classes[idx] for idx in pred_class_idx]

In [None]:
# Write the submission file with only the id and predicted siRNA columns.
submission = te_df[['id_code', 'sirna']]
submission.to_csv('submission.csv', index=False)