In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

from fastai.conv_learner import *
from fastai.dataset import *
from fastai.models.resnet import vgg_resnet50
from fastai.models.unet import *
from torch import nn
from PIL import Image as PILImage

import json

# Turn on PyTorch's cuDNN benchmark flag for this session.
torch.backends.cudnn.benchmark=True

# Last expression of the cell: shows whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

In [13]:
from sklearn.model_selection import train_test_split

# Root folder holding train.csv and the image directories.
PATH = Path('data')
# Fixed seed so the train/validation partition is identical on every
# Restart & Run All; without it, metrics and saved checkpoints are not
# comparable between runs.
SEED = 42

# One row per image: `Id` plus a space-separated `Target` string of class indices.
train_df = pd.read_csv(PATH/'train.csv')

# Full (unsplit) id / target lists.
trn_ids = list(train_df.Id)
trn_targs = list(train_df.Target)

# 80/20 hold-out split, reproducible through SEED.
train, val = train_test_split(train_df, test_size=0.2, random_state=SEED)

In [14]:
# Training ids and their target strings, as plain Python lists.
trn_x = train['Id'].tolist()
trn_y = train['Target'].tolist()

In [15]:
# Validation ids and their target strings, as plain Python lists.
val_x = val['Id'].tolist()
val_y = val['Target'].tolist()

# Data

## Definition

In [2]:
# Class index -> human-readable name of the protein localisation label.
# The keys are the integers that appear in the `Target` column.
LABEL_MAP = {
    0:  "Nucleoplasm",
    1:  "Nuclear membrane",
    2:  "Nucleoli",
    3:  "Nucleoli fibrillar center",
    4:  "Nuclear speckles",
    5:  "Nuclear bodies",
    6:  "Endoplasmic reticulum",
    7:  "Golgi apparatus",
    8:  "Peroxisomes",
    9:  "Endosomes",
    10: "Lysosomes",
    11: "Intermediate filaments",
    12: "Actin filaments",
    13: "Focal adhesion sites",
    14: "Microtubules",
    15: "Microtubule ends",
    16: "Cytokinetic bridge",
    17: "Mitotic spindle",
    18: "Microtubule organizing center",
    19: "Centrosome",
    20: "Lipid droplets",
    21: "Plasma membrane",
    22: "Cell junctions",
    23: "Mitochondria",
    24: "Aggresome",
    25: "Cytosol",
    26: "Cytoplasmic bodies",
    27: "Rods & rings",
}

In [102]:
from sklearn.preprocessing import MultiLabelBinarizer
from torchvision.transforms import ToTensor

In [197]:
class MatchedFilesDataset(FilesDataset):
    """Four-band image dataset yielding (image array, multi-hot label) pairs.

    Every sample is identified by an image id. Its four single-band PNG files
    (`<id>_red.png`, `<id>_green.png`, `<id>_blue.png`, `<id>_yellow.png`)
    are read from `<path>/train` and merged into one four-channel image.
    The target is a space-separated string of class indices (keys of
    LABEL_MAP) that is converted to a multi-hot vector.

    Samples are taken from the `fnames` / `y` passed to the constructor, so
    each dataset instance (train, validation, ...) serves exactly its own
    split instead of indexing the full training table by position.

    Args:
        fnames: image ids, one per sample.
        y: target strings, aligned with `fnames`.
        transform: transform object forwarded to `FilesDataset`.
        path: dataset root; band images are read from `path/'train'`.

    Raises:
        AssertionError: if `fnames` and `y` differ in length.
    """
    def __init__(self, fnames, y, transform, path):
        assert(len(fnames)==len(y))
        self.y = y
        self.BANDS_NAMES = ['_red.png','_green.png','_blue.png','_yellow.png']
        self.image_dir = Path(path)/'train'
        # Row i of this frame is sample i of THIS dataset. Built from the
        # constructor arguments rather than copied from the global train_df,
        # otherwise train and validation would both read the same leading
        # rows of the full table.
        self.images_df = pd.DataFrame({'Id': list(fnames), 'Target': list(y)})
        # Fit once; the class list is fixed, so get_y only has to transform.
        self.mlb = MultiLabelBinarizer(classes=list(LABEL_MAP.keys()))
        self.mlb.fit([list(LABEL_MAP.keys())])
        # Attributes are set before the parent constructor runs in case it
        # calls any of the overridden accessors.
        super().__init__(fnames, transform, path)

    def get_x(self, i):
        """Return sample `i` as a float64 array with the channel axis first.

        `np.array` on the merged RGBA image gives (rows, cols, 4); reversing
        the axes yields (4, cols, rows). Pixel values are left in 0..255.

        NOTE(review): this array is handed to the fastai transform pipeline.
        Confirm that the transforms kept in get_model_data accept a
        channel-first, unscaled array; image transforms commonly expect
        (rows, cols, channels) floats in [0, 1].
        """
        im = self._load_multiband_image(i)
        im = np.array(im, dtype=float)
        return np.transpose(im, (2, 1, 0))

    def get_y(self, i):
        """Return the multi-hot target of sample `i`.

        The result is a 1-D float32 vector with one entry per class, so a
        batch collates to (batch, n_classes) and matches the model output
        expected by a binary cross-entropy loss.
        """
        labels = self._load_multilabel_target(i)
        return self.mlb.transform([labels])[0].astype(np.float32)

    def get_c(self):
        """Number of classes (28, one per entry of LABEL_MAP)."""
        return len(LABEL_MAP)

    def _load_multiband_image(self, i):
        """Read the four band files of sample `i` and merge them into one image."""
        image_id = str(self.images_df.iloc[i].Id)
        bands = []
        for suffix in self.BANDS_NAMES:
            # The context manager closes the file handle straight away;
            # convert('L') loads the pixels into an independent single-band
            # image, which is the band type merge() requires.
            with PILImage.open(self.image_dir/(image_id + suffix)) as band:
                bands.append(band.convert('L'))

        # Let's pretend it's an RGBA image so PIL can carry four channels.
        return PILImage.merge('RGBA', bands=bands)

    def _load_multilabel_target(self, index):
        """Parse the space-separated target string of sample `index` into ints."""
        target = self.images_df.iloc[index].Target
        return [int(tok) for tok in target.split(' ')]

# Model

In [198]:
def get_model(n_classes, image_channels=4):
    """Build a pretrained ResNet-50 adapted to `image_channels` input bands.

    The classifier head is replaced by a fresh `nn.Linear` with `n_classes`
    outputs (raw logits, no activation), and the average pool is made
    adaptive so any input resolution works.

    The first convolution is rebuilt for `image_channels` inputs. Instead of
    leaving it randomly initialised (which would discard the pretrained stem
    and feed the pretrained layers behind it with noise), the pretrained
    filters are copied into the matching input channels and every additional
    channel starts from the mean of the pretrained filters.

    Args:
        n_classes: number of output logits.
        image_channels: number of input channels (default 4).

    Returns:
        The modified model with every parameter trainable.
    """
    model = resnet50(pretrained=True)
    for p in model.parameters():
        p.requires_grad = True

    model.fc = nn.Linear(in_features=model.fc.in_features, out_features=n_classes)
    model.avgpool = nn.AdaptiveAvgPool2d(1)

    # Pretrained stem filters, laid out as (out, in, kH, kW).
    pretrained_w = model.conv1.weight.data
    stem = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3,
                     bias=False)

    # Reuse the pretrained filters for the channels both layers share.
    n_shared = min(image_channels, pretrained_w.size(1))
    stem.weight.data[:, :n_shared] = pretrained_w[:, :n_shared]

    # Extra channels get the channel-wise mean of the pretrained filters.
    n_extra = image_channels - n_shared
    if n_extra > 0:
        mean_w = pretrained_w.mean(dim=1, keepdim=True)
        stem.weight.data[:, n_shared:] = mean_w.expand(
            pretrained_w.size(0), n_extra, pretrained_w.size(2), pretrained_w.size(3))

    model.conv1 = stem
    return model

In [199]:
class CustomModel():
    """Thin wrapper pairing a network with a name and its layer groups."""

    def __init__(self, model, name='res50_4'):
        self.model = model
        self.name = name

    def get_layer_groups(self, precompute=False):
        """Return a single layer group holding all children of the model.

        `precompute` is accepted but not used.
        """
        layer_group = children(self.model)
        return [layer_group]

In [200]:
def get_resnet50_model(md, crit):
    """Create a learner around the four-channel ResNet-50.

    Args:
        md: model data object handed to `ConvLearner`.
        crit: loss function assigned to `learn.crit`.

    Returns:
        A `ConvLearner` using Adam, `crit` as loss, and a thresholded
        accuracy metric.
    """
    res50_4 = to_gpu(get_model(len(LABEL_MAP), 4))
    # Keep CustomModel's default name: its second argument is a name string,
    # not a layer cut index.
    models = CustomModel(res50_4)

    def accuracy_thresh_logits(preds, targs):
        # get_model ends in a plain nn.Linear, so predictions are raw logits;
        # squash them to probabilities before applying the 0.2 cut-off.
        return accuracy_thresh(0.2)(torch.sigmoid(preds), targs)

    learn = ConvLearner(md, models)
    learn.opt_fn = optim.Adam
    learn.crit = crit
    learn.metrics = [accuracy_thresh_logits]
    return learn

In [201]:
def get_model_data(sz, bs, num_workers=16):
    """Build the model data object for the train/validation split.

    Args:
        sz: image size passed to `tfms_from_model`.
        bs: batch size.
        num_workers: number of data-loading workers (default 16, the value
            that used to be hard-coded); lower it on machines with fewer cores.

    Returns:
        An `ImageData` built from `MatchedFilesDataset` over the module-level
        `trn_x`/`trn_y` and `val_x`/`val_y` lists.
    """
    aug_tfms = [RandomFlip()]

    tfms = tfms_from_model(resnet50, sz,
                           crop_type=CropType.NO,
                           aug_tfms=aug_tfms)
    # Keep only the first two transforms of the training pipeline.
    # NOTE(review): only tfms[0] is trimmed; tfms[1] keeps its full transform
    # list, so the two pipelines differ by more than the augmentation.
    # Confirm this is intended.
    tfms[0].tfms = tfms[0].tfms[0:2]

    datasets = ImageData.get_ds(MatchedFilesDataset, (trn_x, trn_y),
                                (val_x, val_y), tfms, path=PATH)

    return ImageData(PATH, datasets, bs, num_workers=num_workers, classes=None)

# Test Model

In [202]:
# 128-pixel images, batch size 1.
md = get_model_data(sz=128, bs=1)

In [203]:
# Multi-label problem: binary cross-entropy computed on the raw logits.
learn = get_resnet50_model(md=md, crit=nn.BCEWithLogitsLoss())

In [None]:
# Run the learning-rate finder, then plot its schedule to pick `lr`.
learn.lr_find()
learn.sched.plot()

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

  0%|          | 0/24857 [00:00<?, ?it/s]

# Train Model

In [66]:
# Learning rate passed to learn.fit below.
lr = 2e-5

In [None]:
# 3 cycles of 30 epochs each (the progress bar below counts 90 epochs).
# The best weights are saved under 'FAI_BaseMk1', the name reloaded by
# learn.load before predicting.
%time learn.fit(lr,3,wds=1e-7,cycle_len=30,use_clr_beta=(10,10, 0.85, 0.9), use_wd_sched=True, best_save_name='FAI_BaseMk1')

HBox(children=(IntProgress(value=0, description='Epoch', max=90), HTML(value='')))

 69%|██████▉   | 2142/3108 [08:02<03:37,  4.44it/s, loss=0.153]

# Prepare Submission

In [80]:
def make_submission_file(sample_submission_df, predictions, fname='submission_FA.csv'):
    """Fill the `Predicted` column from a boolean mask and write it as CSV.

    Args:
        sample_submission_df: DataFrame with one row per prediction row. It is
            modified in place: its `Predicted` column is overwritten.
        predictions: 2-D array-like with one row per sample; the indices of
            the non-zero entries in a row are the predicted class ids.
        fname: output CSV path (defaults to 'submission_FA.csv').

    Returns:
        The same `sample_submission_df` object, with `Predicted` holding the
        space-separated class indices (an empty string when a row has no
        positive entry).
    """
    submissions = [
        ' '.join(str(idx) for idx in np.nonzero(row)[0])
        for row in predictions
    ]

    sample_submission_df['Predicted'] = submissions
    # index=False states the intent explicitly: no index column in the file.
    sample_submission_df.to_csv(fname, index=False)

    return sample_submission_df

In [None]:
# Reload the weights saved under the best_save_name used in learn.fit.
learn.load('FAI_BaseMk1')

In [73]:
# Predict on the test set.
# NOTE(review): get_model_data calls ImageData.get_ds without a test split,
# so it is unclear where the test data comes from here. Confirm that `md`
# actually carries test data before relying on is_test=True.
pred = learn.predict(is_test=True)

In [74]:
# Sanity check: one row per test image, one column per class.
pred.shape

(11702, 28)

In [79]:
# Quick look at the boolean mask at a cut-off of 0.2.
# NOTE(review): the model's last layer is a plain nn.Linear, so `pred` is
# presumably raw logits rather than probabilities. Confirm before reading
# 0.2 as a probability.
pred>0.2

array([[ True, False,  True, ...,  True, False, False],
       [ True, False, False, ..., False, False, False],
       [ True, False, False, ...,  True, False, False],
       ...,
       [ True, False, False, ..., False, False, False],
       [ True, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [81]:
# Probability above which a class is reported as present.
THRESHOLD = 0.2

# The network ends in a plain nn.Linear and is trained with BCEWithLogitsLoss,
# so `pred` holds raw logits. Comparing them to logit(THRESHOLD) is equivalent
# to sigmoid(pred) > THRESHOLD and avoids exponentiating large values.
p = pred > np.log(THRESHOLD / (1 - THRESHOLD))

# NOTE(review): submit_df is not defined in any visible cell. Make sure the
# sample submission is loaded before this cell so Restart & Run All works.
submission_file = make_submission_file(sample_submission_df=submit_df,
                     predictions=p)