In [1]:
import os
import sys
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import torchvision.transforms as transforms
sys.path.append('../input/efficientnet/efficientnet-pytorch/EfficientNet-PyTorch/')
from efficientnet_pytorch import EfficientNet
from tqdm import tqdm
from PIL import Image
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from tqdm import tqdm_notebook
# List the entries under ../input so the available datasets/checkpoints are visible
# in the cell output (the paths used below are built from these directory names).
os.listdir('../input')

['efficientnet-b3-second-stage-training-rpe',
 'efficientnet-b3-second-stage-training-u2os',
 'recursion-cellular-image-classification',
 'efficientnet',
 'efficientnet-b3-second-stage-training-huvec',
 'efficientnet-b3-second-stage-training-hepg',
 'fork-of-efficientnet-b3-second-stage-trainig-huvec']

In [2]:
class CellDataset(Dataset):
    """Dataset that assembles one six-channel image per metadata row.

    For row ``idx`` the six files
    ``<img_dir>/<experiment>/Plate<plate>/<well>_s<site>_w<channel>.png``
    (``channel`` = 1..6) are read, converted to float32 arrays and stacked
    along a new last axis.  Assumes each PNG is single-channel so the result
    is ``(H, W, 6)`` -- TODO confirm against the image files.

    Args:
        df: DataFrame with ``experiment``, ``well`` and ``plate`` columns, and
            a ``sirna`` column when ``img_dir`` is the training directory.
        img_dir: Root directory containing the experiment folders.
        site: Site number inserted into the file name (``_s<site>_``).
        transforms: Optional callable applied to the stacked array.

    Returns (from ``__getitem__``):
        ``(image, label)`` when ``img_dir`` is the training directory, where
        ``label`` is a length-1 int32 array; otherwise just ``image``.
    """

    # Images read from this directory are returned together with their label.
    TRAIN_DIR = '../input/recursion-cellular-image-classification/train/'

    def __init__(self, df, img_dir, site=1, transforms=None):
        self.df = df
        self.img_dir = img_dir
        self.site = site
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Samplers hand out positions 0..len-1.  Use positional lookup so the
        # dataset also works when the frame does not carry a default
        # RangeIndex (e.g. a filtered frame that was not re-indexed).
        row = self.df.iloc[idx]
        exp, well, plate = row[['experiment', 'well', 'plate']].values

        plate_dir = os.path.join(self.img_dir, exp, f'Plate{plate}')
        img_channels = [np.array(Image.open(os.path.join(plate_dir,
                                                         f'{well}_s{self.site}_w{channel}.png')),
                                 dtype=np.float32) for channel in range(1, 7)]

        one_img = np.stack(img_channels, axis=2)

        if self.transforms is not None:
            one_img = self.transforms(one_img)

        # Compare normalised paths so a missing/extra trailing slash does not
        # silently switch the dataset into "no labels" mode.
        if os.path.normpath(self.img_dir) == os.path.normpath(self.TRAIN_DIR):
            return one_img, row[['sirna']].astype('int32').values
        return one_img

In [3]:
# Test metadata and the submission template.
test_df = pd.read_csv('../input/recursion-cellular-image-classification/test.csv')
sub = pd.read_csv('../input/recursion-cellular-image-classification/sample_submission.csv')

# Convert to a tensor, then normalise each of the six channels.
aug = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.485, 0.456, 0.456, 0.406, 0.406],
                         std=[0.229, 0.229, 0.225, 0.225, 0.224, 0.224]),
])

# One metadata frame per cell line: keep the rows whose experiment name
# contains the cell-line token and re-index them from zero.
test_df_huvec = test_df[test_df.experiment.str.find('HUVEC') != -1].reset_index(drop=True)
test_df_hepg = test_df[test_df.experiment.str.find('HEPG') != -1].reset_index(drop=True)
test_df_rpe = test_df[test_df.experiment.str.find('RPE') != -1].reset_index(drop=True)
test_df_u2os = test_df[test_df.experiment.str.find('U2OS') != -1].reset_index(drop=True)

# Two datasets per cell line, one for each imaging site.
_test_img_dir = '../input/recursion-cellular-image-classification/test/'

test_dataset_huvec_s1 = CellDataset(df=test_df_huvec, img_dir=_test_img_dir, site=1, transforms=aug)
test_dataset_huvec_s2 = CellDataset(df=test_df_huvec, img_dir=_test_img_dir, site=2, transforms=aug)

test_dataset_hepg_s1 = CellDataset(df=test_df_hepg, img_dir=_test_img_dir, site=1, transforms=aug)
test_dataset_hepg_s2 = CellDataset(df=test_df_hepg, img_dir=_test_img_dir, site=2, transforms=aug)

test_dataset_rpe_s1 = CellDataset(df=test_df_rpe, img_dir=_test_img_dir, site=1, transforms=aug)
test_dataset_rpe_s2 = CellDataset(df=test_df_rpe, img_dir=_test_img_dir, site=2, transforms=aug)

test_dataset_u2os_s1 = CellDataset(df=test_df_u2os, img_dir=_test_img_dir, site=1, transforms=aug)
test_dataset_u2os_s2 = CellDataset(df=test_df_u2os, img_dir=_test_img_dir, site=2, transforms=aug)

# Sequential (unshuffled) loaders so predictions stay in metadata row order.
_loader_kwargs = dict(batch_size=15, shuffle=False)

test_loader_huvec_s1 = DataLoader(test_dataset_huvec_s1, **_loader_kwargs)
test_loader_huvec_s2 = DataLoader(test_dataset_huvec_s2, **_loader_kwargs)

test_loader_hepg_s1 = DataLoader(test_dataset_hepg_s1, **_loader_kwargs)
test_loader_hepg_s2 = DataLoader(test_dataset_hepg_s2, **_loader_kwargs)

test_loader_rpe_s1 = DataLoader(test_dataset_rpe_s1, **_loader_kwargs)
test_loader_rpe_s2 = DataLoader(test_dataset_rpe_s2, **_loader_kwargs)

test_loader_u2os_s1 = DataLoader(test_dataset_u2os_s1, **_loader_kwargs)
test_loader_u2os_s2 = DataLoader(test_dataset_u2os_s2, **_loader_kwargs)

## HUVEC — load the trained checkpoint and predict on both sites

In [4]:
# EfficientNet-B3 with a 1108-way head; the pretrained weights only serve as
# a scaffold here because the checkpoint loaded below replaces the state dict.
model_huvec = EfficientNet.from_pretrained('efficientnet-b3', num_classes=1108)

# Swap the stem for a 6-input-channel convolution.  Its kernels are initialised
# with the pretrained stem kernel averaged over input channels, repeated 6 times.
trained_kernel_huvec = model_huvec._conv_stem.weight
new_conv_huvec = nn.Sequential(nn.Conv2d(6, 40, kernel_size=(3,3), stride=(2,2), bias=False), nn.ZeroPad2d(padding=(0, 1, 0, 1)))
with torch.no_grad():
    new_conv_huvec[0].weight[:,:] = torch.stack([torch.mean(trained_kernel_huvec, 1)]*6, dim=1)
model_huvec._conv_stem = new_conv_huvec
model_huvec = model_huvec.cuda()

# Load model
checkpoint_huvec = torch.load('../input/fork-of-efficientnet-b3-second-stage-trainig-huvec/model.tar')  # TODO : CHANGE PATH
model_huvec.load_state_dict(checkpoint_huvec['model_state_dict'])
model_huvec.eval()

# Per-batch model outputs for site 1 and site 2 (same rows, same order).
score_s1 = []
score_s2 = []

with torch.no_grad():
    for data in tqdm_notebook(test_loader_huvec_s1):
        data = data.cuda()
        output = model_huvec(data)
        score_s1.append(output.cpu().numpy())

    for data in tqdm_notebook(test_loader_huvec_s2):
        data = data.cuda()
        output = model_huvec(data)
        score_s2.append(output.cpu().numpy())

# Average the two sites batch by batch.  The final batch is usually shorter
# than the others, so stacking the batch list with np.asarray would build a
# ragged array (an error on recent NumPy); a list of per-batch averages avoids
# that and still works with the later np.concatenate(complete_huvec).
assert len(score_s1) == len(score_s2), 'site 1 and site 2 produced a different number of batches'
complete_huvec = [0.5*batch_s1 + 0.5*batch_s2 for batch_s1, batch_s2 in zip(score_s1, score_s2)]
predicted_huvec = np.concatenate(complete_huvec).squeeze()
# Index of the highest averaged score in each row (first one on ties).
predictions_huvec = [np.argmax(arr) for arr in predicted_huvec]

Downloading: "http://storage.googleapis.com/public-models/efficientnet/efficientnet-b3-5fb5a3c3.pth" to /tmp/.cache/torch/checkpoints/efficientnet-b3-5fb5a3c3.pth
100%|██████████| 47.1M/47.1M [00:00<00:00, 118MB/s] 


Loaded pretrained weights for efficientnet-b3


HBox(children=(IntProgress(value=0, max=590), HTML(value='')))




HBox(children=(IntProgress(value=0, max=590), HTML(value='')))




## HEPG — load the trained checkpoint and predict on both sites

In [5]:
# EfficientNet-B3 with a 1108-way head; the pretrained weights only serve as
# a scaffold here because the checkpoint loaded below replaces the state dict.
model_hepg = EfficientNet.from_pretrained('efficientnet-b3', num_classes=1108)

# Swap the stem for a 6-input-channel convolution.  Its kernels are initialised
# with the pretrained stem kernel averaged over input channels, repeated 6 times.
trained_kernel_hepg = model_hepg._conv_stem.weight
new_conv_hepg = nn.Sequential(nn.Conv2d(6, 40, kernel_size=(3,3), stride=(2,2), bias=False), nn.ZeroPad2d(padding=(0, 1, 0, 1)))
with torch.no_grad():
    new_conv_hepg[0].weight[:,:] = torch.stack([torch.mean(trained_kernel_hepg, 1)]*6, dim=1)
model_hepg._conv_stem = new_conv_hepg
model_hepg = model_hepg.cuda()

# Load model
checkpoint_hepg = torch.load('../input/efficientnet-b3-second-stage-training-hepg/model.tar')  # TODO : CHANGE PATH
model_hepg.load_state_dict(checkpoint_hepg['model_state_dict'])
model_hepg.eval()

# Per-batch model outputs for site 1 and site 2 (same rows, same order).
score_s1 = []
score_s2 = []

with torch.no_grad():
    for data in tqdm_notebook(test_loader_hepg_s1):
        data = data.cuda()
        output = model_hepg(data)
        score_s1.append(output.cpu().numpy())

    for data in tqdm_notebook(test_loader_hepg_s2):
        data = data.cuda()
        output = model_hepg(data)
        score_s2.append(output.cpu().numpy())

# Average the two sites batch by batch.  The final batch is usually shorter
# than the others, so stacking the batch list with np.asarray would build a
# ragged array (an error on recent NumPy); a list of per-batch averages avoids
# that and still works with the later np.concatenate(complete_hepg).
assert len(score_s1) == len(score_s2), 'site 1 and site 2 produced a different number of batches'
complete_hepg = [0.5*batch_s1 + 0.5*batch_s2 for batch_s1, batch_s2 in zip(score_s1, score_s2)]
predicted_hepg = np.concatenate(complete_hepg).squeeze()
# Index of the highest averaged score in each row (first one on ties).
predictions_hepg = [np.argmax(arr) for arr in predicted_hepg]

Loaded pretrained weights for efficientnet-b3


HBox(children=(IntProgress(value=0, max=296), HTML(value='')))




HBox(children=(IntProgress(value=0, max=296), HTML(value='')))




## RPE — load the trained checkpoint and predict on both sites

In [6]:
# EfficientNet-B3 with a 1108-way head; the pretrained weights only serve as
# a scaffold here because the checkpoint loaded below replaces the state dict.
model_rpe = EfficientNet.from_pretrained('efficientnet-b3', num_classes=1108)

# Swap the stem for a 6-input-channel convolution.  Its kernels are initialised
# with the pretrained stem kernel averaged over input channels, repeated 6 times.
trained_kernel_rpe = model_rpe._conv_stem.weight
new_conv_rpe = nn.Sequential(nn.Conv2d(6, 40, kernel_size=(3,3), stride=(2,2), bias=False), nn.ZeroPad2d(padding=(0, 1, 0, 1)))
with torch.no_grad():
    new_conv_rpe[0].weight[:,:] = torch.stack([torch.mean(trained_kernel_rpe, 1)]*6, dim=1)
model_rpe._conv_stem = new_conv_rpe
model_rpe = model_rpe.cuda()

# Load model
checkpoint_rpe = torch.load('../input/efficientnet-b3-second-stage-training-rpe/model.tar')  # TODO : CHANGE PATH
model_rpe.load_state_dict(checkpoint_rpe['model_state_dict'])
model_rpe.eval()

# Per-batch model outputs for site 1 and site 2 (same rows, same order).
score_s1 = []
score_s2 = []

with torch.no_grad():
    for data in tqdm_notebook(test_loader_rpe_s1):
        data = data.cuda()
        output = model_rpe(data)
        score_s1.append(output.cpu().numpy())

    for data in tqdm_notebook(test_loader_rpe_s2):
        data = data.cuda()
        output = model_rpe(data)
        score_s2.append(output.cpu().numpy())

# Average the two sites batch by batch.  The final batch is usually shorter
# than the others, so stacking the batch list with np.asarray would build a
# ragged array (an error on recent NumPy); a list of per-batch averages avoids
# that and still works with the later np.concatenate(complete_rpe).
assert len(score_s1) == len(score_s2), 'site 1 and site 2 produced a different number of batches'
complete_rpe = [0.5*batch_s1 + 0.5*batch_s2 for batch_s1, batch_s2 in zip(score_s1, score_s2)]
predicted_rpe = np.concatenate(complete_rpe).squeeze()
# Index of the highest averaged score in each row (first one on ties).
predictions_rpe = [np.argmax(arr) for arr in predicted_rpe]

Loaded pretrained weights for efficientnet-b3


HBox(children=(IntProgress(value=0, max=295), HTML(value='')))




HBox(children=(IntProgress(value=0, max=295), HTML(value='')))




## U2OS — load the trained checkpoint and predict on both sites

In [7]:
# EfficientNet-B3 with a 1108-way head; the pretrained weights only serve as
# a scaffold here because the checkpoint loaded below replaces the state dict.
model_u2os = EfficientNet.from_pretrained('efficientnet-b3', num_classes=1108)

# Swap the stem for a 6-input-channel convolution.  Its kernels are initialised
# with the pretrained stem kernel averaged over input channels, repeated 6 times.
trained_kernel_u2os = model_u2os._conv_stem.weight
new_conv_u2os = nn.Sequential(nn.Conv2d(6, 40, kernel_size=(3,3), stride=(2,2), bias=False), nn.ZeroPad2d(padding=(0, 1, 0, 1)))
with torch.no_grad():
    new_conv_u2os[0].weight[:,:] = torch.stack([torch.mean(trained_kernel_u2os, 1)]*6, dim=1)
model_u2os._conv_stem = new_conv_u2os
model_u2os = model_u2os.cuda()

# Load model
checkpoint_u2os = torch.load('../input/efficientnet-b3-second-stage-training-u2os/model.tar')  # TODO : CHANGE PATH
model_u2os.load_state_dict(checkpoint_u2os['model_state_dict'])
model_u2os.eval()

# Per-batch model outputs for site 1 and site 2 (same rows, same order).
score_s1 = []
score_s2 = []

with torch.no_grad():
    for data in tqdm_notebook(test_loader_u2os_s1):
        data = data.cuda()
        output = model_u2os(data)
        score_s1.append(output.cpu().numpy())

    for data in tqdm_notebook(test_loader_u2os_s2):
        data = data.cuda()
        output = model_u2os(data)
        score_s2.append(output.cpu().numpy())

# Average the two sites batch by batch.  The final batch is usually shorter
# than the others, so stacking the batch list with np.asarray would build a
# ragged array (an error on recent NumPy); a list of per-batch averages avoids
# that and still works with the later np.concatenate(complete_u2os).
assert len(score_s1) == len(score_s2), 'site 1 and site 2 produced a different number of batches'
complete_u2os = [0.5*batch_s1 + 0.5*batch_s2 for batch_s1, batch_s2 in zip(score_s1, score_s2)]
predicted_u2os = np.concatenate(complete_u2os).squeeze()
# Index of the highest averaged score in each row (first one on ties).
predictions_u2os = [np.argmax(arr) for arr in predicted_u2os]

Loaded pretrained weights for efficientnet-b3


HBox(children=(IntProgress(value=0, max=147), HTML(value='')))




HBox(children=(IntProgress(value=0, max=147), HTML(value='')))




In [8]:
# Fill the submission column cell line by cell line.  Each prediction list was
# produced from the rows of test_df whose experiment name contains the given
# token (in file order), so writing it back through the same row mask keeps ids
# and labels aligned without assuming that test.csv is grouped in the order
# HEPG, HUVEC, RPE, U2OS.
assert len(sub) == len(test_df), 'submission template and test.csv differ in length'
sirna_pred = np.full(len(sub), -1, dtype=np.int64)
for token, cell_line_preds in (('HEPG', predictions_hepg),
                               ('HUVEC', predictions_huvec),
                               ('RPE', predictions_rpe),
                               ('U2OS', predictions_u2os)):
    rows = (test_df.experiment.str.find(token) != -1).values
    assert rows.sum() == len(cell_line_preds), f'{token}: row count does not match prediction count'
    sirna_pred[rows] = cell_line_preds
# -1 is the "not filled" sentinel; every row must have received a prediction.
assert (sirna_pred >= 0).all(), 'some test rows received no prediction'

sub['sirna'] = sirna_pred
sub.to_csv('submission.csv', index=False, columns=['id_code','sirna'])

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [9]:
train_csv = pd.read_csv("../input/recursion-cellular-image-classification/train.csv")
test_csv = pd.read_csv("../input/recursion-cellular-image-classification/test.csv")

# Averaged scores of all four cell lines, stacked in the order HEPG, HUVEC, RPE, U2OS.
per_cell_line = [np.concatenate(batches)
                 for batches in (complete_hepg, complete_huvec, complete_rpe, complete_u2os)]
predicted = np.concatenate(per_cell_line).squeeze()

# plate_groups[sirna, 0:3]: the three distinct plate values this siRNA has in the
# training data.  plate_groups[sirna, 3]: 10 minus their sum (presumably the
# one remaining plate when plates are numbered 1-4 -- verify against the data).
plate_groups = np.zeros((1108, 4), int)
for sirna in range(1108):
    grp = train_csv[train_csv.sirna == sirna].plate.value_counts().index.values
    assert len(grp) == 3
    plate_groups[sirna, :3] = grp
    plate_groups[sirna, 3] = 10 - grp.sum()

all_test_exp = test_csv.experiment.unique()

# group_plate_probs[idx, j]: fraction of the current predictions in experiment idx
# whose predicted siRNA has, in plate_groups column j, the plate the well is on.
group_plate_probs = np.zeros((len(all_test_exp), 4))
for idx, exp_name in enumerate(all_test_exp):
    in_exp = test_csv.experiment == exp_name
    preds = sub.loc[in_exp, 'sirna'].values

    # One-hot encoding of the predicted class of every row.
    pp_mult = np.zeros((len(preds), 1108))
    pp_mult[np.arange(len(preds)), preds] = 1

    sub_test = test_csv.loc[in_exp, :]
    assert len(pp_mult) == len(sub_test)

    # Actual plate of each row, repeated across the 1108 classes.
    plates = np.tile(sub_test.plate.values[:, np.newaxis], (1, 1108))
    for j in range(4):
        mask = np.tile(plate_groups[:, j], (len(pp_mult), 1)) == plates
        group_plate_probs[idx, j] = pp_mult[mask].sum() / len(pp_mult)

# Best-matching plate_groups column for every test experiment.
exp_to_group = group_plate_probs.argmax(1)
print(exp_to_group)

def select_plate_group(pp_mult, idx):
    """Suppress the scores of classes that do not match the row's plate.

    For experiment ``all_test_exp[idx]`` the plate assigned to every class is
    read from column ``exp_to_group[idx]`` of ``plate_groups``.  Every entry of
    ``pp_mult`` whose class plate differs from the row's plate in ``test_csv``
    is overwritten with -1000000.

    Args:
        pp_mult: Score array with one row per test row of the experiment and
            1108 columns.  Modified in place.
        idx: Position of the experiment in ``all_test_exp``.

    Returns:
        The same ``pp_mult`` array, after masking.
    """
    exp_rows = test_csv[test_csv.experiment == all_test_exp[idx]]
    n_rows = len(pp_mult)
    assert n_rows == len(exp_rows)

    # Both operands are expanded to (n_rows, 1108) before comparing.
    class_plate = np.tile(plate_groups[:, exp_to_group[idx]], (n_rows, 1))
    row_plate = np.tile(exp_rows.plate.values[:, np.newaxis], (1, 1108))
    mismatch = np.not_equal(class_plate, row_plate)

    pp_mult[mismatch] = -1000000.  # this was originally 0 in this notebook
    return pp_mult


# Re-predict every experiment after masking the classes whose plate (in the
# experiment's chosen plate group) does not match the row's plate.
for idx, exp_name in enumerate(all_test_exp):
    indices = (test_csv.experiment == exp_name)

    # Work on a copy: select_plate_group overwrites its argument in place.
    masked_scores = select_plate_group(predicted[indices, :].copy(), idx)
    sub.loc[indices, 'sirna'] = masked_scores.argmax(1)

sub.to_csv('submission_leak.csv', index=False, columns=['id_code', 'sirna'])

[3 1 0 0 0 0 2 2 3 0 0 3 1 0 0 0 2 3]


In [10]:
# Display a link to the post-processed submission file written by the previous cell.
from IPython.display import FileLink
FileLink('submission_leak.csv')