In [None]:
import numpy as np
import pandas as pd
import os

from tqdm import tqdm
import PIL
import cv2
from PIL import Image, ImageOps

from keras.models import Sequential, load_model
from keras.layers import (Activation, Dropout, Flatten, Dense, Input, Conv2D, GlobalAveragePooling2D)
from keras.applications.densenet import DenseNet121
import keras
from keras.models import Model

# Side length of the square model input; the network is built for (SIZE, SIZE, 3) inputs.
SIZE = 224
# Number of output classes; passed as n_out when the network is built.
NUM_CLASSES = 1108


In [None]:
# Load the train metadata, the test metadata and a previously generated
# submission file, in that order.
train_csv, test_csv, sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/recursion-cellular-image-classification-224-jpg/new_train.csv",
        "../input/recursion-cellular-image-classification-224-jpg/new_test.csv",
        "../input/recursion-cellular-keras-densenet/submission.csv",
    )
)

In [None]:
# Print the entries of the ../input directory.
print(os.listdir("../input"))

In [None]:
# Keep only the first 36515 rows of the training metadata (positional slice).
# NOTE(review): 36515 is presumably the full training-set size -- confirm.
train_csv = train_csv.iloc[:36515]

In [None]:
# Show the training metadata frame as the cell output.
train_csv

In [None]:
# Keep only the first 19897 rows of the test metadata (positional slice).
# NOTE(review): 19897 is presumably the full test-set size -- confirm.
test_csv = test_csv.iloc[:19897]

In [None]:
# Show the test metadata frame as the cell output.
test_csv

In [None]:
# Show the submission frame loaded from recursion-cellular-keras-densenet/submission.csv.
sub

# Train data

In [None]:
# Distinct plate and well values present in the training metadata,
# followed by how many of each there are.
plate = train_csv["plate"].unique()
well = train_csv["well"].unique()
print(len(plate), len(well))

In [None]:
# Plate values of the training rows for sirna 0..9, one column per sirna.
np.stack(
    [
        train_csv.loc[train_csv["sirna"] == sirna_id, "plate"].values
        for sirna_id in range(10)
    ]
).T

In [None]:
# How often each plate occurs among the training rows with sirna 0.
train_csv[train_csv["sirna"] == 0]["plate"].value_counts()

In [None]:
# plate_groups[sirna, 0:3] holds the three plates on which `sirna` occurs in
# train_csv, in value_counts() order; column 3 holds the one remaining plate.
# The row count comes from NUM_CLASSES instead of a repeated literal.
plate_groups = np.zeros((NUM_CLASSES, 4), int)
for sirna in range(NUM_CLASSES):
    grp = train_csv.loc[train_csv.sirna == sirna, :].plate.value_counts().index.values
    assert len(grp) == 3, \
        "sirna %d occurs on %d distinct plates in train_csv, expected 3" % (sirna, len(grp))
    plate_groups[sirna, 0:3] = grp
    # The missing plate is recovered as 10 minus the sum of the other three.
    # NOTE(review): this assumes plates are numbered 1..4 (1+2+3+4 == 10) -- confirm.
    plate_groups[sirna, 3] = 10 - grp.sum()

# Preview the first ten rows as the cell output.
plate_groups[:10, :]

# Test data

In [None]:
all_test_exp = test_csv.experiment.unique()

# `sub` is indexed below with masks built from `test_csv`, which is only
# meaningful when both frames list the same samples in the same order.
assert np.array_equal(sub.id_code.values, test_csv.id_code.values), \
    "submission rows are not aligned with test_csv"

n_groups = plate_groups.shape[1]

# group_plate_probs[idx, j]: fraction of the existing predictions of experiment
# `idx` whose predicted sirna has, in column j of plate_groups, the same plate
# as the sample it was predicted for.
group_plate_probs = np.zeros((len(all_test_exp), n_groups))
for idx in range(len(all_test_exp)):
    # Row mask of this experiment, computed once and shared by both lookups.
    in_experiment = (test_csv.experiment == all_test_exp[idx]).values

    preds = sub.loc[in_experiment, 'sirna'].values
    # One-hot encode the predicted sirna of every sample.
    pp_mult = np.zeros((len(preds), NUM_CLASSES))
    pp_mult[np.arange(len(preds)), preds] = 1

    sub_test = test_csv.loc[in_experiment, :]
    assert len(pp_mult) == len(sub_test)
    sample_plates = sub_test.plate.values[:, np.newaxis]  # column vector, one entry per sample

    for j in range(n_groups):
        # Broadcasting the (1, classes) row against the (samples, 1) column
        # yields the same (samples, classes) mask as repeating both arrays.
        mask = plate_groups[np.newaxis, :, j] == sample_plates
        group_plate_probs[idx, j] = pp_mult[mask].sum() / len(pp_mult)

In [None]:
# Tabulate the scores: one row per test experiment, one column per plate-group column.
pd.DataFrame(group_plate_probs, index = all_test_exp)

In [None]:
# For every test experiment pick the plate-group column with the highest score.
exp_to_group = np.argmax(group_plate_probs, axis=1)
print(exp_to_group)

# Running predictions - DenseNet121 model

In [None]:
def create_model(input_shape,n_out):
    """Build a DenseNet121-based classifier.

    The DenseNet121 backbone is created without its top and without
    pretrained weights (``weights=None``); its output is globally
    average-pooled, passed through a 1024-unit ReLU layer and finally
    through a softmax layer named ``final_output``.

    Args:
        input_shape: shape handed to the Keras ``Input`` layer.
        n_out: number of units of the softmax output layer.

    Returns:
        A Keras ``Model`` mapping the input tensor to the softmax output.
    """
    inputs = Input(shape=input_shape)
    backbone = DenseNet121(weights=None, include_top=False, input_tensor=inputs)

    # Classification head on top of the backbone features.
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(1024, activation='relu')(pooled)
    probabilities = Dense(n_out, activation='softmax', name='final_output')(hidden)

    return Model(inputs, probabilities)

In [None]:
# Build the network for SIZE x SIZE 3-channel inputs with NUM_CLASSES outputs.
model = create_model(input_shape=(SIZE,SIZE,3),n_out=NUM_CLASSES)

In [None]:
# Load weights from the Densenet121.h5 file; create_model builds the backbone with weights=None.
model.load_weights('../input/recursion-cellular-keras-densenet/Densenet121.h5')

In [None]:
test_image_dir = '../input/recursion-cellular-image-classification-224-jpg/test/test/'

predicted = []
# Wrap the id column itself (not an enumerate object) so tqdm can read its
# length and show a real progress bar; the loop index was never used.
for name in tqdm(test_csv['id_code']):
    site_scores = []
    # Every sample has two image files, suffixed _s1 and _s2.
    for site_suffix in ('_s1.jpeg', '_s2.jpeg'):
        path = os.path.join(test_image_dir, name + site_suffix)
        image = cv2.imread(path)
        # cv2.imread signals an unreadable file by returning None, not by raising.
        if image is None:
            raise FileNotFoundError('could not read test image: ' + path)
        # Add a batch axis and scale the pixel values by 1/255 before predicting.
        site_scores.append(model.predict(image[np.newaxis] / 255))

    # The sample's score is the average of the scores of its two images.
    predicted.append(0.5 * (site_scores[0] + site_scores[1]))

In [None]:
# Stack the per-sample score arrays into one array and drop singleton axes.
predicted = np.squeeze(np.stack(predicted))

In [None]:
def select_plate_group(pp_mult, idx):
    """Zero the scores of classes whose plate does not match the sample's plate.

    For test experiment ``all_test_exp[idx]`` the plate of every class is read
    from column ``exp_to_group[idx]`` of ``plate_groups``. A score is kept only
    where that plate equals the plate of the sample in ``test_csv``.

    Args:
        pp_mult: 2-D score array with one row per sample of the experiment, in
            ``test_csv`` order, and one column per row of ``plate_groups``.
            It is modified in place.
        idx: position of the experiment in ``all_test_exp``.

    Returns:
        The same ``pp_mult`` array, after masking.

    Raises:
        AssertionError: if ``pp_mult`` does not have one row per sample of the
            experiment.
    """
    sub_test = test_csv.loc[test_csv.experiment == all_test_exp[idx], :]
    assert len(pp_mult) == len(sub_test)

    # Broadcasting a (1, classes) row against a (samples, 1) column gives the
    # (samples, classes) mask directly, with no hard-coded class count.
    class_plates = plate_groups[np.newaxis, :, exp_to_group[idx]]
    sample_plates = sub_test.plate.values[:, np.newaxis]
    pp_mult[class_plates != sample_plates] = 0
    return pp_mult

In [None]:
# Per test experiment: mask the model scores with the experiment's plate group
# and store the best remaining class in the submission frame.
for idx in range(len(all_test_exp)):
    indices = (test_csv.experiment == all_test_exp[idx])

    # Hand over a copy so the masking never touches `predicted` itself.
    preds = select_plate_group(predicted[indices, :].copy(), idx)
    sub.loc[indices, 'sirna'] = preds.argmax(1)

In [None]:
# Fraction of rows whose sirna equals the value in the submission file as stored on disk.
(sub.sirna == pd.read_csv("../input/recursion-cellular-keras-densenet/submission.csv").sirna).mean()

In [None]:
# Write the id_code and sirna columns to ../working/submission.csv, without the index.
sub.to_csv('../working/submission.csv', index=False, columns=['id_code','sirna'])