In [None]:
# Imports
import numpy as np
import pandas as pd
import fastai 
from pathlib import Path
from fastai.vision.all import *
from fastai.basics import *

import albumentations as A

# Input data files are available in the read-only "../input/" directory
import os

In [None]:
# Root directory of the training dataset. get_x reads this module-level `path`
# when it is called, so it also determines where tile images are looked up.
path = Path('../input/hpa-cell-tiles-sample-balanced-dataset')
# Per-cell metadata table; later cells read its image_id, cell_id and image_labels columns.
df = pd.read_csv(path/'cell_df.csv')
df.head()

In [None]:
# Class ids '0'..'18' as strings, plus one 0/1 indicator column per class:
# 1 when the id appears in the row's pipe-separated `image_labels`, else 0.
labels = list(map(str, range(19)))
for x in labels:
    df[x] = [int(x in row_labels.split('|')) for row_labels in df['image_labels']]

In [None]:
# Shuffle the rows reproducibly and renumber them from 0.
# frac=1 keeps the entire dataset; lower it (e.g. 0.1) to train on a subsample.
dfs = df.sample(frac=1, random_state=42).reset_index(drop=True)
len(dfs)

# Preprocessing

In [None]:
# Input getter: maps a metadata row to the path of its cell-tile image.
def get_x(r):
    """Return `<path>/cells/<image_id>_<cell_id>.jpg` for the row `r`.

    `path` is the module-level variable and is read at call time, so
    rebinding `path` changes where tiles are looked up.
    """
    tile_name = '_'.join((r['image_id'], str(r['cell_id']))) + '.jpg'
    return path/'cells'/tile_name

# Target getter: maps a metadata row to its list of class ids.
def get_y(r):
    """Split the row's pipe-separated `image_labels` string into a list of id strings."""
    raw_labels = r['image_labels']
    return raw_labels.split('|')

In [None]:
class AlbumentationsTransform(RandTransform):
    """Apply one Albumentations pipeline to training items and another to all other items.

    `split_idx = None` together with the `before_call` override lets the
    transform see which split each item belongs to. `order = 2` is intended to
    run this after fastai's resize step.
    """
    split_idx, order = None, 2

    def __init__(self, train_aug, valid_aug):
        # Stores both pipelines as `self.train_aug` / `self.valid_aug`.
        store_attr()

    def before_call(self, b, split_idx):
        # Remember the split of the current item so `encodes` can pick a pipeline.
        self.idx = split_idx

    def encodes(self, img: PILImage):
        # Split index 0 selects the training pipeline; anything else uses the validation one.
        pipeline = self.train_aug if self.idx == 0 else self.valid_aug
        augmented = pipeline(image=np.array(img))['image']
        return PILImage.create(augmented)

In [None]:
def get_train_aug(size):
    """Build the Albumentations pipeline applied to training images.

    `size` is passed as both height and width of the random resized crop.
    Every step after the crop is applied with probability 0.5.
    """
    steps = [
        # Random crop of a random region, resized to size x size.
        A.RandomResizedCrop(size, size),
        # Swap rows and columns.
        A.Transpose(p=0.5),
        # Mirror left-right (flip around the vertical axis).
        A.HorizontalFlip(p=0.5),
        # Mirror top-bottom (flip around the horizontal axis).
        A.VerticalFlip(p=0.5),
        # Random affine: translate, scale and rotate.
        A.ShiftScaleRotate(p=0.5),
        # Random shift of hue, saturation and value.
        A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        # Random brightness / contrast change within +/-0.1.
        A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5),
        # Drop out rectangular regions of the image.
        A.CoarseDropout(p=0.5),
        # Drop out square regions of the image.
        # NOTE(review): Cutout overlaps with CoarseDropout above and is deprecated in
        # newer Albumentations releases — confirm against the installed version.
        A.Cutout(p=0.5),
    ]
    return A.Compose(steps)

def get_valid_aug(size):
    """Build the deterministic Albumentations pipeline applied to non-training images.

    Takes a size x size centre crop and then resizes to size x size.
    """
    steps = [
        # Crop the central size x size region (always applied).
        A.CenterCrop(size, size, p=1.),
        # Resize to size x size.
        A.Resize(size, size),
    ]
    return A.Compose(steps, p=1.)

# Build a throwaway pipeline so the notebook displays its configuration; the result is not stored.
# NOTE(review): size=6 looks like an arbitrary inspection value (224 is used for the real pipeline) — confirm.
get_train_aug(6)

In [None]:
# item_tfms run on each image individually: resize to 224, then the
# split-dependent Albumentations pipeline.
item_tfms = [
    Resize(224),
    AlbumentationsTransform(get_train_aug(224), get_valid_aug(224)),
]

# batch_tfms run on a whole collated batch: normalise with the ImageNet statistics.
batch_tfms = [
    Normalize.from_stats(*imagenet_stats),
]

# Batch size.
bs = 6

In [None]:
# Image input -> multi-label target over the fixed vocabulary `labels`.
# The train/validation split is random but seeded, so it is reproducible.
dblock = DataBlock(
    blocks=(ImageBlock, MultiCategoryBlock(vocab=labels)),
    splitter=RandomSplitter(seed=42),
    get_x=get_x,
    get_y=get_y,
    item_tfms=item_tfms,
    batch_tfms=batch_tfms,
)
# Build the DataLoaders from the shuffled metadata frame.
dls = dblock.dataloaders(dfs, bs=bs)

In [None]:
# Preview a batch from the DataLoaders as a visual sanity check.
dls.show_batch()

In [None]:
#learn = cnn_learner(dls, ('../resnet50/resnet50.pth'), metrics=accuracy_multi)
#learn.lr_find()

In [None]:
#learn.fine_tune(4,0.00144)

In [None]:
#learn.export()
# Restore a previously exported Learner instead of training in this notebook.
# NOTE(review): exported learners are pickle files, so only load files you trust.
# The custom get_x, get_y and AlbumentationsTransform presumably need to be
# defined before this call so the pickle can resolve them — confirm.
learn = load_learner('../input/hpamodelmvd/export.pkl')

In [None]:
# Switch to the test-tile dataset. This rebinds the module-level `path` and `df`:
# get_x reads `path` when it is called, so from here on tiles are looked up
# under this directory, and `df` now holds the test metadata.
path = Path('../input/hpa-cell-tiles-test-with-enc-dataset')
df = pd.read_csv(path/'cell_df.csv')
df.head()

In [None]:
# Save a copy of the test metadata in the working directory; a later cell reads it back as `cell_df`.
df.to_csv('cell_df.csv', index=False)

In [None]:
# Build a test DataLoader over the test metadata rows from the learner's DataLoaders.
test_dl = learn.dls.test_dl(df)

In [None]:
# Preview a batch of test items as a visual sanity check.
test_dl.show_batch()

In [None]:
# Run inference over the test DataLoader; the second returned value is discarded.
preds, _ = learn.get_preds(dl=test_dl)

In [None]:
# Inspect the prediction tensor's shape — presumably (n_cells, n_classes); TODO confirm.
preds.shape

In [None]:
# Persist the raw predictions to the working directory.
# NOTE(review): `pickle` is not imported explicitly in the visible import cell;
# presumably it comes in through the fastai star imports — confirm.
with open('preds.pickle', 'wb') as handle:
    pickle.dump(preds, handle)

In [None]:
# Single-label decision per row of `preds`: index of the highest score along the last axis.
cls_prds = preds.argmax(dim=-1)
len(cls_prds), cls_prds

In [None]:
# Competition submission template; later cells rely on its `ID` and `PredictionString` columns.
sample_submission = pd.read_csv('../input/hpa-single-cell-image-classification/sample_submission.csv')
sample_submission.head()

In [None]:
# Store the predicted class per cell as plain Python ints. Converting the tensor
# explicitly avoids depending on how pandas coerces a torch tensor into a column.
df['cls'] = cls_prds.tolist()
# Per-cell prediction string "<class> 1 <enc>"; the confidence is hard-coded to 1.
# Built column-wise, which avoids positional r[0]/r[1] lookups on a labelled row
# (deprecated in recent pandas releases).
df['pred'] = df['cls'].astype(str) + ' 1 ' + df['enc']
df.head()

In [None]:
# Concatenate the per-cell prediction strings of each image into one space-separated string.
per_image_pred = df.groupby(['image_id'])['pred'].apply(' '.join)
subm = per_image_pred.reset_index()
# subm = subm.loc[3:]
subm.head()

In [None]:
# Left-join the per-image prediction strings onto the submission template so every
# template row is kept; images without predictions get NaN in `pred`.
sub = sample_submission.merge(subm, how="left", left_on='ID', right_on='image_id')

In [None]:
# Inspect the merged frame.
sub.head()

In [None]:
def isNaN(num):
    """Return the result of `num != num`, which is True for a float NaN."""
    differs_from_itself = num != num
    return differs_from_itself

In [None]:
# Overwrite the template's PredictionString wherever the merge produced a prediction;
# rows without one (NaN in `pred`) keep the template value.
# A single masked .loc assignment replaces the row loop and its chained
# `sub.PredictionString.loc[i] = ...` write, which pandas cannot guarantee reaches `sub`.
has_pred = sub['pred'].notna()
sub.loc[has_pred, 'PredictionString'] = sub.loc[has_pred, 'pred']

In [None]:
# Keep only the template's columns, in template order.
sub = sub[sample_submission.columns]
sub.head()

In [None]:
# Write the submission file to the working directory.
sub.to_csv('submission.csv', index=False)

In [None]:
# Multi-label decoding: for every cell keep each class whose score exceeds
# `threshold`, paired with that score. If no class passes, fall back to the
# single highest-scoring class.
cell_df = pd.read_csv('cell_df.csv')

threshold = 0.0

cell_classes = []
for scores in preds:
    # flatten() always yields a 1-D index tensor, so tolist() returns a list
    # even when exactly one class passes the threshold.
    picked = (scores > threshold).nonzero().flatten().tolist()
    if not picked:
        picked = [scores.argmax().item()]
    cell_classes.append([(c, scores[c].item()) for c in picked])

assert len(cell_classes) == len(cell_df), "predictions and cell_df rows are out of sync"

# Assign the whole column at once. Writing row by row through
# `cell_df['cls'].loc[i] = ...` is chained assignment and may never reach cell_df.
cell_df['cls'] = pd.Series(cell_classes, index=cell_df.index, dtype=object)

In [None]:
def combine(r):
    """Build the prediction string for one cell.

    Parameters
    ----------
    r : pandas.Series or sequence
        Two entries: first an iterable of (class_id, score) pairs, second the
        cell's `enc` string.

    Returns
    -------
    str
        "<class_id> <score> <enc>" for every pair, joined by single spaces
        (empty string when there are no pairs).
    """
    # Use .iloc for pandas rows: positional `r[0]` on a label-indexed Series is
    # deprecated. Plain tuples/lists keep working through ordinary indexing.
    if hasattr(r, 'iloc'):
        cls, enc = r.iloc[0], r.iloc[1]
    else:
        cls, enc = r[0], r[1]
    return ' '.join(str(c[0]) + ' ' + str(c[1]) + ' ' + enc for c in cls)

# Spot-check `combine` on a single row.
# NOTE(review): row label 24 is hard-coded, so this raises KeyError if cell_df has no such row — confirm intent.
combine(cell_df[['cls', 'enc']].loc[24])

In [None]:
# One prediction string per cell, built by `combine` from its (class, score) pairs and its `enc` value.
cell_df['pred'] = cell_df[['cls', 'enc']].apply(combine, axis=1)
cell_df.head()

In [None]:
# Reload a fresh copy of the submission template for the multi-label submission.
sample_submission = pd.read_csv('../input/hpa-single-cell-image-classification/sample_submission.csv')
sample_submission.head()

In [None]:
# Aggregate the thresholded per-cell strings in cell_df['pred'] into one string per
# image, then left-join them onto the template. Aggregating here (rather than reusing
# an earlier per-image table) ensures the merge reflects the multi-label predictions.
subm_multi = cell_df.groupby('image_id')['pred'].agg(' '.join).reset_index()
sub = pd.merge(
    sample_submission,
    subm_multi,
    how="left",
    left_on='ID',
    right_on='image_id',
)
sub.head()

In [None]:
def isNaN(num):
    """Return the result of `num != num`, which is True for a float NaN."""
    differs_from_itself = num != num
    return differs_from_itself

# Overwrite the template's PredictionString wherever the merge produced a prediction;
# rows without one (NaN in `pred`) keep the template value.
# A single masked .loc assignment replaces the row loop and its chained
# `sub.PredictionString.loc[i] = ...` write, which pandas cannot guarantee reaches `sub`.
has_pred = sub['pred'].notna()
sub.loc[has_pred, 'PredictionString'] = sub.loc[has_pred, 'pred']

In [None]:
# Keep only the template's columns, in template order.
# NOTE(review): nothing in the visible cells writes this frame to disk;
# 'submission.csv' was written by an earlier cell — confirm which version should be submitted.
sub = sub[sample_submission.columns]
sub.head()