# fastai quick submission template

Solution overview: https://www.kaggle.com/c/hpa-single-cell-image-classification/discussion/221550

I want to experiment quickly and can't afford to wait long for the leaderboard (LB) score, especially since, with weak labels, I haven't found a reasonable way to do cross-validation (CV) and therefore depend on the public LB score. I have pre-processed the public test images in the same way as my prototyping dataset, and I submit predictions only for that subset. These submissions will get a zero score on the private LB, but there is still lots of time left in the competition and better approaches will be developed.

In [None]:
import pandas as pd
import numpy as np
import pickle
import torch
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
# Load the pickled predictions (`tta.pickle`) shipped with the prototyping
# dataset.  NOTE: unpickling can execute arbitrary code, so only read pickles
# that come from a trusted source.
tta_path = '../input/fastai-cell-tile-prototyping-3/tta.pickle'
with open(tta_path, 'rb') as tta_file:
    preds = pickle.load(tta_file)

In [None]:
# Bar chart of the column-wise mean of `preds`, one bar per class index.
labels = range(19)
class_means = preds.mean(dim=0).numpy()
plt.bar(x=labels, height=class_means)
plt.show()

In [None]:
# Histogram of the largest score in each row of `preds`.
# Tensor.max(dim=...) returns a (values, indices) pair; only the values are
# needed here.
row_max = preds.max(dim=-1)[0].numpy()
plt.hist(row_max, bins=100)
plt.show()

In [None]:
# One score histogram per class, laid out on a 5 x 4 grid of subplots.
n_rows, n_cols = 5, 4
fig = plt.figure(figsize=(16, 12))

for class_idx in labels:
    # Subplot positions are 1-based, class indices are 0-based.
    class_ax = fig.add_subplot(n_rows, n_cols, class_idx + 1)
    class_scores = preds[:, class_idx].numpy()
    class_ax.hist(class_scores, bins=100)
    class_ax.set_title(class_idx)

plt.show()

In [None]:
# Per-cell metadata table from the prototyping dataset.
cell_df = pd.read_csv('../input/fastai-cell-tile-prototyping-3/cell_df.csv')
# Placeholder column that will hold each cell's predicted classes.
cell_df['cls'] = ''
# A notebook cell only renders its final expression, so the preview has to
# come last to be displayed (it was previously evaluated and discarded).
cell_df.head()

In [None]:
# Minimum score a class must exceed to be reported for a cell.
threshold = 0.0

# For every row of `preds`, collect (class_index, score) pairs for the classes
# whose score exceeds `threshold`.
cls_per_cell = []
for cell_scores in preds:
    # flatten() always yields a 1-D index tensor, so tolist() returns a list
    # even when exactly one class passes the threshold (squeeze() would
    # collapse that case to a bare int).
    selected = torch.nonzero(cell_scores > threshold).flatten().tolist()
    if not selected:
        # Nothing cleared the threshold: fall back to the top-scoring class so
        # every cell still gets a prediction.
        selected = [cell_scores.argmax().item()]
    cls_per_cell.append([(c, cell_scores[c].item()) for c in selected])

# Assign the whole column in one step.  The previous per-row
# `cell_df['cls'].loc[i] = ...` was chained assignment, which triggers
# SettingWithCopyWarning and does not update `cell_df` at all when pandas
# Copy-on-Write is active.  Building the Series against cell_df.index also
# raises a ValueError if the number of prediction rows and cells differ,
# instead of silently leaving rows without predictions.
cell_df['cls'] = pd.Series(cls_per_cell, index=cell_df.index, dtype=object)

In [None]:
def combine(r):
    """Build the submission string for a single cell.

    Args:
        r: Row whose first two items are ``(cls, enc)`` -- for example a row
            of ``cell_df[['cls', 'enc']]`` or a plain tuple/list.  ``cls`` is
            an iterable of ``(class_id, confidence)`` entries and ``enc`` is
            the string appended to every entry (presumably the cell's encoded
            mask -- confirm against the dataset).  Items after the first two
            are ignored.

    Returns:
        Space-separated ``"<class_id> <confidence> <enc>"`` triplets, one per
        entry of ``cls``; an empty string when ``cls`` is empty.
    """
    # Unpack by iteration instead of r[0] / r[1]: on a Series with string
    # labels, integer keys passed to [] are deprecated positional lookups in
    # recent pandas, whereas iteration yields the values for both Series and
    # plain sequences.
    cls, enc, *_ = r
    return ' '.join(f'{c[0]} {c[1]} {enc}' for c in cls)

# Spot-check the formatter on a single row (index label 24).
combine(cell_df.loc[24, ['cls', 'enc']])

In [None]:
# Format every cell's (cls, enc) pair into its prediction string, row by row.
cls_and_enc = cell_df.loc[:, ['cls', 'enc']]
cell_df['pred'] = cls_and_enc.apply(combine, axis='columns')
cell_df.head()

In [None]:
# Concatenate the per-cell prediction strings of each image into one
# space-separated string per image_id.
pred_by_image = cell_df.groupby(['image_id'])['pred']
subm = pred_by_image.apply(' '.join).reset_index()
# Debug aid (disabled): drop the first three images.
# subm = subm.loc[3:]
subm.head()

In [None]:
# The competition's sample submission provides the required rows and columns.
sample_submission_path = '../input/hpa-single-cell-image-classification/sample_submission.csv'
sample_submission = pd.read_csv(sample_submission_path)
sample_submission.head()

In [None]:
# Left-join so every row of the sample submission is kept; images without a
# prediction end up with NaN in the `pred` column.
sub = sample_submission.merge(subm, how="left", left_on='ID', right_on='image_id')
sub.head()

In [None]:
def isNaN(num):
    """Return True when ``num`` is NaN.

    Relies on NaN being a value that compares unequal to itself, so ordinary
    non-float inputs such as strings yield False instead of raising.
    """
    return num != num

# Overwrite PredictionString only for rows that received a prediction from the
# merge; rows whose `pred` is NaN keep the sample submission's value.
# A single masked .loc assignment replaces the previous row loop, whose
# `sub.PredictionString.loc[i] = ...` was chained assignment: it triggers
# SettingWithCopyWarning and leaves `sub` unchanged when pandas Copy-on-Write
# is active.
has_pred = sub['pred'].notna()
sub.loc[has_pred, 'PredictionString'] = sub.loc[has_pred, 'pred']

In [None]:
# Keep only the sample submission's columns, in its column order; this drops
# the helper columns added by the merge.
submission_columns = sample_submission.columns
sub = sub.loc[:, submission_columns]
sub.head()

In [None]:
# Write the submission file without the DataFrame index column.
submission_path = 'submission.csv'
sub.to_csv(submission_path, index=False)