In [1]:
import os
import sys
import numpy as np
import pandas as pd
import random

from argparse import ArgumentParser
import datetime

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.backends.cudnn as cudnn

from pytvision.transforms import transforms as mtrans
from pytvision import visualization as view

sys.path.append('../')
from torchlib.datasets.datasets import ATLASDataset 
from torchlib.neuralnet import NeuralNetClassifier
from misc import get_transforms_det

from sklearn import metrics

In [2]:
# Run configuration shared by the cells below.
pathname = os.path.expanduser('~/.datasets')  # '~' expanded to the current user's home
projectnet = '../out/netruns'                 # directory that holds one folder per training run

# Device / seed settings that are passed to the classifier.
no_cuda, seed, gpu = False, 1, 0

# Settings used when building the DataLoader.
batch_size, workers = 1, 0
num_input_channels = 3

# Echo the two paths, one per line.
print(pathname, projectnet, sep='\n')

/root/.datasets
../out/netruns


In [3]:
!ls ../out/netruns/

atlas_baseline_preactresnet18_dice_adam_atlas_001
atlas_baseline_preactresnet18_dice_sgd_atlas_001
atlas_baseline_preactresnet18_focal_adam_atlas_001
atlas_baseline_preactresnet18_focal_sgd_atlas_001
atlas_baseline_preactresnet18_l1_sgd_atlas_001
atlas_baseline_preactresnet18_mse_sgd_atlas_001
atlas_baseline_preactresnet34_focal_adam_atlas_002
atlas_baseline_resnet18_focal_adam_atlas_003
baseline_preactresnet18_mse_sgd_atlas_001


In [4]:
# Choose the training run and the checkpoint file to evaluate.
name = 'atlas_baseline_resnet18_focal_adam_atlas_003'
model = 'model_best.pth.tar'  # alternative: a numbered checkpoint such as 'chk000025.pth.tar'

# <projectnet>/<name> is the run folder; checkpoints live in its 'models' subfolder.
project = os.path.join(projectnet, name)
pathmodels = os.path.join(project, 'models', model)

print(project, pathmodels, sep='\n')

../out/netruns/atlas_baseline_resnet18_focal_adam_atlas_003
../out/netruns/atlas_baseline_resnet18_focal_adam_atlas_003/models/model_best.pth.tar


In [5]:
# Build the classifier wrapper for the selected run.
# NOTE(review): the keyword is spelled `patchproject` here; presumably that is
# the name the NeuralNetClassifier constructor expects -- confirm before renaming.
network = NeuralNetClassifier(
    patchproject=project,
    nameproject=name,
    no_cuda=no_cuda,
    seed=seed,
    gpu=gpu
    )

cudnn.benchmark = True

# Load the checkpoint and stop with an explicit, descriptive error when it
# cannot be loaded. A bare `assert(False)` carries no message and is removed
# entirely when Python runs with -O, which would let the notebook continue
# with an unloaded model.
if network.load( pathmodels ) is not True:
    raise RuntimeError('could not load checkpoint: {}'.format(pathmodels))

Without the incoming socket you cannot receive events from the server or register event handlers to your Visdom client.


=> loading checkpoint '../out/netruns/atlas_baseline_resnet18_focal_adam_atlas_003/models/model_best.pth.tar'
=> loaded checkpoint for resnet18 arch!


In [6]:
# Location of the competition data and labels used to name the output files.
pathdata='~/.kaggle/competitions/human-protein-atlas-image-classification'
name_dataset='atlas'
subset='test'

# Dataset over the test images listed in the sample submission file.
# The image folder and channel count reuse `subset` and `num_input_channels`
# instead of repeating the literals, so the settings cannot drift apart.
data = ATLASDataset(
    path=pathdata,
    train=False,
    folders_images=subset,
    metadata='sample_submission.csv',
    #count=100,
    num_channels=num_input_channels,
    transform=get_transforms_det( network.size_input ), #get_transforms_aug
    )

# shuffle=False keeps the samples in dataset order.
dataloader = DataLoader(data, batch_size=batch_size, shuffle=False, num_workers=workers )

print('Data loader ')
print(len(dataloader))
print(len(data))

Data loader 
11702
11702


In [7]:
# Run the network over the whole loader.
# NOTE(review): presumably Id holds the sample ids, Yhat the predicted scores and
# Y the targets -- confirm against NeuralNetClassifier.predict.
Id, Yhat, Y = network.predict(dataloader)

# One row per sample: the id in the first column followed by the scores.
id_column = np.expand_dims(Id, axis=1)
df = pd.DataFrame(np.concatenate((id_column, Yhat), axis=1))

# Store the table inside the run folder.
predictions_csv = os.path.join(project, '{}_{}_{}_dp.csv'.format(subset, name, name_dataset))
df.to_csv(predictions_csv, index=False, encoding='utf-8')

print('dir: {}'.format(project))
print('DONE!!!')

100%|██████████| 11702/11702 [08:03<00:00, 24.23it/s]


dir: ../out/netruns/atlas_baseline_resnet18_focal_adam_atlas_003
DONE!!!


In [8]:
# Read the saved prediction table back from the run folder and preview it.
predictions_csv = os.path.join(project, '{}_{}_{}_dp.csv'.format(subset, name, name_dataset))
df = pd.read_csv(predictions_csv)
df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19,20,21,22,23,24,25,26,27,28
0,00008af0-bad0-11e8-b2b8-ac1f6b6435d0,0.014977,0.00047,0.505491,0.442924,4e-05,0.000245,6.9e-05,0.230353,0.002472,...,0.000587,0.000635,0.000852,0.000167,2.3e-05,0.000466,0.000219,0.000251,0.000331,0.000563
1,0000a892-bacf-11e8-b2b8-ac1f6b6435d0,0.017088,0.000307,3.6e-05,0.000215,1.9e-05,0.090377,0.000679,0.001496,0.000222,...,0.000379,0.977912,9e-06,0.017314,1.8e-05,0.004228,0.005367,0.003088,0.000925,0.000327
2,0006faa6-bac7-11e8-b2b7-ac1f6b6435d0,0.959035,0.011708,0.039709,0.008723,0.007289,0.075253,0.000298,0.017862,0.004154,...,0.005471,0.079039,0.001473,0.034288,0.004887,0.001043,0.00114,0.82905,0.00192,0.001345
3,0008baca-bad7-11e8-b2b9-ac1f6b6435d0,0.997869,0.001703,0.001009,0.0008,0.000308,0.002459,0.002811,0.096985,0.001681,...,0.037807,5.6e-05,0.000351,0.010953,0.0002,0.001732,0.000642,0.396955,0.01628,0.006389
4,000cce7e-bad4-11e8-b2b8-ac1f6b6435d0,0.252206,0.000484,0.013611,0.003946,0.030565,0.063434,0.005719,0.012214,0.003841,...,0.008951,0.170868,0.004416,0.022637,0.015988,0.071211,0.007,0.763048,0.016874,0.008542


In [9]:
# `DataFrame.as_matrix()` is deprecated and was removed in pandas 1.0;
# `.values` returns the same NumPy array and exists in every pandas version.
result = df.values

# Column 0 carries the sample ids, the remaining columns the scores.
iDs = result[:,0]
# The frame mixes a string column with numeric ones, so `result` is an
# object array; cast the score part to float for the numeric work below.
Yhat = result[:,1:].astype(float)

print(iDs.shape)
print(Yhat.shape)


(11702,)
(11702, 28)


In [10]:
# For every score column, count how many rows exceed 0.5.
np.sum(Yhat > 0.5, axis=0)

array([5078,  357,  919,  264,  274,  539,  248,  798,    5,    6,    2,
        286,  173,  136,  390,    3,  202,  168,  250,  299,   32, 1397,
        138,  780,  122, 3278,   97,    2])

In [84]:
# NOTE(review): scratch cell (its execution count is out of order); `pred` is not
# used by the submission cell, which rebuilds the strings with its own threshold.
# For each row, join the indices of the scores above 0.4 with single spaces.
pred = [' '.join(str(label) for label in np.where(yhat > 0.4)[0]) for yhat in Yhat]
#code  = ' '.join( map(str, code) )
# Sanity check: np.where on an all-False mask yields an empty index array.
np.where(np.array([1, 2, 3]) < 0)[0].size

0

In [45]:
# submission_filepath = 'submission.csv'
# th=0.5
# #submission = [ {'Id': Id, 'Predicted': ' '.join( map(str, np.where( yhat > th )[0])  if len(np.where( yhat > th )[0])>0 else '0'  ) }  for (Id, yhat) in zip( iDs,Yhat) ]
# submission = [ {'Id': Id, 'Predicted': ' '.join( map(str, np.where( yhat > th )[0]) ) }  for (Id, yhat) in zip( iDs,Yhat) ]
# submission = pd.DataFrame( submission )
# submission.to_csv(submission_filepath, index=False, encoding='utf-8')
# print('SAVE!!!')
# submission


In [11]:
submission_filepath = 'submission.csv'
th=0.4

# Start from the competition's sample submission so the output keeps its row
# order and columns. The file is located through `pathdata` (the same folder
# the dataset was built from) rather than a hard-coded absolute path, and
# 'Predicted' is read as text because it will receive label strings.
sample_path = os.path.join(os.path.expanduser(pathdata), 'sample_submission.csv')
submission = pd.read_csv(sample_path, dtype={'Id': str, 'Predicted': str})

# One record per predicted sample: the indices of the scores above `th`,
# joined with single spaces. A sample with no score above `th` gets an empty
# string, exactly as before.
#submission = [ {'Id': Id, 'Predicted': ' '.join( map(str, np.where( yhat > th )[0])  if len(np.where( yhat > th )[0])>0 else '0'  ) }  for (Id, yhat) in zip( iDs,Yhat) ]
submission_local = pd.DataFrame(
    [ {'Id': Id, 'Predicted': ' '.join( map(str, np.where( yhat > th )[0]) ) }  for (Id, yhat) in zip( iDs,Yhat) ],
    columns=['Id', 'Predicted'],
)

# Join the predictions onto the sample rows by Id. The previous mask
# `submission['Id'] == submission_local['Id']` compared the two columns
# position by position, which only works when both frames have identical
# length and order. Rows whose Id has no prediction keep the sample value.
predicted_by_id = dict(zip(submission_local['Id'], submission_local['Predicted']))
matched = submission['Id'].isin(submission_local['Id'])
submission.loc[matched, 'Predicted'] = submission.loc[matched, 'Id'].map(predicted_by_id)

submission.to_csv(submission_filepath, index=False, encoding='utf-8')
# Preview only the first rows instead of rendering the whole table.
submission.head(10)



Unnamed: 0,Id,Predicted
0,00008af0-bad0-11e8-b2b8-ac1f6b6435d0,2 3
1,0000a892-bacf-11e8-b2b8-ac1f6b6435d0,19
2,0006faa6-bac7-11e8-b2b7-ac1f6b6435d0,0 25
3,0008baca-bad7-11e8-b2b9-ac1f6b6435d0,0
4,000cce7e-bad4-11e8-b2b8-ac1f6b6435d0,25
5,00109f6a-bac8-11e8-b2b7-ac1f6b6435d0,4 22
6,001765de-bacd-11e8-b2b8-ac1f6b6435d0,0 4 25
7,0018641a-bac9-11e8-b2b8-ac1f6b6435d0,0 25
8,00200f22-bad7-11e8-b2b9-ac1f6b6435d0,0
9,0026f154-bac6-11e8-b2b7-ac1f6b6435d0,25


In [12]:
!kaggle competitions submit -c human-protein-atlas-image-classification -f submission.csv -m "preactresnet18+focalloss+64x64"

100%|########################################| 468k/468k [00:04<00:00, 98.9kB/s]
Successfully submitted to Human Protein Atlas Image Classification

In [24]:
# Re-read the untouched sample submission for inspection.
# NOTE(review): this rebinds `submission`, so the predictions built earlier are
# no longer reachable through that name after this cell runs.
submission = pd.read_csv(
    filepath_or_buffer='/root/.kaggle/competitions/human-protein-atlas-image-classification/sample_submission.csv'
)
submission

Unnamed: 0,Id,Predicted
0,00008af0-bad0-11e8-b2b8-ac1f6b6435d0,0
1,0000a892-bacf-11e8-b2b8-ac1f6b6435d0,0
2,0006faa6-bac7-11e8-b2b7-ac1f6b6435d0,0
3,0008baca-bad7-11e8-b2b9-ac1f6b6435d0,0
4,000cce7e-bad4-11e8-b2b8-ac1f6b6435d0,0
5,00109f6a-bac8-11e8-b2b7-ac1f6b6435d0,0
6,001765de-bacd-11e8-b2b8-ac1f6b6435d0,0
7,0018641a-bac9-11e8-b2b8-ac1f6b6435d0,0
8,00200f22-bad7-11e8-b2b9-ac1f6b6435d0,0
9,0026f154-bac6-11e8-b2b7-ac1f6b6435d0,0
