In [1]:
import os
import sys
import numpy as np
import pandas as pd
import random

from argparse import ArgumentParser
import datetime

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.backends.cudnn as cudnn

from pytvision.transforms import transforms as mtrans
from pytvision import visualization as view

sys.path.append('../')
from torchlib.datasets.datasets import ATLASDataset 
from torchlib.neuralnet import NeuralNetClassifier
from misc import (get_transforms_det, get_transforms_hflip, get_transforms_vflip )

from sklearn import metrics

In [2]:
# --- Locations -------------------------------------------------------------
# Dataset root (the user's home directory is expanded here) and the folder
# holding the training runs.
projectnet = '../out/netruns'
pathname = os.path.expanduser('~/.datasets')

# --- Runtime options -------------------------------------------------------
seed = 1
gpu = 0
no_cuda = False
parallel = False

# --- Data loading ----------------------------------------------------------
num_input_channels = 3
batch_size = 10
workers = 0

# Echo both locations, one per line.
print(pathname, projectnet, sep='\n')

/home/fernandez/.datasets
../out/netruns


In [3]:
# Shell escape: list what is stored under the runs directory (../out/netruns/).
!ls ../out/netruns/

atlas_baseline_preactresnet18_bcewl_adam_atlas_001  results
atlas_baseline_preactresnet18_mix_sgd_atlas_002


In [4]:
# Which run to evaluate and which checkpoint file of that run to load.
name = 'atlas_baseline_preactresnet18_mix_sgd_atlas_002'
model = 'model_best.pth.tar' #'chk000025.pth.tar', model_best

# Run folder, and the checkpoint inside its 'models' sub-folder.
project = os.path.join(projectnet, name)
pathmodels = os.path.join(project, 'models', model)

# Folder where the prediction CSVs are written / read back.
pathnameout = os.path.join(projectnet, 'results')

print(project, pathmodels, sep='\n')

../out/netruns/atlas_baseline_preactresnet18_mix_sgd_atlas_002
../out/netruns/atlas_baseline_preactresnet18_mix_sgd_atlas_002/models/model_best.pth.tar


In [5]:
# Build the classifier wrapper for the selected run.
# NOTE: `patchproject` is the keyword spelling used by this call; keep it as is.
network = NeuralNetClassifier(
    patchproject=project,
    nameproject=name,
    no_cuda=no_cuda,
    seed=seed,
    gpu=gpu,
    parallel=parallel,
    )

cudnn.benchmark = True

# Load the checkpoint and stop with a descriptive error if it fails.
# An explicit raise is used instead of `assert(False)` because assert
# statements are removed when Python runs with -O and carry no message.
if network.load( pathmodels ) is not True:
    raise RuntimeError("could not load checkpoint '{}'".format(pathmodels))

Without the incoming socket you cannot receive events from the server or register event handlers to your Visdom client.


=> loading checkpoint '../out/netruns/atlas_baseline_preactresnet18_mix_sgd_atlas_002/models/model_best.pth.tar'
=> loaded checkpoint for preactresnet18 arch!


In [8]:
# Location of the competition files and labels used when naming the outputs.
pathdata='~/.kaggle/competitions/human-protein-atlas-image-classification'
name_dataset='atlas'
subset='test'

# Test-time augmentation: one transform pipeline per pass over the test set.
# The gray / aug / aug2 variants are currently disabled.
tta_preprocess = [ 
    get_transforms_det( network.size_input ), 
    get_transforms_hflip(network.size_input), 
    get_transforms_vflip(network.size_input), 
    #get_transforms_gray(network.size_input),
    #get_transforms_aug(network.size_input),
    #get_transforms_aug2(network.size_input)
    ]

# One DataLoader per TTA transform, all over the same test images.
dataloaders = []
for transform in tta_preprocess:    
    # test dataset    
    data = ATLASDataset(        
        path=pathdata, 
        train=False,
        folders_images='test', 
        metadata='sample_submission.csv',
        #count=100,
        # Use the notebook-level setting instead of a second hard-coded 3.
        num_channels=num_input_channels,
        transform=transform,
        )
    # shuffle=False keeps the sample order identical in every pass; the later
    # cells average the passes row by row and reuse the ids of the first file.
    dataloader = DataLoader(data, batch_size=batch_size, shuffle=False, num_workers=workers )   
    dataloaders.append(dataloader)

print(len(dataloaders))

3


In [9]:
# Seed Python's `random` module before running inference.
random.seed(seed)

# CSV files already sitting in the results folder. Their count is used as an
# offset so the files written below get indices after the existing ones.
files = [f for f in sorted(os.listdir(pathnameout)) if f.rsplit('.', 1)[-1] == 'csv']

# One prediction pass per dataloader; each pass is dumped to its own CSV whose
# first column holds the ids and whose remaining columns hold `Yhat`.
for i, data in enumerate(dataloaders):
    Id, Yhat, Y = network.predict(data)
    df = pd.DataFrame(np.concatenate((Id[:, None], Yhat), axis=1))
    filename = '{}_{}_{}_{}_dp.csv'.format(subset, name, name_dataset, i + len(files))
    df.to_csv(os.path.join(pathnameout, filename), index=False, encoding='utf-8')

print('dir: {}'.format(pathnameout), 'DONE!!!', sep='\n')

100%|██████████| 1171/1171 [03:26<00:00,  5.66it/s]
100%|██████████| 1171/1171 [03:26<00:00,  5.67it/s]
  1%|▏         | 17/1171 [00:02<03:21,  5.72it/s]

KeyboardInterrupt: 

In [10]:
# Gather every prediction CSV present in the results folder (sorted by name).
files = [ f for f in sorted(os.listdir(pathnameout)) if f.split('.')[-1] == 'csv' ]
print(files)

l = len(files)
if l == 0:
    # Without this guard the transpose below fails with an obscure axes error.
    raise FileNotFoundError("no prediction CSV found in '{}'".format(pathnameout))

dp = []; ids = []
for f in files:
    mdata = pd.read_csv( os.path.join(pathnameout , f ) )
    # DataFrame.as_matrix() was removed in pandas 1.0; `.values` returns the
    # same array and is available in every pandas version.
    dpdata = mdata.values
    # Column 0 holds the sample ids, the remaining columns the scores.
    ids.append( dpdata[:, 0] )
    # The id column makes `dpdata` an object array; convert the score columns
    # back to float so the stack below is numeric.
    dp.append( dpdata[:, 1:].astype(float) )

# Every file must contribute the same (rows, columns) grid to be stackable.
shapes = set(scores.shape for scores in dp)
if len(shapes) != 1:
    raise ValueError('prediction files have different shapes: {}'.format(sorted(shapes)))

# Stack as (file, row, column) and move the file axis last -> (row, column, file).
dp = np.array(dp).transpose((1,2,0))
ids = np.array(ids)

print(dp.shape)

['test_atlas_baseline_preactresnet18_bcewl_adam_atlas_001_atlas_dp.csv', 'test_atlas_baseline_preactresnet18_mix_sgd_atlas_002_atlas_6_dp.csv', 'test_atlas_baseline_preactresnet18_mix_sgd_atlas_002_atlas_7_dp.csv']
(11702, 28, 3)


In [11]:
def mean_ruler( dp ):
    """
    Mean combination rule: average the scores along the third axis.

    Reference given by the original author: Josef Kittler [18],
    "On combining classifiers"
    https://dspace.cvut.cz/bitstream/handle/10467/9443/1998-On-combining-classifiers.pdf?sequence=1

    Args:
        @dp: array of shape n x c x l (presumably samples x classes x
             classifiers -- confirm against the caller).
    Returns:
        Array of shape n x c holding the mean over the last (l) axis.
    """
    return dp.mean(axis=2)
    

In [13]:
# Fuse the stacked scores and turn every row into a space-separated label string.
th = 0.4                # score cut-off for emitting a label
iDs = ids[0, :]         # ids taken from the first prediction file
P = mean_ruler(dp)

# For each row of P, keep the column indices whose fused score exceeds `th`.
pred = [' '.join(str(k) for k in np.where(p > th)[0]) for p in P]

print(dp.shape, P.shape, sep='\n')

(11702, 28, 3)
(11702, 28)


In [14]:
submission_filepath = 'submission.csv'
th=0.4

# Start from the competition's sample submission so the Id column and the row
# order of the output file are exactly the ones Kaggle provides.
submission = pd.read_csv('~/.kaggle/competitions/human-protein-atlas-image-classification/sample_submission.csv')

# One record per predicted sample: the space-separated column indices of P
# whose score exceeds `th` (an empty string when none does).
submission_local = pd.DataFrame(
    [ {'Id': Id, 'Predicted': ' '.join( map(str, np.where( p > th )[0]) ) } for (Id, p) in zip( iDs, P ) ],
    columns=['Id', 'Predicted'],
    )

# Merge by Id rather than by row position. Comparing the two Id columns
# element-wise only works when both frames have the same length and order:
# it raises otherwise, or silently skips rows that are merely permuted.
# Ids without a prediction keep the value from the sample submission.
predicted_by_id = dict(zip(submission_local['Id'], submission_local['Predicted']))
submission['Predicted'] = [
    predicted_by_id.get(Id, default)
    for (Id, default) in zip(submission['Id'], submission['Predicted'])
    ]

submission.to_csv(submission_filepath, index=False, encoding='utf-8')
submission

Unnamed: 0,Id,Predicted
0,00008af0-bad0-11e8-b2b8-ac1f6b6435d0,2
1,0000a892-bacf-11e8-b2b8-ac1f6b6435d0,5
2,0006faa6-bac7-11e8-b2b7-ac1f6b6435d0,0 25
3,0008baca-bad7-11e8-b2b9-ac1f6b6435d0,0 25
4,000cce7e-bad4-11e8-b2b8-ac1f6b6435d0,
5,00109f6a-bac8-11e8-b2b7-ac1f6b6435d0,4
6,001765de-bacd-11e8-b2b8-ac1f6b6435d0,4 25
7,0018641a-bac9-11e8-b2b8-ac1f6b6435d0,0 25
8,00200f22-bad7-11e8-b2b9-ac1f6b6435d0,0
9,0026f154-bac6-11e8-b2b7-ac1f6b6435d0,25


In [15]:
# Shell escape: upload submission.csv to the competition with the Kaggle CLI
# (-c competition, -f file to send, -m message attached to the submission).
!kaggle competitions submit -c human-protein-atlas-image-classification -f submission.csv -m "tta-preactresnet18+bceloss+128x128"

100%|█████████████████████████████████████████| 466k/466k [00:01<00:00, 378kB/s]
Successfully submitted to Human Protein Atlas Image Classification