In [None]:
import os
os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,device=gpu0,floatX=float32" 

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import time
import cv2
import random
import os
import h5py
#import models
import pandas as pd
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.model_selection import ShuffleSplit
import datetime

#import pandas as pd
import glob
from tabulate import tabulate
import os
import datetime
from keras.models import load_model


# get package versions
def get_version(*vars):
    """Print ``name: version`` for each importable package name given.

    Args:
        *vars: package names as strings (e.g. ``'numpy'``).

    A package that cannot be imported is reported as ``not installed`` and a
    module that exposes no ``__version__`` attribute as ``unknown``, so one
    problematic package does not abort the report for the remaining ones.
    """
    for var in vars:
        try:
            # __import__ returns the top-level package for dotted names.
            module = __import__(var)
        except ImportError:
            print ('%s: %s' %(var,'not installed'))
            continue
        print ('%s: %s' %(var,getattr(module,'__version__','unknown')))
    
# report the versions of the main packages this notebook relies on
get_version('keras','numpy','matplotlib','cv2','theano')

### Settings

In [None]:

# normalization type passed to preprocess() by the inference cells
# (alternative supported by preprocess(): 'minus1plus1')
#norm_type='minus1plus1'
norm_type='zeroMeanUnitVar'

# data folder (prefix for every data path below; note the trailing slash)
path2data='../data/'

# stage 1 training data
path2stage1_train=path2data+'stage1_train.hdf5'

# stage 1 test data
path2stage1_test=path2data+'stage1_test.hdf5'

# stage 1 sample submission, labels and leaderboard solution (csv files)
path2stage1submission = path2data+'stage1_sample_submission.csv'
path2_stage1_labels=path2data+"stage1_labels.csv"
path2_stage1_solution=path2data+"stage1_solution.csv"

# stage 2 sample submission
path2stage2submission = path2data+'stage2_sample_submission.csv'

# zone order as it appears in the csv files: the names are sorted as strings
# (zone1, zone10, ..., zone17, zone2, ...), not numerically.
# dispSampleSubject() uses this list to map a label column index to a zone name.
zones=['zone1','zone10','zone11','zone12',
       'zone13','zone14','zone15','zone16',
       'zone17','zone2','zone3','zone4',
       'zone5','zone6','zone7','zone8','zone9']

# human-readable description of each zone, keyed by zone name
# NOTE(review): a few descriptions contain typos ('Lowe', 'Tigh', 'Sensetive');
# they are printed as-is by dispSampleSubject().
body_zone_desc={
        'zone1' :'Right Bicep',
        'zone10':'Upper left Hip/thigh',
        'zone11':'Lowe Right Thigh',
        'zone12':'Lower left Thigh',
        'zone13':'Right Calf',
        'zone14':'Left Calf(below knee)',
        'zone15':'Right Ankle Bone',
        'zone16':'Left Ankle Bone',
        'zone17':'Upper Back',    
        'zone2':'Right Forearm',
        'zone3':'Left Bicep',
        'zone4':'Left Forearm',
        'zone5':'Upper Chest',
        'zone6':'Right Rib Cage and Abs',
        'zone7':'Left Side Rib Cage and Abs',
        'zone8':'Upper Right Hip/Tigh',
        'zone9':'Groin (Sensetive area)'
        } 

### Utilities

In [None]:
def array_stats(X):
    """Print shape, dtype, min, max, mean and standard deviation of ``X``.

    Args:
        X: array-like; converted with ``np.asarray`` before the statistics
            are computed.
    """
    arr = np.asarray(X)
    print ('array shape: ',arr.shape, arr.dtype)
    summary = (np.min(arr), np.max(arr), np.mean(arr), np.std(arr))
    print ('min: {}, max: {}, avg: {:.3}, std:{:.3}'.format(*summary))

def preprocess(X,xnormType=None):
    """Normalize ``X`` according to ``xnormType`` and return the result.

    Args:
        X: array-like of numbers. The input itself is not modified; the
            normalization works on a float32 copy.
        xnormType: one of
            ``'minus1plus1'``     - divide by the global maximum, then map
                                    [0, 1] to [-1, 1] (``(x/max - 0.5) * 2``);
            ``'zeroMeanUnitVar'`` - subtract the global mean and divide by the
                                    global standard deviation.
            Any other value prints a warning and returns ``X`` unchanged.

    Returns:
        float32 array for the two known normalization types, otherwise ``X``
        exactly as passed in.

    The division is skipped when the divisor (maximum / standard deviation)
    is zero, so constant or all-zero inputs never produce NaN/inf values.
    """
    if xnormType=='minus1plus1':
        X=np.asanyarray(X).astype('float32')
        # guard mirrors the std check below: an all-zero (or empty) input
        # must not be divided by zero
        if X.size>0:
            maxX=np.max(X)
            if maxX!=0.0:
                X/=maxX
        X-=0.5
        X*=2
    elif xnormType=='zeroMeanUnitVar':
        X=np.asanyarray(X).astype('float32')
        if X.size>0:
            X-=np.mean(X)
            stdX=np.std(X)
            if stdX>0.0:
                X/=stdX
    else:
        print('no normalization type found!!')

    return X
    
def logloss(y_true,y_pred):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Args:
        y_true: array-like of target values in [0, 1].
        y_pred: array-like of predicted probabilities, same shape as
            ``y_true``.

    Returns:
        ``-mean(y*log(p) + (1-y)*log(1-p))`` over all elements, where both
        ``y`` and ``p`` are first clipped to ``[1e-15, 1 - 1e-15]``.

    Raises:
        ValueError: if the two inputs do not have the same shape.
    """
    eps=10**(-15)
    # Work in float64: in float32 the upper bound 1 - 1e-15 rounds to exactly
    # 1.0, which would turn log(1 - p) into -inf for a prediction of 1.0.
    y_true=np.asarray(y_true,dtype='float64')
    y_pred=np.asarray(y_pred,dtype='float64')
    if y_true.shape!=y_pred.shape:
        raise ValueError('shape mismatch: y_true %s vs y_pred %s'
                         %(y_true.shape,y_pred.shape))

    # clip outputs
    y_true=np.clip(y_true,eps,1-eps)
    y_pred=np.clip(y_pred,eps,1-eps)

    # calculate log loss for every element at once
    per_element=y_true*np.log(y_pred)+(1-y_true)*np.log(1-y_pred)
    return -np.mean(per_element)

def getPath2data4model(path2model,data_type="",path2data=""):
    """Build the path of the HDF5 data file that matches a model's name.

    The base name of ``path2model`` selects the variant:
      * image size is 512x660 when the name contains both "512" and "660",
        otherwise 256x330;
      * the first of "Rotate90", "Rotate180", "Rotate270" found in the name
        appends "90", "180" or "270" to ``data_type``.

    Args:
        path2model: path or name of the model; only its base name is used.
        data_type: data set prefix, e.g. "stage1_leader".
        path2data: folder prefix, concatenated as-is (no separator added).

    Returns:
        ``path2data + data_type + '_<h>by<w>.hdf5'``; the path is also printed.
    """
    model_name=os.path.basename(path2model)

    # image size encoded in the model name
    is_large="512" in model_name and "660" in model_name
    h,w=(512,660) if is_large else (256,330)

    # rotation suffix: first matching tag wins, checked in this order
    for angle in ("90","180","270"):
        if "Rotate"+angle in model_name:
            data_type=data_type+angle
            break

    path2data4model=path2data+data_type+'_%sby%s.hdf5' %(h,w)
    print (path2data4model)

    return path2data4model

# fix optimization error in keras models
def fix_files_in_folder_and_subfolders(path):
    """Delete the ``optimizer_weights`` group from every HDF5 file under ``path``.

    Walks ``path`` recursively. Every file whose name ends in ``.hdf5`` or
    ``.h5`` (case-insensitive) is opened in append mode and, if it has a
    top-level ``optimizer_weights`` entry, that entry is deleted in place.

    Args:
        path: folder to scan.

    The header line is printed once per visited folder. Each candidate file is
    printed when it is examined and once more when its optimizer weights are
    actually removed.
    """
    print("Deleting optimizer weights from:")
    for item in os.listdir(path):
        item_path = os.path.join(path, item)
        # check directories first so a folder named like "*.h5" is descended
        # into instead of being opened as an HDF5 file
        if os.path.isdir(item_path):
            fix_files_in_folder_and_subfolders(item_path)
        elif item.lower().endswith((".hdf5", ".h5")):
            print(item_path)
            with h5py.File(item_path, 'a') as f:
                if 'optimizer_weights' in f.keys():
                    print(item_path)
                    del f['optimizer_weights']


# display a sample subject
def dispSampleSubject(X,y_zone=None): 
    """Plot the first 16 slices of one randomly picked subject in a 4x4 grid.

    Args:
        X: image array indexed as ``X[subject, slice]`` (N*C*H*W).
        y_zone: optional label array indexed as ``y_zone[subject, zone]``;
            when given, the name and description of every zone with a
            non-zero label for the chosen subject is printed.
    """
    # X shape: N*C*H*W
    sbj_num=np.random.randint(len(X))
    print ('subject: %s' %sbj_num)

    plt.figure(figsize=(15,15))
    for position in range(1,17):
        plt.subplot(4,4,position)
        plt.imshow(X[sbj_num,position-1],cmap='gray')

    if y_zone is None:
        return

    # zones with objects
    for zone_idx in np.nonzero(y_zone[sbj_num,:])[0]:
        zone_name=zones[zone_idx]
        print ('%s: %s' %(zone_name,body_zone_desc[zone_name]))


### fetching list of models/weights

In [None]:
# folder holding one sub-folder of weights per trained model
path2weights='./output/weights/'

# list the entries of the weights folder, oldest first (by modification time)
weightList = os.listdir(path2weights)
weightList.sort(key=lambda x: os.path.getmtime(path2weights+x))

# print every entry and check that it contains at least one .h5 model file
for i,sub in enumerate(weightList):
    print ('%s- %s \n' %(i,os.path.basename(sub)))
    h5_files=glob.glob(path2weights+sub+'/*.h5')
    if h5_files:
        path2model=h5_files[0]
    else:
        # explicit check instead of a bare except around the [0] lookup
        print('there is no h5 file!')


In [None]:
# fix optimization error in keras models if needed
#fix_files_in_folder_and_subfolders(path2weights)

## Inference on stage1 leaderboard

In [None]:
%%script false

# normalization 
norm_type='zeroMeanUnitVar'

# number of zones 
nb_zones=17

# read stage 1 leaderboard labels
stage1_test=pd.read_csv(path2_stage1_solution)
Probability=stage1_test.Probability
y_zone1=np.array(Probability)
# floor division: reshape needs integer dimensions ("/" gives a float on Python 3)
# NOTE(review): assumes the csv holds nb_zones consecutive rows per subject,
# in the same subject/zone order as the model outputs - confirm
y_stage1=np.reshape(y_zone1,(len(y_zone1)//nb_zones,nb_zones))
print ('labels shape:', y_stage1.shape)

# specify what data 
data_type="stage1_leader"

# predictions of every model that could be loaded and evaluated
yPreds=[]
for wn,sub in enumerate(weightList):

    # get path to correct data for a model
    path2_stage1leader=getPath2data4model(sub,data_type,path2data)    
    
    # load leaderboard data
    try:
        # read everything into memory and close the file right away;
        # dset[()] replaces Dataset.value, which h5py 3 removed
        with h5py.File(path2_stage1leader,'r') as ff_stage1leader:
            ids_stage1leader=ff_stage1leader['ids'][()]
            X_stage1leader=ff_stage1leader['X'][()]
        array_stats(X_stage1leader)
        #dispSampleSubject(X_stage1leader,y_stage1)
   
        print ('%s- %s \n' %(wn,os.path.basename(sub)))
        path2model=glob.glob(path2weights+sub+'/*.h5')[0]
        model=load_model(path2model)
        #model.summary()
        print('wait ...')
        try:
            #score_test=model.evaluate(preprocess(X_stage1leader,norm_type)[:,:,np.newaxis],y_stage1,verbose=0,batch_size=8)
            yPredPerModel=model.predict(preprocess(X_stage1leader,norm_type)[:,:,np.newaxis],batch_size=8)
            yPreds.append(yPredPerModel)
            score_test=logloss(y_stage1,yPredPerModel)
            print ('score test is %s' %(score_test))    
        except Exception as err:
            # keep going with the next model, but show why this one failed
            print('could not get score test!')
            print(repr(err))
        print('-'*60)
    except Exception as err:
        # reached when the data file, the .h5 model file or the model itself
        # cannot be loaded
        print('could not load data!')
        print(repr(err))
        print('-'*60)
        
# Score test for ensemble
if not yPreds:
    raise RuntimeError('no model produced predictions, nothing to ensemble')
y_pred1=np.array(yPreds)
# average the per-model predictions
y_pred2=np.mean(y_pred1,axis=0)
print (y_pred2.shape)
scoreEnsemble=logloss(y_stage1,y_pred2)
print('ensemble score for %s models: %s' %(len(yPreds),scoreEnsemble))

# flatten the (rows, columns) predictions into a single column
r1,c1=y_pred2.shape
#print y_pred2[0:5]
#print (r1,c1)
pred=np.reshape(y_pred2,(r1*c1,1))
print (pred.shape)        

## Inference for Stage2 leaderboard

In [None]:
# normalization 
norm_type='zeroMeanUnitVar'

# number of zones 
nb_zones=17

# specify what data 
data_type="stage2_leader"

# predictions of every model that could be loaded and run
yPreds=[]
for wn,sub in enumerate(weightList):

    # get path to correct data for a model
    path2_stagenleader=getPath2data4model(sub,data_type,path2data)    
    
    # load leaderboard data
    try:
        # read everything into memory and close the file right away;
        # dset[()] replaces Dataset.value, which h5py 3 removed
        with h5py.File(path2_stagenleader,'r') as ff_stagenleader:
            ids_stagenleader=ff_stagenleader['ids'][()]
            X_stagenleader=ff_stagenleader['X'][()]
        array_stats(X_stagenleader)
        #dispSampleSubject(X_stagenleader)
   
        print ('%s- %s \n' %(wn,os.path.basename(sub)))
        path2model=glob.glob(path2weights+sub+'/*.h5')[0]
        model=load_model(path2model)
        #model.summary()
        print('wait ...')
        try:
            yPredPerModel=model.predict(preprocess(X_stagenleader,norm_type)[:,:,np.newaxis],batch_size=8)
            yPreds.append(yPredPerModel)
            print (yPredPerModel[0])
            print('compeleted predictions!')
        except Exception as err:
            # no score is computed for stage 2, so report a prediction failure
            print('could not get predictions!')
            print(repr(err))
        print('-'*60)
    except Exception as err:
        # reached when the data file, the .h5 model file or the model itself
        # cannot be loaded
        print('could not load data!')
        print(repr(err))
        print('-'*60)
        
# Ensemble: average the per-model predictions
if not yPreds:
    raise RuntimeError('no model produced predictions, nothing to ensemble')
y_pred1=np.array(yPreds)
y_pred2=np.mean(y_pred1,axis=0)
print (y_pred2.shape)
#scoreEnsemble=logloss(y_stage1,y_pred2)
#print('ensemble score for %s models: %s' %(len(yPreds),scoreEnsemble))
print('ensemble of %s models' %(len(yPreds)))

# flatten the (rows, columns) predictions into a single column
r1,c1=y_pred2.shape
#print y_pred2[0:5]
#print (r1,c1)
pred=np.reshape(y_pred2,(r1*c1,1))
print (pred.shape)        

## create submission

In [None]:
#%%script false 
# start from the sample submission so the Id column and row order are kept
submission  = pd.read_csv(path2stage2submission)
pid = submission ['Id'].values

# one probability per submission row, as a flat 1-D array
# NOTE(review): assumes the rows of the sample submission are in the same order
# as the flattened predictions - confirm
pred_flat = np.asarray(pred).ravel()
if len(pred_flat) != len(submission):
    raise ValueError('got %s predictions for %s submission rows'
                     %(len(pred_flat), len(submission)))

# make submission
now = datetime.datetime.now()
info="submission_ensemble11"
suffix = info + '_' + str(now.strftime("%Y-%m-%d-%H-%M"))
path2submissions = './output/submissions'
# create the output folder on first use, otherwise to_csv fails
if not os.path.isdir(path2submissions):
    os.makedirs(path2submissions)
path2submission = os.path.join(path2submissions, 'submission_' + suffix + '.csv')

#submission = pd.DataFrame(pred, columns=['Probability'])
submission['Id'] = pid
submission['Probability'] = pred_flat
submission.to_csv(path2submission, index=False)
submission.head()

