In [1]:
from __future__ import print_function, division
import warnings
warnings.filterwarnings("ignore")

import os
import sys
import math
import operator
import numpy as np
import pandas as pd
import pickle as pkl
import tifffile as tif
import matplotlib.pyplot as plt
from multiprocessing import Pool
from sklearn.utils import shuffle
from keras import layers
from keras import models
from keras.utils import Sequence
from keras.models import Sequential
from keras.models import load_model, save_model, model_from_json
from keras.layers import Dense, Flatten, Embedding

# Move the working directory two levels up; the relative paths used in later
# cells ("patchTest", "model.json", "Code/Models/...", "Data/...") resolve from there.
# NOTE(review): not idempotent - re-running this cell climbs two more levels.
os.chdir("../../")

Using TensorFlow backend.


In [2]:
class ValidationImageGenerator(Sequence):
    """Keras ``Sequence`` that turns patch files into model-ready batches.

    Every file is read with ``tif.imread`` and split along its first axis:
    the first ``len(self.conv_dic)`` channels are rescaled with the
    ``[low, high]`` pairs in ``self.conv_dic``; the following
    ``len(self.dic)`` channels are expanded into one plane per non-zero
    value listed in ``self.dic``. Only inputs are produced (no labels).

    Parameters
    ----------
    x_metadata : sliceable collection of file paths
    batch_size : number of files per batch
    crop_size  : stored as ``self.cp``; not used by any method of this class
    """

    def __init__(self, x_metadata,batch_size, crop_size):
        self.x = x_metadata
        self.batch_size = batch_size
        self.cp = crop_size
        # Per categorical channel: the values that get a plane of their own
        # (zeros are filtered out in __getitem__).
        self.dic = {0:[0,120,165,210],1:[35,62,85],2:[7,22,50],3:[0,1,2,3,4],4:[20,60,140],5:[60,100],6:[0,1,2,3,4],7:[1,2,4,8],8:[1,2],9:[0,1,2,3,4]}
        # Per continuous channel: [low, high] pair used by the rescaling below.
        self.conv_dic = {0:[133,1176],1:[-10.00984,18.36730],2:[7.846126,20.94560],3:[41.182110,59.95573],4:[302.772980,777.74048],5:[6.182446,36.54550],6:[-28.248663,5.33183],7:[16.744829,41.94211],8:[-14.122952,22.96798],9:[-17.672335,26.44534],10:[-2.738379,26.44534],11:[-17.672335,11.73241],12:[318.297485,2543.30225],13:[43.063732,285.43790],14:[3.022581,135.58406],15:[8.283675,57.78888],16:[121.616867,855.52594],17:[19.868601,421.27750],18:[19.868601,851.60620],19:[60.590000,520.31244],20:[-187.999999,4672.000000]}

    def __len__(self):
        """Number of batches; the last one may be shorter than ``batch_size``."""
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def binarization(self,image,un):
        """Return an ``(H, W, len(un))`` float array with one plane per value.

        Plane ``i`` keeps ``un[i]`` wherever ``image`` equals it and is 0
        elsewhere (the value itself is kept, it is not mapped to 1).
        The plane size follows ``image`` instead of being fixed to 64x64.
        """
        img = np.zeros(tuple(image.shape) + (len(un),))
        for i, value in enumerate(un):
            img[:, :, i] = np.where(image == value, image, 0)
        return img

    def __getitem__(self, idx):
        """Build batch ``idx`` as an array of shape ``(batch, H, W, channels)``."""
        batch_paths = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        n_continuous = len(self.conv_dic)
        n_categorical = len(self.dic)

        batch = []
        for path in batch_paths:
            # Decode the file once; both channel groups are slices of it.
            stack = tif.imread(path)

            tempf = stack[:n_continuous, :, :]
            for k in range(n_continuous):
                low, high = self.conv_dic[k]
                # Maps value/255 == 0.1 to `low` and value/255 == 0.9 to `high`.
                # NOTE(review): the result is written back into the array that
                # tif.imread returned, so it is cast to that array's dtype -
                # confirm this matches the preprocessing used for training.
                tempf[k] = low + (high - low) * ((tempf[k] / 255.0) - 0.1) / 0.8
            tempf = np.transpose(tempf, (1, 2, 0))

            temp = stack[n_continuous:, :, :]
            planes = []
            for k in range(n_categorical):
                un = np.array(self.dic[k])
                un = un[un != 0]
                planes.append(self.binarization(temp[k], un))

            # Channel order: rescaled channels first, then the value planes
            # in dictionary order.
            batch.append(np.concatenate([tempf] + planes, axis=2))
        return np.array(batch)

class Val_load():
    """Collects patch file paths and their numeric ids from a two-level folder tree."""

    def init_load(self, root_dir):
        """Store ``root_dir`` as ``self.path`` (not read by the loader below)."""
        self.path = root_dir

    def validation_data_loading(self,root_dir):
        """Fill ``self.x_val`` (paths) and ``self.patch_ids`` (ints), index-aligned.

        ``root_dir`` must contain sub-folders whose entries are named like
        ``<prefix>_<id>.<ext>``; the id is the text between the first ``_``
        and the following ``.``. Folder and file listings are sorted so the
        resulting order is the same on every run and every file system
        (``os.listdir`` alone returns entries in arbitrary order).

        Raises ``IndexError``/``ValueError`` for names that do not follow the
        pattern, and ``OSError`` if an entry of ``root_dir`` is not a folder.
        """
        self.x_val = []
        self.patch_ids = []
        for folder in sorted(os.listdir(root_dir)):
            folder_path = root_dir + '/' + str(folder)
            for image in sorted(os.listdir(folder_path)):
                self.x_val.append(folder_path + '/' + str(image))
                self.patch_ids.append(int(image.split("_")[1].split(".")[0]))
                

# Index every patch file found under "patchTest".
val_data = Val_load()
val_data.validation_data_loading(root_dir='patchTest')
# Hold the paths as a NumPy array on the loader and show how many were found.
patch_paths = np.array(val_data.x_val)
val_data.x_val = patch_paths
patch_paths.shape

(72849,)

In [3]:
# Rebuild the network architecture from its JSON description; the context
# manager closes the file even if reading fails.
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
classifier = model_from_json(loaded_model_json)
# Load the trained weights into the freshly built model.
classifier.load_weights("Code/Models/RCNN_ResNext_Latest.h5")
print("Loaded model from disk")

Loaded model from disk


In [4]:
# Batch size 10, crop size 16 (the generator stores the crop size but does not use it).
validation_x = ValidationImageGenerator(val_data.x_val, 10, 16)
predictions = classifier.predict_generator(validation_x)
# Keep only the last entry along axis 1 of the model output for every patch.
# Built directly instead of concatenating onto an empty DataFrame.
pred = pd.DataFrame(predictions[:, -1, :])

In [5]:
# Persist the predictions and the matching patch ids so the ranking cells
# below can run without the model.
pred.to_csv("TEST-28-MAY-2018.csv", sep=',', index=False)
# Context manager guarantees the pickle is flushed and the handle closed.
with open("patch_ids.pkl", "wb") as patch_ids_file:
    pkl.dump(val_data.patch_ids, patch_ids_file)

In [2]:
# NOTE: unpickling can execute arbitrary code - only load this file from a trusted source.
with open("Data/final_embeddings.pkl", "rb") as embeddings_file:
    t = pkl.load(embeddings_file, encoding='latin')
# The mapping is keyed by the strings "1" .. "3336"; `e[j]` holds the entry for key j + 1.
e = [t[str(i)] for i in range(1, 3337)]

In [3]:
# Reload the artefacts written by the prediction cells.
pred = pd.read_csv("TEST-28-MAY-2018.csv")
# Context manager closes the handle once the ids are loaded.
with open("patch_ids.pkl", "rb") as patch_ids_file:
    patch_ids = pkl.load(patch_ids_file)

In [4]:
def hyperbolic_distances(z,w):
    """Average of the element-wise quantity arccosh(1 + 2|z-w|^2 / ((1-|z|^2)(1-|w|^2))).

    The expression is evaluated per coordinate (using ``np.abs``, not a vector
    norm) and the per-coordinate results are averaged into a single number.
    """
    gap_sq = np.abs(z - w) ** 2
    z_margin = 1 - np.abs(z) ** 2
    w_margin = 1 - np.abs(w) ** 2
    per_coordinate = np.arccosh(1 + (2 * gap_sq) / (z_margin * w_margin))
    return np.average(per_coordinate)

def softmax(x):
    """Compute softmax values along axis 0 for the scores in ``x``.

    The per-column maximum is subtracted before exponentiating. This leaves
    the result mathematically unchanged but keeps ``np.exp`` from overflowing
    to ``inf`` (and the output from becoming NaN) when scores are large.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(x)
    exps = np.exp(x - np.max(x, axis=0))
    return exps / np.sum(exps, axis=0)

def embeddings_to_prob(pred, patch_ids, embeddings, max_ranks):
    """Rank the closest embeddings for one prediction and return them as a table.

    Parameters
    ----------
    pred : predicted embedding for a single patch
    patch_ids : id of that patch (a single value despite the plural name);
        it is repeated on every output row
    embeddings : sequence of reference embeddings; entry ``j`` is reported
        as ``species_id == j + 1``
    max_ranks : number of nearest embeddings to keep

    Returns
    -------
    pandas.DataFrame with columns ``patch_id``, ``species_id``, ``prob`` and
    ``rank`` (1 = nearest). ``prob`` is the softmax of the inverse distances
    of the kept rows. At most ``len(embeddings)`` rows are returned.
    """
    # Use the `embeddings` argument (the previous version silently read the
    # global `e` and assumed exactly 3336 entries).
    distances = np.array([hyperbolic_distances(pred, emb) for emb in embeddings])

    # Never ask for more rows than there are embeddings.
    top_k = max(0, min(max_ranks, len(distances)))
    nearest = distances.argsort()[:top_k]

    # Closer embeddings get larger scores. NOTE: a distance of exactly 0
    # produces an infinite score.
    probs = softmax(1.0 / distances[nearest])

    columns = ['patch_id', 'species_id', 'prob', 'rank']
    return pd.DataFrame(
        {
            'patch_id': np.full(top_k, patch_ids, dtype=int),
            'species_id': (nearest + 1).astype(int),
            'prob': probs,
            'rank': np.arange(1, top_k + 1, dtype=int),
        },
        columns=columns,
    )

In [5]:
# Number of top-ranked species to report for every patch.
# NOTE(review): an interactive prompt makes the run depend on manual input;
# consider replacing it with a constant in a configuration cell.
DEFAULT_NUM_RANKS = 100
_ranks_reply = input("How many ranks for each image : ").strip()
# A blank reply falls back to the default instead of crashing in int("").
num_ranks = int(_ranks_reply) if _ranks_reply else DEFAULT_NUM_RANKS

How many ranks for each image : 100


In [6]:
# Pair every prediction row with the patch id stored at the same position.
pred_rows = pred.values
l = [(pred_rows[row], patch_ids[row]) for row in range(pred.shape[0])]

In [7]:
def f(l1):
    """Pool worker: rank one ``(prediction, patch_id)`` pair against the global embeddings ``e``."""
    prediction = l1[0]
    patch_id = l1[1]
    return embeddings_to_prob(prediction, patch_id, e, num_ranks)

In [8]:
from multiprocessing import Pool
# Rank all patches in parallel with 14 worker processes. The context manager
# shuts the workers down once map() has returned, so none are left running.
with Pool(14) as p:
    res = p.map(f,l)

In [12]:
# Stack the per-patch result frames returned by the pool into one table.
mpd = pd.concat(res,axis=0)

In [15]:
# Write the combined table: ';'-separated, without the index column or a header row.
mpd.to_csv("SSN_CS_19_run4.csv",sep=";",index=False,header=False)

In [None]:
# NOTE(review): this cell has no execution count and looks like leftover code -
# confirm whether it can be removed. As written it cannot run:
#  - `p` is bound to the multiprocessing Pool in an earlier cell, which has no `.shape`;
#  - `prediction_to_species` is not defined anywhere in the visible notebook;
#  - `i` is only used for the progress print, so every iteration passes the
#    same arguments.
mpd = pd.DataFrame()
for i in range(0,p.shape[0],20000):
    print("Main Iter : ", i/20000)
    if(i==0):
        mpd = prediction_to_species(p, num_ranks)
    else:
        mpd = pd.concat([mpd, prediction_to_species(p, num_ranks)], axis=0)
# Same output format as the earlier export: ';'-separated, no index, no header.
mpd.to_csv("MLRG_SSN3_run100.csv",sep=";",index=False,header=False)