In [1]:
import os
import sys
import math
import operator
import numpy as np
import pandas as pd
import pickle as pkl
import tifffile as tif
from keras.layers import Dense
from keras.layers import Conv2D
from multiprocessing import Pool
from keras.utils import Sequence
from keras.layers import Flatten
from keras.layers import MaxPool2D
from keras.models import Sequential 
from keras.layers import Reshape
from collections import OrderedDict
from keras.layers import TimeDistributed
from keras.layers import LSTM
from keras.layers import Permute
from keras.models import load_model, save_model
# Change the working directory so that the relative paths used by later cells
# ("Data/...", "occurrences_train.csv", "Code/Models/...") resolve.
# NOTE(review): presumably "../../" is the project root when the kernel starts
# in the notebook's own folder - confirm. The path is relative to the *current*
# working directory, so re-running this cell climbs two more levels each time;
# run it once per kernel session.
os.chdir("../../")

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
class ImageDataGenerator(Sequence):
    """Keras ``Sequence`` that loads batches of TIFF images from disk on demand.

    Every image is read with ``tif.imread``, cast to ``int``, divided by 255.0,
    transposed with axes ``(1, 2, 0)`` and finally trimmed by ``crop_size``
    pixels on each side of its first two axes.
    """

    def __init__(self, x_metadata, y_metadata, batch_size, crop_size):
        """Store the dataset description.

        Args:
            x_metadata: sequence of image file paths.
            y_metadata: sequence of targets, aligned index-by-index with
                ``x_metadata``.
            batch_size: number of samples per batch.
            crop_size: number of pixels removed from each border of the first
                two axes of the transposed image; ``0`` keeps the full image.
        """
        self.x = x_metadata
        self.y = y_metadata
        self.batch_size = batch_size
        self.cp = crop_size

    def __len__(self):
        """Return the number of batches (the last one may be partial)."""
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def _load_image(self, file_name):
        """Read one image file and return it scaled, transposed and cropped."""
        image = np.transpose(np.array(tif.imread(file_name), dtype=int) / 255.0, (1, 2, 0))
        # ``slice(cp, -cp)`` is empty when cp == 0 (it becomes ``0:0``), so fall
        # back to an open-ended slice in that case instead of returning nothing.
        border = slice(self.cp, -self.cp) if self.cp else slice(None)
        return image[border, border, :]

    def __getitem__(self, idx):
        """Return batch number ``idx`` as an ``(images, targets)`` pair of arrays."""
        start = idx * self.batch_size
        stop = (idx + 1) * self.batch_size
        batch_x = self.x[start:stop]
        batch_y = self.y[start:stop]

        images = np.array([self._load_image(file_name) for file_name in batch_x])
        return images, np.array(batch_y)

class CNN_Model:
    """Species classifier for a single taxonomic class.

    On construction the object collects the train/test image paths of one
    class from a directory tree laid out as
    ``<directory>/<split>/<class>/<order>/<family>/<genus>/<species>/<image>``
    and builds a one-hot species target for every image.
    """

    def __init__(self, directory, class_name):
        """Load the encodings and build the train/test path and label lists.

        Args:
            directory: root of the image tree; it must contain the ``train``
                and ``test`` sub-directories.
            class_name: key of the class to load in ``Data/hierarchy_data.pkl``
                (also the value matched against the ``class`` column of
                ``occurrences_train.csv``).

        Raises:
            ValueError: if an image sits in a species directory whose id is not
                listed for ``class_name`` in ``occurrences_train.csv``.
        """
        self.onehot = {}
        self.path = directory

        # NOTE(review): pickle.load can execute arbitrary code - only point
        # these paths at trusted files.
        with open("Data/hierarchy_data.pkl", "rb") as f:
            hd = pkl.load(f)
        with open("Data/class_encoding.pkl", "rb") as f:
            self.classes = pkl.load(f)
        with open("Data/order_encoding.pkl", "rb") as f:
            self.orders = pkl.load(f)
        with open("Data/family_encoding.pkl", "rb") as f:
            self.families = pkl.load(f)
        with open("Data/genus_encoding.pkl", "rb") as f:
            self.genuses = pkl.load(f)
        with open("Data/specie_encoding.pkl", "rb") as f:
            self.species = pkl.load(f)

        self.onehot_output()

        cls = class_name
        self.train_pathdata_x = self._collect_image_paths("train", hd, cls)
        self.test_pathdata_x = self._collect_image_paths("test", hd, cls)

        np.random.shuffle(self.train_pathdata_x)
        np.random.shuffle(self.test_pathdata_x)

        # The CSV only needs to be read once; the list of species ids of the
        # class is the same for every image, so compute it a single time.
        df = pd.read_csv("occurrences_train.csv", low_memory=False)
        df = df[['class', 'species_glc_id']]
        species_ids = list(df[df['class'] == cls]['species_glc_id'].unique())

        # Labels are derived *after* shuffling so they stay aligned with paths.
        self.train_seq_y = self._encode_labels(self.train_pathdata_x, species_ids)
        self.test_seq_y = self._encode_labels(self.test_pathdata_x, species_ids)

        print(np.array(self.train_pathdata_x).shape, np.array(self.train_seq_y).shape, np.array(self.test_pathdata_x).shape,np.array(self.test_seq_y).shape)

    def _collect_image_paths(self, split, hd, cls):
        """Return the path of every image of class ``cls`` under ``<self.path>/<split>``."""
        paths = []
        for order, families in hd[cls].items():
            for family, genera in families.items():
                for genus, species in genera.items():
                    for specie in species:
                        species_dir = os.path.join(
                            self.path, split,
                            str(self.classes[cls]), str(self.orders[order]),
                            str(self.families[family]), str(self.genuses[genus]),
                            str(specie))
                        for im in os.listdir(species_dir):
                            paths.append(os.path.join(species_dir, im))
        return paths

    @staticmethod
    def _encode_labels(paths, species_ids):
        """Return one one-hot vector over ``species_ids`` per image path."""
        index_of = {sp: i for i, sp in enumerate(species_ids)}
        labels = []
        for p in paths:
            # The species id is the name of the image's parent directory. Reading
            # it from the end of the path works for any depth of ``directory``.
            s = int(os.path.basename(os.path.dirname(p)))
            if s not in index_of:
                raise ValueError("species id %r (from %r) is not listed for this class" % (s, p))
            z = np.zeros((len(species_ids)))
            z[index_of[s]] = 1
            labels.append(z)
        return labels

    def onehot_output(self):
        """Fill ``self.onehot`` with a one-hot vector for every entry of ``self.species``."""
        total = len(self.species)
        for position, sp in enumerate(self.species):
            y = np.zeros(total)
            y[position] = 1
            self.onehot[sp] = y

    def model_create(self, dense_out, time_steps=5, batch_size=32):
        """Build, compile and return the CNN.

        Args:
            dense_out: number of units of the output layer.
            time_steps: unused; kept for backward compatibility.
            batch_size: unused; kept for backward compatibility.

        Returns:
            The compiled ``Sequential`` model (its summary is printed).
        """
        classifier = Sequential()
        # Convolutional feature extractor.
        classifier.add(Conv2D(filters=64, kernel_size=(1, 1), input_shape = (32,32,33), activation = 'relu'))
        classifier.add(Conv2D(filters=96, kernel_size=(3, 3), activation = 'relu'))
        classifier.add(Conv2D(filters=128, kernel_size=(3, 3), activation = 'relu'))
        classifier.add(Conv2D(filters=16, kernel_size=(5, 5), activation = 'relu'))
        classifier.add(Conv2D(filters=32, kernel_size=(5, 5), activation = 'relu'))
        classifier.add(MaxPool2D(pool_size = (2, 2)))
        classifier.add(Flatten())
        # Fully connected head.
        classifier.add(Dense(128, activation = 'relu'))
        if dense_out == 1:
            # A softmax over a single unit is constantly 1.0, so a one-unit
            # output needs a sigmoid to be trainable with binary cross-entropy.
            output_activation, loss = 'sigmoid', 'binary_crossentropy'
        else:
            # Targets built by this class are one-hot vectors, which is what
            # categorical_crossentropy expects regardless of the class count
            # (sparse_categorical_crossentropy would require integer labels).
            output_activation, loss = 'softmax', 'categorical_crossentropy'
        classifier.add(Dense(dense_out, activation = output_activation))
        classifier.compile(optimizer = 'adam', loss = loss, metrics = ['accuracy'])
        classifier.summary()
        return classifier

    def fit_generator(self, class_name, num_epochs=10, batch_size=32, crop_size=16, time_steps=5):
        """Load the saved model of ``class_name`` (or train and save one), then evaluate it.

        Args:
            class_name: used to name the model file ``Code/Models/<class_name>.h5``.
            num_epochs: training epochs when no saved model can be loaded.
            batch_size: batch size of the train and test generators.
            crop_size: border trimmed from every image by the generators.
            time_steps: forwarded to ``model_create`` (unused there).
        """
        print("Class : ", class_name)
        model_path = "Code/Models/"+str(class_name)+".h5"

        classifier = None
        if os.path.isfile(model_path):
            try:
                classifier = load_model(model_path)
            except (IOError, OSError, ValueError) as err:
                # Only an unreadable model file triggers a retrain; anything
                # else (including KeyboardInterrupt) propagates to the caller.
                print("Could not load", model_path, "- retraining:", err)

        if classifier is None:
            print ("-------------------------------------------------------")
            print("Training")
            classifier = self.model_create(dense_out=np.array(self.train_seq_y).shape[1], time_steps=time_steps, batch_size=batch_size)
            train_data = ImageDataGenerator(self.train_pathdata_x, self.train_seq_y, batch_size, crop_size)
            classifier.fit_generator(train_data, epochs=num_epochs, use_multiprocessing=True,shuffle=True)
            # Make sure the target directory exists so training is not lost at save time.
            os.makedirs(os.path.dirname(model_path), exist_ok=True)
            classifier.save(model_path)

        print ("---------------------------------------------------")    
        print("Testing")
        test_data = ImageDataGenerator(self.test_pathdata_x, self.test_seq_y, batch_size, crop_size)
        scores = classifier.evaluate_generator(test_data, use_multiprocessing=True)
        print("Loss : ", scores[0])
        print("Accuracy : ", scores[1])
        print ("-------------------------------------------------------")

In [3]:
# Load the class-name -> class-code encoding.
with open("Data/class_encoding.pkl", "rb") as encoding_file:
    classes = pkl.load(encoding_file)

# Reverse lookup: class code -> class name.
classes_to_names = {code: name for name, code in classes.items()}

# Build the dataset of each class, then load or train its model and evaluate it.
for cls in classes.keys():
    ob = CNN_Model("Data/Hierarchial Data/", cls)
    ob.fit_generator(cls, num_epochs=25, batch_size=30, time_steps=5)

(1270,) (1270, 39) (510,) (510, 39)
Class :  Pinopsida
---------------------------------------------------
Testing
Loss :  2.7298653967240276
Accuracy :  0.2823529484517434
-------------------------------------------------------
(188,) (188, 9) (100,) (100, 9)
Class :  Lycopodiopsida
-------------------------------------------------------
Training
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 32, 64)        2176      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 30, 30, 96)        55392     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 28, 28, 128)       110720    
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 24, 24, 16)        51216     
________________________________________________________

KeyboardInterrupt: 

In [None]:
#TESTING.
# Two-stage inference: a first model predicts the taxonomic class of every
# image, then the per-class model saved by CNN_Model.fit_generator predicts
# the species.
test_image_paths = []  # TODO: fill in the paths of the test images.
if not test_image_paths:
    raise ValueError("Set test_image_paths to the test image files before running this cell.")

num_test_images = len(test_image_paths)

# Centre-crop each image to 32 x 32 and scale by 1/255, stored channels-last
# to match the (32, 32, 33) input declared in CNN_Model.model_create.
# NOTE(review): assumes the TIFFs are read channels-first (33, H, W), as in
# ImageDataGenerator, and that CNN_1.h5 takes the same layout - confirm.
nparray = np.zeros(shape=(num_test_images, 32, 32, 33), dtype=np.float32)
for counter, image_path in enumerate(test_image_paths):
    image = np.transpose(tif.imread(image_path), (1, 2, 0))
    top = (image.shape[0] - 32) // 2
    left = (image.shape[1] - 32) // 2
    nparray[counter] = image[top:top + 32, left:left + 32, :] / 255.0

#Now make predictions, using the initial class prediction model.
class_predictor = load_model('CNN_1.h5')
yPred = class_predictor.predict(nparray)
# predict() returns one score per class; the predicted class is the arg-max.
# NOTE(review): assumes output index k of CNN_1.h5 corresponds to class code k
# in classes_to_names - confirm.
predicted_class_ids = np.argmax(yPred, axis=1)

#Now, based on yPred call each of the corresponding models.
species_models = {}  # class name -> loaded model, so each file is read once
final_pred = []
for i, class_id in enumerate(predicted_class_ids):
    class_name = classes_to_names[int(class_id)]
    if class_name not in species_models:
        species_models[class_name] = load_model("Code/Models/" + str(class_name) + ".h5")
    # Slice (not index) so the single sample keeps its batch axis.
    final_pred.append(species_models[class_name].predict(nparray[i:i + 1]))

In [None]:
# fit_generator() requires the class name as its first argument; without it the
# call raises TypeError. `ob` and `cls` are the values left by the last
# iteration of the per-class training loop, so this re-runs the last class.
ob.fit_generator(cls, num_epochs=10, batch_size=30, time_steps=5)