## Import Libraries

In [3]:
import numpy as np
from matplotlib import pyplot as plt

#Data Read
import imageio as io
import os
from glob import glob
import pandas as pd
from sklearn.model_selection import KFold

# Data Augmentation
from scipy.ndimage import interpolation as ip

# Image Resizing
from skimage.transform import resize
from skimage import img_as_ubyte

# Histogram of Gradients
from skimage.feature import hog

# Local Binary Patterns
from skimage.feature import local_binary_pattern as lbp
from skimage import color
from sklearn.preprocessing import normalize
import itertools
from numpy import linalg as la

# Bag of Words using KMeans
from sklearn.cluster import KMeans
import collections

In [4]:
# Classification and High Level Feature Generation
from keras import backend as K
from keras.models import Sequential,Model
from keras.layers import Input,Dense, Activation, Dropout
from keras import regularizers, optimizers
from keras.callbacks import EarlyStopping

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.metrics import roc_curve,auc

## Data Read

In [5]:
def sort_key(path):
    """Return the integer encoded in a file's base name, for numeric sorting.

    The extension is removed with ``os.path.splitext`` so the key works for an
    extension of any length (``.jpg``, ``.jpeg``, ``.png`` ...) rather than
    assuming exactly four trailing characters.

    Parameters
    ----------
    path : str
        File path whose base name, minus its extension, is an integer
        literal, e.g. ``'images/12.jpg'``.

    Returns
    -------
    int
        The numeric value of the file stem.

    Raises
    ------
    ValueError
        If the stem is not a valid integer literal.
    """
    stem, _extension = os.path.splitext(os.path.basename(path))
    return int(stem)

In [6]:
def get_files(path):
    """Return the ``*.jpg`` paths directly inside ``path``, ordered by ``sort_key``.

    Parameters
    ----------
    path : str
        Directory to search (non-recursive).

    Returns
    -------
    list of str
        Matching paths sorted by the key that ``sort_key`` derives from each path.
    """
    jpg_pattern = os.path.join(path, '*.jpg')
    return sorted(glob(jpg_pattern), key=sort_key)

In [7]:
def get_images(image_paths):
    """Read every file in ``image_paths`` with ``io.imread``.

    Parameters
    ----------
    image_paths : iterable of str
        Paths of the image files to load.

    Returns
    -------
    list
        One loaded image per path, in the same order as ``image_paths``.
    """
    return [io.imread(image_path) for image_path in image_paths]

## Read Labels

In [8]:
def read_labels(path, names):
    """Load a label table from an Excel file and return it as a NumPy array.

    Parameters
    ----------
    path : str
        Location of the Excel workbook.
    names : list of str
        Column names handed to ``pandas.read_excel`` via ``names=``.

    Returns
    -------
    numpy.ndarray
        The sheet's cell values, one row per spreadsheet row.
    """
    labels_df = pd.read_excel(path, names=names)
    # ``DataFrame.get_values()`` was deprecated in pandas 0.25 and removed in
    # 1.0; ``.values`` returns the same array on every pandas version.
    return labels_df.values

## Data Augmentation

In [9]:
def augment_data(images, shift_factor, path, name):
    """Write eight translated copies of every image to ``path`` as ``.jpg`` files.

    Each image is shifted by ``shift_factor`` along axis 0, along axis 1 and
    along the four diagonal combinations; the third axis is never shifted.
    ``mode='nearest'`` is passed to ``ip.shift`` for the border handling.
    Output files are numbered consecutively starting at ``name``:
    ``<path>/<name>.jpg``, ``<path>/<name + 1>.jpg`` and so on.

    Parameters
    ----------
    images : iterable
        Images to translate; the three-component shift vectors assume
        three-dimensional arrays.
    shift_factor : number
        Magnitude of the translation.
    path : str
        Output directory. Nothing here creates it, so presumably it must
        already exist — confirm against the caller.
    name : int
        Number used for the first output file.
    """
    step = shift_factor
    offsets = (
        [step, 0, 0],       # +axis 0
        [0, step, 0],       # +axis 1
        [-step, 0, 0],      # -axis 0
        [0, -step, 0],      # -axis 1
        [step, step, 0],    # diagonals
        [-step, -step, 0],
        [-step, step, 0],
        [step, -step, 0],
    )
    next_name = name
    # product() visits every offset for the first image, then the second, ...
    for image, offset in itertools.product(images, offsets):
        translated = ip.shift(image, offset, mode='nearest')
        io.imwrite(path+'/'+str(next_name)+'.jpg', translated)
        next_name = next_name+1

In [10]:
def generate_augmented_labels(labels, num_shifts):
    """Repeat every label row ``num_shifts`` times, keeping rows grouped.

    Row ``i`` of ``labels`` occupies rows ``i*num_shifts`` through
    ``(i+1)*num_shifts - 1`` of the result.

    Parameters
    ----------
    labels : array-like, shape (n_samples, n_labels)
        Label matrix; nested lists are accepted as well as arrays.
    num_shifts : int
        Number of copies to produce for each row.

    Returns
    -------
    numpy.ndarray, shape (n_samples * num_shifts, n_labels)
    """
    labels = np.asarray(labels)
    # One vectorised call replaces the per-row repeat / reshape / transpose.
    return np.repeat(labels, num_shifts, axis=0)

In [11]:
def kfold_validation(training_data, training_labels, n_splits=5):
    """Return shuffled K-fold splits of the training set.

    Parameters
    ----------
    training_data, training_labels
        Samples and their labels, forwarded to ``KFold.split``.
    n_splits : int, default 5
        Number of folds.

    Returns
    -------
    The object returned by ``KFold.split``. The splitter is built with
    ``shuffle=True`` and ``random_state=0``.
    """
    splitter = KFold(n_splits=n_splits, shuffle=True, random_state=0)
    fold_indices = splitter.split(training_data, training_labels)
    return fold_indices

In [3]:
def get_augmented_images_and_labels(augmented_data):
    """Split interleaved (images, labels) groups into two flat collections.

    ``augmented_data`` is flattened one level; items at even positions of the
    flattened sequence are treated as image groups and items at odd positions
    as label groups. Each kind is then flattened one further level.

    Parameters
    ----------
    augmented_data : iterable of iterables
        Presumably a sequence of ``(images, labels)`` pairs — the even/odd
        split relies on every entry holding exactly two items.

    Returns
    -------
    augmented_images : list
        All images from all groups, in order.
    augmented_labels : numpy.ndarray
        All label rows from all groups, in order.
    """
    flattened = list(itertools.chain.from_iterable(augmented_data))

    image_groups = flattened[0::2]
    label_groups = flattened[1::2]

    augmented_images = [image for group in image_groups for image in group]
    label_rows = [row for group in label_groups for row in group]

    return augmented_images, np.asarray(label_rows)

## Image Resizing

In [12]:
def resize_images(images, resize_factor):
    """Rescale every image to ``resize_factor`` times the first image's size.

    The target shape is derived once from ``images[0]`` (its first two
    dimensions multiplied by ``resize_factor``) and applied to all images.
    Each result is converted with ``img_as_ubyte``.

    Parameters
    ----------
    images : sequence of numpy.ndarray
        Images to resize. An empty sequence yields an empty list.
    resize_factor : number
        Multiplier applied to both leading dimensions.

    Returns
    -------
    list of numpy.ndarray
        The resized images, in input order.
    """
    # Nothing to size against; previously ``images[0]`` raised IndexError here.
    if len(images) == 0:
        return []

    # shape[0] counts rows and shape[1] counts columns, so name them that way.
    target_rows = (images[0].shape[0])*resize_factor
    target_cols = (images[0].shape[1])*resize_factor
    target_shape = (target_rows, target_cols)

    resized_images = []
    for image in images:
        resized_image = resize(image, target_shape, mode='reflect', anti_aliasing=True)
        resized_images.append(img_as_ubyte(resized_image))

    return resized_images

## Histogram of Oriented Gradients

In [13]:
def get_hog_features(images, cell_size, orientations=9):
    """Compute a Histogram-of-Oriented-Gradients descriptor for each image.

    ``hog`` is called with square cells of ``cell_size`` pixels, 2x2 cells per
    block, ``block_norm='L2'`` and ``multichannel=True``.

    Parameters
    ----------
    images : iterable
        Input images.
    cell_size : int
        Side length of a HOG cell in pixels.
    orientations : int, default 9
        Number of orientation bins.

    Returns
    -------
    numpy.ndarray
        The descriptors stacked with ``np.asarray``, one entry per image.
    """
    cell_shape = (cell_size, cell_size)
    descriptors = [
        hog(picture, orientations=orientations, pixels_per_cell=cell_shape,
            cells_per_block=(2, 2), block_norm='L2', visualize=False, multichannel=True)
        for picture in images
    ]
    return np.asarray(descriptors)

## Local Binary Patterns

In [14]:
def get_lbp_features(images, cell_size, neighbours=8, radius=1):
    """Build block-wise uniform Local Binary Pattern histograms for each image.

    Each image is converted to grayscale, encoded with the ``'uniform'`` LBP
    method, and cut into cells: first into vertical strips with ``np.hsplit``,
    then each strip into cells with ``np.vsplit``. Every cell contributes a
    histogram with ``neighbours + 2`` bins, divided by its L2 norm; the
    histograms are concatenated strip by strip.

    Parameters
    ----------
    images : iterable
        Input images passed to ``color.rgb2gray``.
    cell_size : int
        Approximate cell side; the number of splits per axis is
        ``round(dimension / cell_size)``, and ``np.hsplit`` / ``np.vsplit``
        require that count to divide the dimension evenly.
    neighbours : int, default 8
        Number of LBP sampling points.
    radius : int, default 1
        LBP sampling radius.

    Returns
    -------
    numpy.ndarray
        One flattened feature vector per image.
    """
    bin_count = neighbours + 2
    bin_edges = np.arange(0, bin_count + 1)

    per_image_features = []
    for picture in images:
        codes = lbp(color.rgb2gray(picture), neighbours, radius, method='uniform')

        strip_count = round(codes.shape[1] / cell_size)
        cells_per_strip = round(codes.shape[0] / cell_size)

        # Column strips first, then the cells inside each strip, top to bottom.
        cells = [
            cell
            for strip in np.hsplit(codes, strip_count)
            for cell in np.vsplit(strip, cells_per_strip)
        ]

        cell_histograms = []
        for cell in cells:
            counts = np.histogram(cell, bins=bin_edges, range=(0, bin_count))[0]
            cell_histograms.append(counts / (la.norm(counts)))

        per_image_features.append(np.asarray(cell_histograms).flatten())

    return np.asarray(per_image_features)

## Bag Of Words-RGB Features

In [15]:
def generate_bow(images):
    """Collect the distinct pixel values found across all images, scaled by 1/255.

    All images are stacked, flattened to a list of pixels and de-duplicated
    with ``np.unique(axis=0)`` across the whole collection.

    Parameters
    ----------
    images : sequence of numpy.ndarray
        Three-dimensional images; the flattening uses the shape of
        ``images[0]`` for every image, so they must all share it.

    Returns
    -------
    numpy.ndarray, shape (n_unique_pixels, n_channels)
        Unique pixel rows divided by 255.
    """
    first = images[0]
    pixels_per_image = first.shape[0] * first.shape[1]
    channels = first.shape[2]

    stacked = np.array(images)
    pixel_rows = stacked.reshape((len(images) * pixels_per_image, channels))
    distinct_pixels = np.unique(pixel_rows, axis=0)
    return distinct_pixels / 255

In [16]:
def get_bow_features(images, k, kmeans):
    """Describe each image by an L2-normalised histogram of pixel cluster ids.

    Every image is flattened to pixel rows, divided by 255 and passed to
    ``kmeans.predict``; the number of pixels assigned to each of the clusters
    ``0 .. k-1`` forms the histogram.

    Parameters
    ----------
    images : sequence of numpy.ndarray
        Three-dimensional images sharing the shape of ``images[0]``.
    k : int
        Number of histogram bins (cluster ids ``0`` to ``k - 1``).
    kmeans : object
        Any object exposing ``predict(pixels)`` that returns one cluster id
        per pixel row.

    Returns
    -------
    numpy.ndarray, shape (n_images, k)
    """
    first = images[0]
    flat_shape = (first.shape[0] * first.shape[1], first.shape[2])

    per_image_histograms = []
    for picture in images:
        pixels = picture.reshape(flat_shape) / 255
        tally = collections.Counter(kmeans.predict(pixels))
        counts = np.asarray([tally[cluster_id] for cluster_id in range(0, k)])
        per_image_histograms.append(counts / (la.norm(counts)))

    return np.asarray(per_image_histograms)

In [17]:
def generate_bow1(images):
    """Collect the distinct pixel values of each image, scaled by 1/255.

    De-duplication is done per image only, so a pixel value that occurs in
    several images appears once for each of them in the result.

    Parameters
    ----------
    images : iterable of numpy.ndarray
        Three-dimensional images; shapes may differ between images.

    Returns
    -------
    numpy.ndarray
        The per-image unique pixel rows, concatenated in image order and
        divided by 255.
    """
    collected_rows = []
    for picture in images:
        pixel_rows = picture.reshape((picture.shape[0] * picture.shape[1], picture.shape[2]))
        # extend() appends the unique rows one by one, flattening across images.
        collected_rows.extend(np.unique(pixel_rows, axis=0))

    return np.asarray(collected_rows) / 255

## High Level Feature Generation

In [18]:
def custom_relu(x):
    """Apply the backend ReLU to ``x`` with the output capped at 1.0.

    Calls ``K.relu`` with ``alpha=0.0``, ``max_value=1.0`` and
    ``threshold=0.0``.
    """
    capped = K.relu(x, alpha=0.0, max_value=1.0, threshold=0.0)
    return capped

In [19]:
def define_autoencoder_model(input_size, hidden_size, hidden_activation='relu', output_activation='linear',
                             dropout1=0.0, dropout2=0.0, regularizer=None):
    """Build a single-hidden-layer autoencoder and the encoder that shares its weights.

    Layer order: input -> Dropout(dropout1) -> Dense(hidden_size) ->
    Dropout(dropout2) -> Dense(input_size).

    Parameters
    ----------
    input_size : int
        Length of the input (and reconstructed output) vectors.
    hidden_size : int
        Width of the encoding layer.
    hidden_activation, output_activation
        Activations of the encoding and reconstruction layers.
    dropout1, dropout2 : float
        Dropout rates applied before the encoding and reconstruction layers.
    regularizer
        Passed as ``activity_regularizer`` of the encoding layer.

    Returns
    -------
    autoencoder_model, encoder_model
        The full input-to-reconstruction model, and a model mapping the same
        input to the encoding layer's output.
    """
    inputs = Input(shape=(input_size,))

    dropped_inputs = Dropout(dropout1)(inputs)
    code = Dense(hidden_size, activation=hidden_activation, activity_regularizer=regularizer)(dropped_inputs)

    dropped_code = Dropout(dropout2)(code)
    reconstruction = Dense(input_size, activation=output_activation)(dropped_code)

    autoencoder_model = Model(inputs, reconstruction)
    encoder_model = Model(inputs, code)

    return autoencoder_model, encoder_model

In [20]:
def define_deep_autoencoder_model(input_size, hidden_size, hidden_size1, hidden_activation='relu',
                                  output_activation='linear', dropout1=0.0, dropout2=0.0,
                                  dropout3=0.0, dropout4=0.0):
    """Build a two-level autoencoder and the encoder that shares its weights.

    Layer order: input -> Dropout(dropout1) -> Dense(hidden_size1) ->
    Dropout(dropout2) -> Dense(hidden_size) -> Dropout(dropout3) ->
    Dense(hidden_size1) -> Dropout(dropout4) -> Dense(input_size).

    Parameters
    ----------
    input_size : int
        Length of the input (and reconstructed output) vectors.
    hidden_size : int
        Width of the innermost (bottleneck) layer.
    hidden_size1 : int
        Width of the outer hidden layers on both sides of the bottleneck.
    hidden_activation, output_activation
        Activations of the hidden layers and of the final layer.
    dropout1, dropout2, dropout3, dropout4 : float
        Dropout rates applied before each of the four dense layers, in order.

    Returns
    -------
    autoencoder_model, encoder_model
        The full input-to-reconstruction model, and a model mapping the same
        input to the bottleneck layer's output.
    """
    inputs = Input(shape=(input_size,))

    # Encoder half
    outer_in = Dropout(dropout1)(inputs)
    outer_code = Dense(hidden_size1, activation=hidden_activation)(outer_in)
    inner_in = Dropout(dropout2)(outer_code)
    bottleneck = Dense(hidden_size, activation=hidden_activation)(inner_in)

    # Decoder half
    expand_in = Dropout(dropout3)(bottleneck)
    expanded = Dense(hidden_size1, activation=hidden_activation)(expand_in)
    output_in = Dropout(dropout4)(expanded)
    reconstruction = Dense(input_size, activation=output_activation)(output_in)

    autoencoder_model = Model(inputs, reconstruction)
    encoder_model = Model(inputs, bottleneck)

    return autoencoder_model, encoder_model

In [21]:
def autoencoder_compile_and_fit(model, train_data, num_epochs=100, lr=0.001, validation_split=0.15,
                                validation_data=None, batch_size=None, callbacks=None):
    """Compile an autoencoder with Adam / MSE, train it to reproduce its input, and plot the loss.

    The model is fitted with ``train_data`` as both input and target,
    ``shuffle=True`` and ``verbose=0``. After training, the loss history is
    drawn with ``model_performance_vis``.

    Parameters
    ----------
    model
        Keras model to compile and fit (modified in place).
    train_data
        Training samples, used as input and as reconstruction target.
    num_epochs : int
    lr : float
        Adam learning rate.
    validation_split, validation_data, batch_size, callbacks
        Forwarded unchanged to ``model.fit``.

    Returns
    -------
    The history object returned by ``model.fit``.
    """
    adam = optimizers.Adam(lr=lr)
    model.compile(optimizer=adam, loss='mse', metrics=['mse'])

    fit_options = dict(
        epochs=num_epochs,
        shuffle=True,
        validation_split=validation_split,
        validation_data=validation_data,
        batch_size=batch_size,
        verbose=0,
        callbacks=callbacks,
    )
    history = model.fit(train_data, train_data, **fit_options)

    model_performance_vis(history, 'MSE Loss', 'loss')
    return history

In [22]:
def generate_high_level_features(encoder_model, data):
    """Return ``encoder_model.predict(data)``: the encoder's output for ``data``."""
    return encoder_model.predict(data)

## Classification

In [23]:
def normalize_features(features):
    """Min-max scale every row of ``features`` independently to ``[0, 1]``.

    Parameters
    ----------
    features : iterable of array-like
        One feature vector per row.

    Returns
    -------
    numpy.ndarray
        Rows rescaled as ``(row - min) / (max - min)``. A constant row
        (``max == min``) is mapped to all zeros instead of dividing by zero,
        which previously filled the row with NaN.
    """
    normalized_features = []
    for row in features:
        row = np.asarray(row)
        min_value = row.min()
        value_range = row.max() - min_value
        # A zero range means every entry equals the minimum; dividing the
        # (all-zero) shifted row by 1 keeps it at zero and avoids 0/0.
        scale = value_range if value_range != 0 else 1
        normalized_features.append((row - min_value) / scale)

    return np.asarray(normalized_features)

In [24]:
def define_LRL_model(input_size, output_size, dropout=0.0, regularizer=None):
    """Build a single-layer sigmoid classifier: input -> Dropout -> Dense(sigmoid).

    Parameters
    ----------
    input_size : int
        Length of the input feature vectors.
    output_size : int
        Number of sigmoid output units.
    dropout : float, default 0.0
        Dropout rate applied to the input.
    regularizer
        Passed as ``kernel_regularizer`` of the dense layer.

    Returns
    -------
    The assembled Keras ``Model`` (not yet compiled).
    """
    inputs = Input(shape=(input_size,))
    dropped_inputs = Dropout(dropout)(inputs)
    sigmoid_outputs = Dense(output_size, activation='sigmoid', kernel_regularizer=regularizer)(dropped_inputs)
    return Model(inputs, sigmoid_outputs)

In [25]:
def compile_and_fit(model, train_data, train_labels, num_epochs=100, lr=0.001, validation_split=0.15,
                    validation_data=None, batch_size=None, callbacks=None):
    """Compile a classifier with Adam / binary cross-entropy, train it, and plot the loss.

    The model is fitted with ``shuffle=True`` and ``verbose=0``. After
    training, the loss history is drawn with ``model_performance_vis``.

    Parameters
    ----------
    model
        Keras model to compile and fit (modified in place).
    train_data, train_labels
        Training samples and their targets.
    num_epochs : int
    lr : float
        Adam learning rate.
    validation_split, validation_data, batch_size, callbacks
        Forwarded unchanged to ``model.fit``.

    Returns
    -------
    The history object returned by ``model.fit``.
    """
    adam = optimizers.Adam(lr=lr)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['binary_crossentropy'])

    fit_options = dict(
        epochs=num_epochs,
        shuffle=True,
        validation_split=validation_split,
        validation_data=validation_data,
        batch_size=batch_size,
        verbose=0,
        callbacks=callbacks,
    )
    history = model.fit(train_data, train_labels, **fit_options)

    model_performance_vis(history, 'Crossentropy Loss', 'loss')
    return history
    

In [26]:
def get_probability_values(clf, test_data):
    """Extract column 1 of each per-label probability array as a samples-by-labels matrix.

    ``clf.predict_proba(test_data)`` is expected to return one 2-D array per
    label; column 1 of each is kept (presumably the positive-class
    probability — confirm against the classifier's class ordering).

    Parameters
    ----------
    clf
        Object exposing ``predict_proba``.
    test_data
        Samples forwarded to ``predict_proba``.

    Returns
    -------
    numpy.ndarray, shape (n_samples, n_labels)
    """
    per_label_probs = clf.predict_proba(test_data)
    second_columns = [label_probs[:, 1] for label_probs in per_label_probs]
    # Stacked as (n_labels, n_samples); transpose to put samples on the rows.
    return np.asarray(second_columns).T

## Model Evaluation and Visualization

In [27]:
def predict_and_threshold(model, test_data, test_labels):
    """Predict on ``test_data`` and draw both threshold-vs-performance plots.

    Parameters
    ----------
    model
        Object exposing ``predict``.
    test_data
        Samples to score.
    test_labels
        Ground-truth labels used by the two threshold plots.

    Returns
    -------
    The raw output of ``model.predict(test_data)``.
    """
    scores = model.predict(test_data)

    # First recall / specificity / average, then recall / precision / F1.
    threshold_curve(test_labels, scores)
    threshold_curve1(test_labels, scores)

    return scores

In [28]:
def model_performance_vis(history, metric_type, metric):
    """Plot a training metric, and its validation counterpart when available.

    Parameters
    ----------
    history
        Object with a ``history`` dict mapping metric names to per-epoch
        values (as returned by Keras ``fit``).
    metric_type : str
        Text used for the y-axis label.
    metric : str
        Key of the training series; the validation series is looked up
        under ``'val_' + metric``.

    Notes
    -----
    The validation curve is skipped when ``'val_' + metric`` is absent (for
    example after training without validation data), so the plot no longer
    raises ``KeyError`` in that case.
    """
    figure, ax = plt.subplots(1, 1, figsize=(7, 5))
    ax.set_title('Train - Validation')
    ax.plot(history.history[metric], 'r', label='Training '+metric)

    validation_key = 'val_'+metric
    if validation_key in history.history:
        ax.plot(history.history[validation_key], 'g', ls='--', label='Validation '+metric)

    ax.set_xlabel('epochs')
    ax.set_ylabel(metric_type)
    ax.legend(bbox_to_anchor=(1, 1), loc=1, borderaxespad=1)
    plt.show()

In [29]:
def model_performance(history, history1, metric):
    """Plot training, validation and test curves of ``metric`` on one figure.

    Parameters
    ----------
    history
        Supplies the training series (``metric``) and the validation series
        (``'val_' + metric``).
    history1
        Supplies the series labelled "Test", read from its
        ``'val_' + metric`` entry.
    metric : str
        Metric key; also used as the y-axis label.
    """
    validation_key = 'val_'+metric

    figure, ax = plt.subplots(1, 1, figsize=(7, 5))
    ax.set_title('Train - Validation - Test')

    ax.plot(history.history[metric], 'r', label='Training '+metric)
    ax.plot(history.history[validation_key], 'g', label='Validation '+metric)
    ax.plot(history1.history[validation_key], 'b', label='Test '+metric)

    ax.set_xlabel('epochs')
    ax.set_ylabel(metric)
    ax.legend(bbox_to_anchor=(1, 1), loc=1, borderaxespad=1)
    plt.show()

In [30]:
def threshold_labels(predicted_values, threshold):
    """Binarise scores: entries below ``threshold`` become 0, all others 1.

    Parameters
    ----------
    predicted_values : array-like
        Scores to binarise; the input is not modified.
    threshold : number
        Cut-off. A score equal to the threshold maps to 1.

    Returns
    -------
    numpy.ndarray
        New array with the same shape and dtype as ``predicted_values``.
    """
    scores = np.array(predicted_values)
    binary = np.where(scores < threshold, 0, 1)
    # Keep the dtype of the input, as the in-place overwrite of a copy did.
    return binary.astype(scores.dtype)

In [2]:
def threshold_curve(true_labels, predicted_values):
    """Plot recall, specificity and their average against the decision threshold.

    Scores are binarised with ``threshold_labels`` at eleven thresholds
    (0.0 to 1.0 in steps of 0.1) and summarised with the first element
    returned by ``model_evaluation``.

    Parameters
    ----------
    true_labels
        Ground-truth label matrix.
    predicted_values
        Score matrix of the same shape.
    """
    thresholds = np.linspace(0, 1.1, num=11, endpoint=False)

    # One row of averaged metrics per threshold.
    curve_values = np.asarray([
        model_evaluation(true_labels, threshold_labels(predicted_values, cutoff))[0]
        for cutoff in thresholds
    ])

    recall_series = curve_values[:, 0]
    specificity_series = curve_values[:, 1]
    average_series = curve_values[:, 2]

    figure, ax = plt.subplots(1, 1, figsize=(7, 5))
    ax.set_title('Threshold vs Performance')
    ax.set_xlabel('Threshold')
    ax.set_ylabel('Measure')
    ax.plot(thresholds, recall_series, 'r', ls='--', label='Recall')
    ax.plot(thresholds, specificity_series, 'g', ls=':', label='Specificity')
    ax.plot(thresholds, average_series, 'b', label='Average')
    ax.legend(bbox_to_anchor=(0.5, 0), loc=8, borderaxespad=1)
    plt.show()

In [1]:
def threshold_curve1(true_labels, predicted_values):
    """Plot recall, precision and F1-score against the decision threshold.

    Scores are binarised with ``threshold_labels`` at eleven thresholds
    (0.0 to 1.0 in steps of 0.1) and summarised with the first element
    returned by ``model_evaluation``.

    Parameters
    ----------
    true_labels
        Ground-truth label matrix.
    predicted_values
        Score matrix of the same shape.
    """
    thresholds = np.linspace(0, 1.1, num=11, endpoint=False)

    # One row of averaged metrics per threshold.
    curve_values = np.asarray([
        model_evaluation(true_labels, threshold_labels(predicted_values, cutoff))[0]
        for cutoff in thresholds
    ])

    recall_series = curve_values[:, 0]
    precision_series = curve_values[:, 4]
    f1_series = curve_values[:, 5]

    figure, ax = plt.subplots(1, 1, figsize=(7, 5))
    ax.set_title('Threshold vs Performance')
    ax.set_xlabel('Threshold')
    ax.set_ylabel('Measure')
    ax.plot(thresholds, recall_series, 'r', ls='--', label='Recall')
    ax.plot(thresholds, precision_series, 'g', ls=':', label='Precision')
    ax.plot(thresholds, f1_series, 'b', label='F1-score')
    ax.legend(bbox_to_anchor=(0.5, 0), loc=8, borderaxespad=1)
    plt.show()

In [32]:
def max_avg(true_labels, predicted_values):
    """Return the best recall/specificity average over the threshold sweep.

    Scores are binarised with ``threshold_labels`` at eleven thresholds
    (0.0 to 1.0 in steps of 0.1); for each, the third entry (index 2) of the
    averaged metrics returned by ``model_evaluation`` is collected, and the
    largest value is returned.

    Parameters
    ----------
    true_labels
        Ground-truth label matrix.
    predicted_values
        Score matrix of the same shape.

    Returns
    -------
    The maximum of the collected values (``np.max``).
    """
    thresholds = np.linspace(0, 1.1, num=11, endpoint=False)

    curve_values = np.asarray([
        model_evaluation(true_labels, threshold_labels(predicted_values, cutoff))[0]
        for cutoff in thresholds
    ])

    return np.max(curve_values[:, 2])

In [33]:
def model_evaluation(test_labels, predicted_labels):
    """Compute per-label confusion counts and metrics, plus their label-wise means.

    Classification of each cell uses the element-wise sum of the two
    matrices: 2 is a true positive, 0 a true negative, and 1 is a false
    negative when the true label is 1 and a false positive otherwise.

    Parameters
    ----------
    test_labels : numpy.ndarray, shape (n_samples, n_labels)
        Ground-truth 0/1 labels.
    predicted_labels : numpy.ndarray, shape (n_samples, n_labels)
        Predicted 0/1 labels.

    Returns
    -------
    avg : numpy.ndarray, shape (6,)
        NaN-ignoring mean over labels of each metric column.
    confusion_matrix : numpy.ndarray, shape (n_labels, 4)
        Columns are TP, FP, FN, TN.
    evaluation_metrics : numpy.ndarray, shape (n_labels, 6)
        Columns are recall, specificity, (recall + specificity) / 2,
        accuracy, precision, F1-score. Undefined ratios (zero denominators)
        appear as NaN.
    """
    num_labels = test_labels.shape[1]
    combined = test_labels + predicted_labels

    exactly_one = (combined == 1)
    truly_positive = (test_labels == 1)

    # Column order: TP, FP, FN, TN
    confusion_matrix = np.zeros((num_labels, 4))
    confusion_matrix[:, 0] = np.sum(combined == 2, axis=0)
    confusion_matrix[:, 1] = np.sum(exactly_one & ~truly_positive, axis=0)
    confusion_matrix[:, 2] = np.sum(exactly_one & truly_positive, axis=0)
    confusion_matrix[:, 3] = np.sum(combined == 0, axis=0)

    tp = confusion_matrix[:, 0]
    fp = confusion_matrix[:, 1]
    fn = confusion_matrix[:, 2]
    tn = confusion_matrix[:, 3]

    recall = tp/(tp + fn)
    specificity = tn/(tn + fp)
    average = (recall + specificity)/2
    accuracy = (tp + tn)/(tp + fp + fn + tn)
    precision = tp/(tp + fp)
    f1_score = (2*precision*recall)/(precision+recall)

    evaluation_metrics = np.zeros((num_labels, 6))
    metric_columns = (recall, specificity, average, accuracy, precision, f1_score)
    for column, values in enumerate(metric_columns):
        evaluation_metrics[:, column] = values

    # Labels whose metric is undefined (NaN) are left out of the mean.
    avg = np.asarray([np.nanmean(evaluation_metrics[:, column]) for column in range(6)])

    return avg, confusion_matrix, evaluation_metrics

In [1]:
def get_roc_curve(predicted_values, test_labels, avg_type='macro', class_index=2):
    """Compute per-class and pooled ROC curves and plot one of them.

    Parameters
    ----------
    predicted_values : numpy.ndarray, shape (n_samples, n_classes)
        Predicted scores.
    test_labels : numpy.ndarray, shape (n_samples, n_classes)
        Ground-truth labels.
    avg_type : str, default 'macro'
        Dictionary key under which the pooled curve is stored. The pooled
        curve is always computed from the flattened label and score
        matrices, whatever this value is.
    class_index : int or None, default 2
        Class whose ROC curve is drawn. The default keeps the previous
        behaviour, which always plotted class 2. When it is ``None`` or not a
        valid class index (for example with fewer than three classes), the
        pooled curve is drawn instead of raising ``KeyError``.
    """
    Y_test = test_labels
    y_score = predicted_values

    n_classes = test_labels.shape[1]

    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(Y_test[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Pooled ROC curve and area: every (sample, class) pair counts as one decision.
    fpr[avg_type], tpr[avg_type], _ = roc_curve(Y_test.ravel(), y_score.ravel())
    roc_auc[avg_type] = auc(fpr[avg_type], tpr[avg_type])

    # Pick the curve to draw; fall back to the pooled one for an unusable index.
    if class_index is not None and 0 <= class_index < n_classes:
        curve_key = class_index
        curve_label = 'ROC curve (AUC = %0.4f)'
    else:
        curve_key = avg_type
        curve_label = 'Pooled ROC curve (AUC = %0.4f)'

    plt.figure(figsize=(7,5))
    lw = 2
    plt.plot(fpr[curve_key], tpr[curve_key], color='darkorange',
         lw=lw, label=curve_label % roc_auc[curve_key])
    # Chance-level diagonal for reference.
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend(loc="lower right")
    plt.show()

In [2]:
def get_precision_recall_curve(predicted_values, test_labels, avg_type='macro'):
    """Plot the pooled precision-recall curve and return the area under it.

    Per-class curves and average-precision scores are computed first; the
    plotted curve is then built from the flattened label and score matrices,
    so every (sample, class) pair counts as one decision.

    Parameters
    ----------
    predicted_values : numpy.ndarray, shape (n_samples, n_classes)
        Predicted scores.
    test_labels : numpy.ndarray, shape (n_samples, n_classes)
        Ground-truth labels.
    avg_type : str, default 'macro'
        Key under which the pooled curve is stored, and the ``average``
        argument passed to ``average_precision_score``. It does not change
        how the pooled curve itself is computed.

    Returns
    -------
    float
        ``auc(recall, precision)`` of the pooled curve, also shown in the
        plot title.
    """
    n_classes = test_labels.shape[1]

    precision, recall, average_precision = {}, {}, {}

    for class_id in range(n_classes):
        class_truth = test_labels[:, class_id]
        class_scores = predicted_values[:, class_id]
        precision[class_id], recall[class_id], _ = precision_recall_curve(class_truth, class_scores)
        average_precision[class_id] = average_precision_score(class_truth, class_scores)

    # Pooled curve: all classes scored jointly on the flattened matrices.
    pooled_curve = precision_recall_curve(test_labels.ravel(), predicted_values.ravel())
    precision[avg_type], recall[avg_type], _ = pooled_curve

    average_precision[avg_type] = average_precision_score(test_labels, predicted_values, average=avg_type)

    pooled_recall = recall[avg_type]
    pooled_precision = precision[avg_type]
    area = auc(pooled_recall, pooled_precision)

    plt.figure(figsize=(7,5))
    plt.step(pooled_recall, pooled_precision, color='b', alpha=0.2, where='post')
    plt.fill_between(pooled_recall, pooled_precision, alpha=0.2, color='b')

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall Curve (AUC = %0.4f)' % area)
    plt.show()

    return area