### CNN VGG16 Image Classification

In [1]:
# Notebook-level switch. None of the cells visible in this part of the notebook read it.
# NOTE(review): presumably 0 = reuse previously saved models and 1 = retrain — confirm
# against the cells that consume this flag.
train_models_flag = 0

In [2]:
import time
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
np.random.seed(2017) 

#Keras, deep learning libraries
from keras.models import Sequential, load_model
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Activation, Flatten, Dense, Dropout, Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras import backend as K
from keras.datasets import cifar10
import os
import tensorflow as tf
from tensorflow.python.client import device_lib

import pickle
import pandas as pd
from skimage import io
from scipy import misc
import skimage.transform as st
%pylab inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import h5py
from flask import Flask, request, render_template
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


Populating the interactive namespace from numpy and matplotlib


In [3]:
import keras
from tqdm import tqdm
from keras.preprocessing import image
from keras.utils import to_categorical

### Support functions for model evaluation

In [4]:
#plot model accuracy and loss function
def plot_model_history(model_history):
    """Plot training/validation accuracy and loss curves side by side.

    Parameters
    ----------
    model_history : object
        Any object exposing a ``history`` mapping with the keys
        ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``,
        each holding one value per epoch.

    Notes
    -----
    Epochs are numbered from 1 on the x axis. Roughly ten x ticks are drawn:
    the tick step is ``len(history) // 10`` (at least 1). The step is passed
    to ``np.arange``; handing it to ``set_xticks`` as a second positional
    argument would be interpreted as tick *labels*.
    """
    history = model_history.history
    fig, axs = plt.subplots(1, 2, figsize=(15, 5))

    # (axis, training key, validation key, title, y label) for each panel.
    panels = (
        (axs[0], 'accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
        (axs[1], 'loss', 'val_loss', 'Model Loss', 'Loss'),
    )
    for ax, train_key, val_key, title, ylabel in panels:
        train_values = history[train_key]
        val_values = history[val_key]
        n_epochs = len(train_values)

        ax.plot(range(1, n_epochs + 1), train_values)
        ax.plot(range(1, len(val_values) + 1), val_values)
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xlabel('Epoch')
        # Integer step >= 1 so short runs still get a tick per epoch.
        tick_step = max(1, n_epochs // 10)
        ax.set_xticks(np.arange(1, n_epochs + 1, tick_step))
        ax.legend(['train', 'val'], loc='best')
    plt.show()

In [5]:
#compute test accuracy
def accuracy(test_x, test_y, model):
    """Return the classification accuracy of ``model`` on a test set, in percent.

    Parameters
    ----------
    test_x : array-like
        Inputs forwarded unchanged to ``model.predict``.
    test_y : array-like
        Ground-truth labels, either one-hot encoded with shape
        ``(n_samples, n_classes)`` or a 1-D sequence of integer class ids.
    model : object
        Anything with a ``predict`` method returning per-class scores of shape
        ``(n_samples, n_classes)``.

    Returns
    -------
    float
        Percentage (0-100) of samples whose highest-scoring class matches the
        true class.
    """
    result = model.predict(test_x)
    predicted_class = np.argmax(result, axis=1)

    labels = np.asarray(test_y)
    # One-hot labels must be reduced along the class axis (axis=1); reducing
    # along axis=0 yields one value per class instead of one per sample.
    true_class = labels if labels.ndim == 1 else np.argmax(labels, axis=1)

    num_correct = np.sum(predicted_class == true_class)
    return float(num_correct) / result.shape[0] * 100

### Load Dataset

In [6]:
import os

In [7]:
# Show the kernel's working directory: the relative "*.pickle" files written by the
# data-loading helpers below are created relative to it.
os.getcwd()

'/Users/sophia/Desktop/CMPE 257/Project/code'

In [8]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import random


# Dataset roots. create_data() expects one sub-directory per CATEGORIES entry
# inside each root. The TRAIN_DATADIR / TEST_DATADIR environment variables
# override the original author's local paths, which remain the defaults.
TRAIN_DATADIR = os.environ.get('TRAIN_DATADIR', '/Users/sophia/Desktop/Train_2')
TEST_DATADIR = os.environ.get('TEST_DATADIR', '/Users/sophia/Desktop/Test_2')
# Sub-directory names; a sample's integer label is its category's index in this list.
CATEGORIES = ["GreyPowder", "GrayMold", "MosaicVirus", "Healthy"]
# Images are resized to IMG_SIZE x IMG_SIZE pixels when loaded.
IMG_SIZE = 224


def normalize(x):
    """
    Min-max scale image data into the range 0 to 1.

    The minimum and maximum are taken over the whole input, not per image
    or per channel.

    : x: Array-like of image data (any shape).
    : return: Numpy float array of the same shape. If every value in ``x``
              is identical the result is all zeros (rather than NaN from 0/0).
    : raises ValueError: if ``x`` is empty (raised by ``np.min``).
    """
    values = np.asarray(x)
    lowest = np.min(values)
    span = np.max(values) - lowest
    if span == 0:
        # Constant input: there is no range to scale by.
        return np.zeros(values.shape, dtype=float)
    return np.array((values - lowest) / span)


def create_data(data_directory):
    """Load, resize and label every readable image under ``data_directory``.

    For each name in ``CATEGORIES`` the sub-directory
    ``data_directory/<category>`` is listed; each entry is read with
    ``cv2.imread`` in colour mode and resized to ``IMG_SIZE x IMG_SIZE``.

    Parameters
    ----------
    data_directory : str
        Root directory containing one sub-directory per category.

    Returns
    -------
    list
        Shuffled list of ``[image_array, class_index]`` pairs, where
        ``class_index`` is the category's position in ``CATEGORIES``.

    Notes
    -----
    Entries that OpenCV cannot decode (``cv2.imread`` returns ``None``) or
    cannot resize are skipped, and a single ``UserWarning`` reports how many
    were dropped. Other errors (e.g. a missing category directory) propagate
    instead of being silently swallowed.
    """
    import warnings

    _data = []
    skipped = []
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(data_directory, category)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            img_array = cv2.imread(img_path, cv2.IMREAD_COLOR)
            if img_array is None:
                # Not a decodable image (hidden file, sub-directory, corrupt file, ...).
                skipped.append(img_path)
                continue
            try:
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
            except cv2.error:
                skipped.append(img_path)
                continue
            _data.append([new_array, class_num])

    if skipped:
        warnings.warn(
            "create_data: skipped %d unreadable file(s) under %s (first: %s)"
            % (len(skipped), data_directory, skipped[0])
        )
    random.shuffle(_data)
    return _data


def _load_split(data_directory, split_name, is_pickle):
    """Load one dataset split, normalise it and optionally pickle it.

    Reads the images with ``create_data``, stacks them into an array of shape
    ``(n_samples, IMG_SIZE, IMG_SIZE, 3)`` and min-max scales it with
    ``normalize``. When ``is_pickle`` is true the features and labels are
    written to ``X_<split_name>.pickle`` and ``y_<split_name>.pickle`` in the
    current working directory.

    Returns ``(features, labels)`` where ``labels`` is a plain list of
    integer class indices.

    Raises ``ValueError`` if no readable image was found.
    """
    samples = create_data(data_directory)
    if not samples:
        raise ValueError("No readable images found under %r" % (data_directory,))

    features = [image for image, _ in samples]
    labels = [label for _, label in samples]

    features = np.array(features).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
    features = normalize(features)

    if is_pickle:
        for file_name, payload in (
            ("X_" + split_name + ".pickle", features),
            ("y_" + split_name + ".pickle", labels),
        ):
            # Context manager guarantees the file is closed even if dump() fails.
            with open(file_name, "wb") as pickle_out:
                pickle.dump(payload, pickle_out)

    return features, labels


def pickle_training_data(is_pickle=False):
    """Load the training split from ``TRAIN_DATADIR``.

    Parameters
    ----------
    is_pickle : bool, default False
        When true, also write ``X_train.pickle`` and ``y_train.pickle`` to
        the current working directory.

    Returns
    -------
    tuple
        ``(X_train, y_train)``. The tuple is returned in both modes; earlier
        versions returned ``None`` when ``is_pickle`` was true.
    """
    return _load_split(TRAIN_DATADIR, "train", is_pickle)


def pickle_test_data(is_pickle=False):
    """Load the test split from ``TEST_DATADIR``.

    Parameters
    ----------
    is_pickle : bool, default False
        When true, also write ``X_test.pickle`` and ``y_test.pickle`` to the
        current working directory.

    Returns
    -------
    tuple
        ``(X_test, y_test)``. The tuple is returned in both modes; earlier
        versions returned ``None`` when ``is_pickle`` was true.
    """
    return _load_split(TEST_DATADIR, "test", is_pickle)

In [9]:
# Build the in-memory train/test splits. With the default is_pickle=False the
# helpers return (features, labels) and write nothing to disk. Features are
# normalised arrays of shape (n, IMG_SIZE, IMG_SIZE, 3); labels are lists of
# integer indices into CATEGORIES.
X, y = pickle_training_data()
X_test, y_test = pickle_test_data()

In [15]:
#explore dataset
# Report the dimensions of every split; np.shape handles both the feature
# arrays and the plain-list labels without copying the large arrays.
for split_label, split_values in (('X', X), ('y', y), ('X_test', X_test), ('y_test', y_test)):
    print(split_label, np.shape(split_values))

X (16648, 224, 224, 3)
y (16648,)
X_test (4683, 224, 224, 3)
y_test (4683,)


### Image Dataset Visualization

In [None]:
#Visualizing images for each class in the dataset
# class_names = ['Healthy','Mosaic_Virus','Gray_Mold','Gray_Powdery','Bipolaris']
# print(f"number of target classes : {num_classes}")

# fig = plt.figure(figsize=(8,3))
# for i in range(num_classes):
#     ax = fig.add_subplot(1, 5, 1 + i, xticks=[], yticks=[])
#     idx = np.where(data_labels[:]==i)[0]
#     features_idx = data_features[idx,::]
#     img_num = np.random.randint(features_idx.shape[0])
#     im = np.transpose(features_idx[img_num,::], (0,1,2))
#     ax.set_title(class_names[i])
#     plt.imshow(im)
# plt.show()

In [None]:
import numpy as np
import pandas as pd
import seaborn as sb
import sklearn.tree as tree
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
%matplotlib inline

In [None]:
# scikit-learn estimators require a 2-D (n_samples, n_features) matrix, but X
# holds images shaped (n_samples, IMG_SIZE, IMG_SIZE, 3). Flatten every image
# into a single feature vector before fitting.
X_flat = np.asarray(X).reshape(len(X), -1)

#create the parameter grid
param_grid = {
    'bootstrap': [True],
    'max_depth': [15,20,25,30,50],
    'n_estimators': [90,100,200,300,400,500]
}
# random_state is fixed so repeated runs build identical forests.
rf_1 = RandomForestClassifier(random_state=0, verbose=1, n_jobs=3)
# 3-fold cross-validation over every max_depth / n_estimators combination.
grid_search = GridSearchCV(estimator=rf_1, param_grid=param_grid, cv=3)
#fit the grid search to the data
grid_search.fit(X_flat, y)

In [None]:
# plot the hyper-parameter tuning
def class_plot(grid, grid_param, title):
    """Draw mean cross-validation score against ``n_estimators``, one line per ``max_depth``.

    Parameters
    ----------
    grid : object
        Fitted search object exposing ``cv_results_['mean_test_score']``.
    grid_param : dict
        Parameter grid containing the ``'max_depth'`` and ``'n_estimators'`` lists.
        The flat score list is reshaped to ``(len(max_depth), len(n_estimators))``,
        so the scores must be ordered with ``n_estimators`` varying fastest.
    title : str
        Figure title.
    """
    depths = grid_param['max_depth']
    estimator_counts = grid_param['n_estimators']

    # One row of scores per max_depth value, one column per n_estimators value.
    flat_scores = list(grid.cv_results_['mean_test_score'])
    score_table = np.array(flat_scores).reshape(len(depths), len(estimator_counts))

    plt.figure()
    axes = plt.subplot(111)
    for depth, depth_scores in zip(depths, score_table):
        plt.plot(estimator_counts, depth_scores, '-o', label='Max depth' + str(depth))
    axes.legend(loc='lower right')
    plt.xlabel('n estimator')
    plt.ylabel('Mean score')
    plt.title(title)
    plt.show()

In [None]:
# Plot the tuning curves, then report the winning configuration. The search
# object fitted above is `grid_search`; no `grid_search_2` is defined in this
# notebook, so referencing it raised NameError.
class_plot(grid_search,param_grid,"Grid Search Result")
print (grid_search.best_params_)
print (grid_search.best_score_)

In [None]:
# GridSearchCV was built with its default refit=True, so best_estimator_ has
# already been re-trained on the full training set with the best parameters;
# fitting it a second time would only repeat that work.
rf_best = grid_search.best_estimator_
# The forest needs 2-D input: flatten each test image into one feature vector.
X_test_flat = np.asarray(X_test).reshape(len(X_test), -1)
y_predict = rf_best.predict(X_test_flat)

### Plot Confusion Matrix

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns

# Integer labels are indices into CATEGORIES (assigned when the images were
# loaded), so the axis labels must follow that exact order.
class_names = list(CATEGORIES)
label_ids = list(range(len(class_names)))

# Get the confusion matrix with a fixed label set so it is always
# len(CATEGORIES) x len(CATEGORIES), even if a class is absent from y_test.
matrix = confusion_matrix(y_test, y_predict, labels=label_ids).astype('float')
# Row-normalise (each row = one true class). Rows with no samples stay at 0
# instead of producing NaN from 0/0.
row_totals = matrix.sum(axis=1, keepdims=True)
matrix = np.divide(matrix, row_totals, out=np.zeros_like(matrix), where=row_totals != 0)

# Build the plot
plt.figure(figsize=(10,6))
sns.set(font_scale=1.2)
# Let seaborn place the tick labels at the cell centres; supplying
# len(class_names)+1 manual tick positions does not match the number of labels.
sns.heatmap(matrix, annot=True, annot_kws={'size':10}, cmap=plt.cm.Greens, linewidths=0.1,
            xticklabels=class_names, yticklabels=class_names)

# Add labels to the plot
plt.xticks(rotation=25)
plt.yticks(rotation=0)
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.title('Confusion Matrix for Random Forest Model')
plt.show()

### Model Evaluation

In [None]:
# Print overall accuracy and the raw confusion matrix, each followed by a
# blank line, then the per-class precision/recall/F1 report.
for heading, metric in (("Accuracy:\n", accuracy_score), ("Confusion Matrix:\n", confusion_matrix)):
    print(heading, metric(y_test, y_predict), "\n")

report_text = classification_report(y_test, y_predict)
print("Classification Report:\n", report_text)

### Plot the tree

In [None]:
from sklearn.datasets import make_blobs
from ipywidgets import interactive, IntSlider
import ipywidgets as widgets
from IPython.display import display
from IPython.display import display

In [None]:
# Labels are already integer indices into CATEGORIES (assigned when the images
# were loaded), so the name -> id mapping must follow that order and the label
# lists need no translation.
class_names = {name: idx for idx, name in enumerate(CATEGORIES)}
y_train_num = pd.Series(y)
y_test_num = pd.Series(y_test)

print(y_train_num.shape)

# Import tools needed for visualization
from sklearn.tree import export_graphviz
import pydot
import graphviz

# Pull out one tree from the *fitted* forest. GridSearchCV fits clones of
# rf_1, so rf_1 itself has no estimators_; the trained forest is
# grid_search.best_estimator_. The name `single_tree` avoids shadowing the
# `sklearn.tree` module imported as `tree` earlier in the notebook.
single_tree = grid_search.best_estimator_.estimators_[5]

# One feature name per input column the tree was trained on, taken from the
# tree itself instead of a hard-coded count.
f_list = ["pixel_%d" % i for i in range(single_tree.tree_.n_features)]

# Export the image to a dot file
export_graphviz(single_tree, out_file = 'tree.dot', feature_names = f_list, rounded = True, precision = 1)
# Use dot file to create a graph
(graph, ) = pydot.graph_from_dot_file('tree.dot')
# Write graph to a png file
graph.write_png('tree.png')

# Also render the same dot source through graphviz and open it in a viewer.
with open("tree.dot") as f:
    dot_graph = f.read()
src = graphviz.Source(dot_graph)
src.render('test-output.gv', view=True)