In [2]:
#imports

import tensorflow as tf
import numpy as np
import pandas as pd
import os
import glob
import cv2
import re
import keras
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.layers import Conv2D,Dense,Dropout,Flatten,MaxPooling2D
from tensorflow.keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from keras.utils import np_utils
from PIL import ImageFile 
import matplotlib.pyplot as plt
from keras.preprocessing import image                  
from tqdm import tqdm
from keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, recall_score, precision_score, confusion_matrix, roc_curve, auc
from matplotlib.colors import ListedColormap
import seaborn as sns
import warnings; warnings.filterwarnings('ignore')
from sklearn.metrics import classification_report

# **Importing data**

In [3]:
# Root folder of the dataset; each immediate sub-directory is one leaf class.
data_dir = "../input/leafdataset/field"

# Only the top level of the tree is needed. If the folder cannot be walked,
# fall back to empty lists instead of raising StopIteration.
walk_top = next(os.walk(data_dir), ([], [], []))
root, dirs, files = walk_top

# Sort in place so the class order is the same on every run.
dirs.sort()
print(len(dirs))

In [4]:
# Show the sorted class-folder names found directly under data_dir.
print(dirs)

# **Resizing the image**

In [5]:
def resize(fl, img_height, img_width):
    """Read the image file ``fl`` with OpenCV and resize it.

    Parameters
    ----------
    fl : str
        Path of the image file to load.
    img_height : int
        Target height in pixels (number of rows of the result).
    img_width : int
        Target width in pixels (number of columns of the result).

    Returns
    -------
    numpy.ndarray
        The resized image, in the channel order produced by ``cv2.imread``.

    Raises
    ------
    ValueError
        If ``cv2.imread`` cannot decode ``fl`` (it returns ``None`` in that case).
    """
    img = cv2.imread(fl)
    if img is None:
        # Fail with the offending path instead of a cryptic cv2.resize assertion.
        raise ValueError('Could not read image: %s' % fl)
    # cv2.resize expects dsize as (width, height), not (height, width).
    resized = cv2.resize(img, (img_width, img_height))
    return resized

# Building the dataset

In [6]:
def get_data():
    """Load the images of the first 30 class folders into memory.

    Reads the module-level ``dirs`` (class-folder names) and ``data_dir``
    (dataset root). For every folder, each ``*.jpg`` file is loaded and
    resized to 64x64 via ``resize``.

    Returns
    -------
    X : list
        One resized image per file.
    y : list of str
        Class name of each image, parallel to ``X``.
    classes : list of str
        Class names (folder name with underscores removed), one per folder.
    dir_names : list of str
        Original folder names, parallel to ``classes``.
    """
    X = []
    y = []
    classes = []
    dir_names = []

    for dir_name in dirs[:30]:
        class_name = dir_name.replace('_', '')
        classes.append(class_name)
        dir_names.append(dir_name)

        pattern = os.path.join(data_dir, dir_name, '*.jpg')
        # glob returns files in arbitrary, filesystem-dependent order. Sorting
        # makes the sample order, and therefore any seeded split, reproducible.
        for fl in sorted(glob.glob(pattern)):
            X.append(resize(fl, 64, 64))
            y.append(class_name)

    return X, y, classes, dir_names

In [7]:
# Load every image once: X and y are parallel lists (image, class name);
# classes and dir_names hold one entry per class folder.
X, y, classes,dir_names = get_data()

In [8]:
# Folder names that were actually loaded (at most the first 30 of dirs).
print(dir_names)

In [9]:
# Stack the list of equally sized images into a single array.
X = np.array(X)

# Print the real shapes (the messages say "Shape", so report shapes rather than counts).
print('Shape of X is', X.shape)
print('Shape of Y is', np.shape(y))

In [10]:
# Integer index -> class name, in the order the classes were collected.
class_dict = dict(enumerate(classes))

print(len(class_dict))
    

# Sample images

In [11]:
# Sample image for each class
N_SAMPLE_CLASSES = 5  # number of classes to preview; the 2x5 grid holds up to 10

# Take the first readable image (by file name) of every class folder.
# img_array[k] always belongs to class_dict[k]; None marks a folder in which
# no file could be decoded, so the indices never shift.
img_array = []
for dir_name in dir_names:
    path = os.path.join(data_dir, dir_name)
    sample = None
    for file_name in sorted(os.listdir(path)):
        sample = cv2.imread(os.path.join(path, file_name), cv2.IMREAD_COLOR)
        if sample is not None:
            break
    img_array.append(sample)

f, axarr = plt.subplots(2, 5, figsize=(18, 6))

for idx, ax in enumerate(axarr.flat):
    has_sample = (
        idx < N_SAMPLE_CLASSES
        and idx < len(img_array)
        and img_array[idx] is not None
    )
    if has_sample:
        # cv2 loads images as BGR while matplotlib expects RGB.
        ax.imshow(cv2.cvtColor(img_array[idx], cv2.COLOR_BGR2RGB))
        # class_dict is keyed by int, so look up with the int index.
        ax.set_title(class_dict.get(idx))
    else:
        # Hide grid cells that have nothing to show.
        ax.axis('off')

# Transforming labels to numerical using label encoder

In [12]:
from sklearn import preprocessing

# Map every class-name string in y to an integer id. The fitted encoder `le`
# is kept so the ids can be turned back into names later.
le = preprocessing.LabelEncoder()
y_encoded = le.fit_transform(y)

# Splitting into training and test data

In [13]:
# The ImageNet-trained ResNet50 expects inputs that went through its own
# preprocess_input. That function takes RGB images, while cv2 loaded them as
# BGR, so the channel axis is reversed first. astype() makes a float copy,
# which leaves X untouched and keeps this cell safe to re-run.
from tensorflow.keras.applications.resnet50 import preprocess_input

X_prepared = preprocess_input(np.asarray(X)[..., ::-1].astype("float32"))

# Stratified 70/30 split with a fixed seed. Preprocessing happens before the
# split so train and test inputs are always treated identically.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X_prepared,
    y_encoded,
    test_size=0.3,
    stratify=y_encoded,
    random_state=42,
)

In [14]:
# One-hot versions of the integer labels, built with the to_categorical
# helper already imported at the top of the notebook.
y_train_one_hot = to_categorical(ytrain)
y_test_one_hot = to_categorical(ytest)

# Downloading ResNet50

In [16]:
# Convolutional base only (no classification head), initialised with ImageNet weights.
modelResNet = ResNet50(
    weights='imagenet',
    include_top=False,
)

In [17]:
# Mark every layer of the base as non-trainable; it is only used via predict() below.
for resnet_layer in modelResNet.layers:
    resnet_layer.trainable = False

# Extracting training features

In [18]:
%%time

# Run the frozen ResNet50 base over the training images; the resulting
# feature maps are flattened into one row per image in a later cell.
feature_extractor_train=modelResNet.predict(Xtrain)

# Extracting testing features

In [19]:
%%time

# Same extraction for the held-out test images, using the same frozen base.
feature_extractor_test=modelResNet.predict(Xtest)

In [20]:
# Flatten each sample's feature map into a single row: (n_samples, all remaining dims).
n_train_samples = feature_extractor_train.shape[0]
n_test_samples = feature_extractor_test.shape[0]

training_features = feature_extractor_train.reshape(n_train_samples, -1)
testing_features = feature_extractor_test.reshape(n_test_samples, -1)

# Printing shapes of split data

In [21]:
# Short aliases used by the scikit-learn cells that follow.
xtrain, xtest = training_features, testing_features

# Feature and label shapes, train then test.
split_shapes = (xtrain.shape, xtest.shape, ytrain.shape, ytest.shape)
print(*split_shapes)

# Finding the best hyperparameters

In [22]:
# Exhaustive search over forest size and tree depth with GridSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Candidate values that are searched.
n_estimators = [200, 900, 800, 300]
max_depth = [2, 3, 6, 7, 10, 12, 14, 15]

# Defined for reference, but not part of the grid below.
min_samples_split = [2, 5, 10, 15, 100]
min_samples_leaf = [1, 2, 5, 10]

forest = RandomForestClassifier(random_state=42)
hyperF = {"n_estimators": n_estimators, "max_depth": max_depth}

# 10-fold cross-validation over every combination in hyperF, using all cores.
gridF = GridSearchCV(
    estimator=forest,
    param_grid=hyperF,
    cv=10,
    verbose=1,
    n_jobs=-1,
)
bestF = gridF.fit(xtrain, ytrain)

In [23]:
# Parameter combination with the best cross-validated score in the grid search above.
best_parameters = gridF.best_params_
print(best_parameters)

In [24]:
# Build the final forest from the parameters the grid search selected rather
# than from hand-copied numbers, so this cell cannot drift from the search result.
RF_model = RandomForestClassifier(random_state = 42, **gridF.best_params_)

# Train the model on training data
RF_model.fit(xtrain, ytrain) #For sklearn no one hot encoding



# Predicting on the test data with the model trained on the best hyperparameters

In [25]:
from sklearn import metrics

# Predict integer-encoded class ids for the held-out features.
prediction_RF = RF_model.predict(xtest)

# Overall accuracy; both arrays still hold encoded ids at this point.
rf_accuracy = metrics.accuracy_score(ytest, prediction_RF)
print ("Accuracy = ", rf_accuracy)

# Plotting the Confusion Matrix

In [26]:
# Counts of (actual, predicted) label pairs on the test split.
cm = confusion_matrix(ytest, prediction_RF)

# Annotated heatmap of the counts.
fig, ax1 = plt.subplots(1, 1, figsize=(10, 7))
sns.heatmap(
    cm,
    annot=True,
    cbar=False,
    fmt="d",
    linewidths=.5,
    cmap="Blues",
    ax=ax1,
)
ax1.set(
    title="Confusion Matrix",
    xlabel="Predicted class",
    ylabel="Actual class",
)
fig.tight_layout()

# Converting the encoded labels back to class names

In [29]:
# Decode only while prediction_RF still holds integer ids. The cell overwrites
# its own input, so without this guard a second run would pass class names to
# inverse_transform and raise.
if np.issubdtype(np.asarray(prediction_RF).dtype, np.integer):
    prediction_RF = le.inverse_transform(prediction_RF)

In [32]:
# Decode only while ytest still holds integer ids. The cell overwrites its own
# input, so without this guard a second run would pass class names to
# inverse_transform and raise.
if np.issubdtype(np.asarray(ytest).dtype, np.integer):
    ytest = le.inverse_transform(ytest)

# Printing the classification report

In [33]:
# Per-class report with 3 decimals. ytest and prediction_RF were converted
# back to class names in the cells above, so rows are labelled by name.
print(metrics.classification_report(ytest, prediction_RF, digits=3))

# Plotting ROC Curve

In [34]:
def plot_multiclass_roc(clf, X_test, y_test, n_classes, figsize=(17, 6)):
    """Plot one-vs-rest ROC curves for the first ``n_classes`` classes.

    Parameters
    ----------
    clf : fitted classifier
        Must provide ``predict_proba``.
    X_test : array-like
        Features passed to ``clf.predict_proba``.
    y_test : array-like
        True labels. They are expanded into indicator columns with
        ``pd.get_dummies``; column ``i`` is compared with probability
        column ``i``.
    n_classes : int
        Number of leading classes to draw a curve for.
    figsize : tuple, optional
        Size of the matplotlib figure.

    Raises
    ------
    ValueError
        If ``y_test`` does not yield one indicator column per probability
        column. A class missing from ``y_test`` would otherwise shift the
        columns and silently pair curves with the wrong class.

    Side effects
    ------------
    Saves the figure to ``resnet_knn_roc.png`` and then shows it.
    """
    y_score = clf.predict_proba(X_test)

    # calculate dummies once
    y_test_dummies = pd.get_dummies(y_test, drop_first=False).values
    if y_test_dummies.shape[1] != y_score.shape[1]:
        raise ValueError(
            'y_test has %d distinct classes but the classifier outputs %d '
            'probability columns' % (y_test_dummies.shape[1], y_score.shape[1])
        )

    # structures
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test_dummies[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # roc for each class
    fig, ax = plt.subplots(figsize=figsize)
    ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver operating characteristic example')
    for i in range(n_classes):
        ax.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f) for label %i' % (roc_auc[i], i))
    ax.legend(loc="best")
    ax.grid(alpha=.4)
    sns.despine()
    # Save before show(): once the figure has been shown, pyplot's savefig
    # would write a new, empty figure instead of this one.
    fig.savefig('resnet_knn_roc.png')
    plt.show()
# Take the class count from the fitted model instead of a hard-coded 30, so the
# call still works when fewer class folders were loaded.
plot_multiclass_roc(RF_model, xtest, ytest, n_classes=len(RF_model.classes_), figsize=(10, 10))