In [None]:
import gc
import os

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from tensorflow import argmax, random
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import plot_model, to_categorical



# LOADING THE IMAGES
Of the 10 class folders in the dataset, up to 373 images are read from each of these four: `Tomato___Leaf_Mold`, `Tomato___Target_Spot`, `Tomato___Tomato_mosaic_virus` and `Tomato___healthy`. The images are stored in a list (`lst_images`) and the matching disease labels in a parallel list (`lbl_images`).

In [None]:
# Root directory that holds one sub-folder of images per class.
DATA_DIR = '../input/fulltomatodiseasedetection/full_plant'
# Upper bound on the number of images read from each selected folder.
MAX_IMAGES_PER_CLASS = 373

selected_folders=['Tomato___Leaf_Mold','Tomato___Target_Spot','Tomato___Tomato_mosaic_virus','Tomato___healthy']

lst_images=[]  # one pixel array per image file read
lbl_images=[]  # folder (class) name of the image at the same position in lst_images

# os.listdir returns entries in arbitrary order. Sorting both listings makes
# the chosen files and their order identical on every run, which the seeded
# train/test split and the fixed example index further down rely on.
for folder in sorted(os.listdir(DATA_DIR)):
    if folder not in selected_folders:
        continue
    folder_path = os.path.join(DATA_DIR, folder)
    for file in sorted(os.listdir(folder_path))[:MAX_IMAGES_PER_CLASS]:
        # The context manager releases the file handle once the pixels are copied.
        with Image.open(os.path.join(folder_path, file)) as img:
            lst_images.append(np.asarray(img))
        lbl_images.append(folder)

print('total images: ', len(lbl_images))
print('Dimension of each image: ', lst_images[0].shape)
print('labels: ',set(lbl_images))

## Example of a picture

In [None]:
# Position (in lst_images / lbl_images) of the sample to display
index = 500
plt.imshow(lst_images[index])
print(f"it's a {lbl_images[index]}  picture.")

## PRE-PROCESSING

`lst_images` is converted to a NumPy array, its pixel values are scaled to the range 0-1 (divided by 255), and the result is saved in `X`.
`Y` is the one-hot (categorical) matrix built from the labels.

In [None]:
from sklearn.preprocessing import OrdinalEncoder

# Scale pixel values from 0-255 to 0-1. float32 is used instead of NumPy's
# default float64: it halves the memory held by X and by the train/test
# copies made from it, and Keras computes in float32 by default anyway.
X = np.asarray(lst_images, dtype=np.float32) / 255

# Encode each label as the index of its folder name in selected_folders
lbl_images_array = np.asarray(lbl_images).reshape(-1, 1)  # OrdinalEncoder expects a 2-D column
lbl_encoded = OrdinalEncoder(categories=[selected_folders]).fit_transform(lbl_images_array)

# Pin the one-hot width to the number of selected classes. Without num_classes,
# to_categorical infers it from the largest label present, so Y would come out
# too narrow for the 4-unit output layer if the last class had no images.
Y = to_categorical(lbl_encoded, num_classes=len(selected_folders))
print('Y :')
print(Y)

## SPLITTING THE DATA

`X` and `Y` are shuffled and split into training and test sets in a 7:3 ratio.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; the fixed seed keeps the shuffle repeatable
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)


In [None]:
# Free up RAM: the raw image list and the full scaled array are no longer
# needed now that the train/test arrays exist
del lst_images, X
gc.collect()

## Create ANN Model Using Tensorflow Keras

Architecture of the Network is :-

1). Input layer that flattens each 256x256x3 image in `X` into a single vector

2). Three dense (fully connected) hidden layers, each with 1000 neurons and the ReLU activation function

3). Output layer with 4 neurons and softmax activation, classifying each input image as one of the 4 classes (`Tomato___Leaf_Mold`, `Tomato___Target_Spot`, `Tomato___Tomato_mosaic_virus`, `Tomato___healthy`)

In [None]:


# Build a fully connected classifier:
# Flatten -> 3 x Dense(1000, relu) -> Dense(4, softmax)

#Creating The ANN object
ann = Sequential()

#Adding The Input Layer: flattens each 256x256x3 image into one vector
ann.add(Flatten(input_shape=(256, 256, 3)))

#Adding the three identical hidden layers
for hidden_idx in range(3):
    ann.add(Dense(units=1000, activation='relu'))

#Adding The Output Layer: one softmax unit per class
ann.add(Dense(units=4, activation='softmax'))

#Visualizing the model.
# plot_model lives in tensorflow.keras.utils and must be imported in the
# notebook's import cell; calling it without that import raises NameError.
plot_model(ann, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

#Compiling the ANN
# categorical_crossentropy matches the one-hot encoded labels in Y
ann.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


## Training and Validation
The ann.fit method adjusts the model parameters to minimize the loss:

In [None]:
# Fit the network on the training split for 20 passes over the data
ann.fit(
    x=x_train,
    y=y_train,
    epochs=20,
)

The `ann.evaluate` method measures the model's performance (here, accuracy), usually on a held-out test set.

In [None]:
# Free up RAM: the training inputs are not used again after fitting
del x_train
gc.collect()

In [None]:

# Loss and accuracy of the trained network on the held-out test split
loss, acc = ann.evaluate(x_test, y_test, verbose=False)
accuracy_percent = acc * 100
print(f"The ANN has {accuracy_percent:.2f}% accuracy")

In [None]:
from sklearn.metrics import confusion_matrix

# Per-class scores predicted by the network for every test image
y_pred = ann.predict(x_test)

# Reduce the score rows and the one-hot label rows to class indices
predicted_categories = argmax(y_pred, axis=1)
y_test_categories = argmax(y_test, axis=1)

# First argument is the true class, second the predicted class
cf = confusion_matrix(y_test_categories, predicted_categories)


In [None]:
import seaborn as sns
def make_confusion_matrix(cf,
                          categories=selected_folders,
                          cbar=True,
                          cmap='Blues',
                          title=None):
    """Draw a confusion matrix as an annotated seaborn heatmap.

    Each cell shows the raw count on its first line and, on its second
    line, that count as a percentage of the sum of all cells.

    Parameters
    ----------
    cf : 2-D array-like
        Confusion matrix. Rows are plotted on the "True label" axis and
        columns on the "Predicted label" axis.
    categories : list of str
        Tick labels used on both axes. Defaults to ``selected_folders``.
    cbar : bool
        Whether to draw the colour bar next to the heatmap.
    cmap : str
        Matplotlib colormap name passed to ``sns.heatmap``.
    title : str or None
        Figure title; no title is set when falsy.

    Returns
    -------
    None
        The heatmap is drawn on a newly created matplotlib figure.
    """
    # Accept nested lists as well as ndarrays
    cf = np.asarray(cf)

    # CODE TO GENERATE TEXT INSIDE EACH SQUARE
    flat_counts = cf.flatten()
    total = flat_counts.sum()
    # An all-zero (or empty) matrix would otherwise divide by zero and
    # label every cell "nan%"; show 0.00% instead.
    if total:
        fractions = flat_counts / total
    else:
        fractions = np.zeros(flat_counts.shape)

    box_labels = [f"{count:0.0f}\n{fraction:.2%}"
                  for count, fraction in zip(flat_counts, fractions)]
    box_labels = np.asarray(box_labels).reshape(cf.shape)

    figsize = plt.rcParams.get('figure.figsize')

    # MAKE THE HEATMAP VISUALIZATION
    plt.figure(figsize=figsize)
    # fmt="" because box_labels are already fully formatted strings
    sns.heatmap(cf,annot=box_labels,fmt="",cmap=cmap,cbar=cbar,xticklabels=categories,yticklabels=categories)

    plt.ylabel('True label')
    plt.xlabel('Predicted label')

    if title:
        plt.title(title)

In [None]:
# Render the test-set confusion matrix with the class folder names as tick labels
make_confusion_matrix(
    cf,
    categories=selected_folders,
    cbar=True,
    cmap='Blues',
    title=None,
)