In [None]:
import os
import PIL
import glob
import random
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
import seaborn as sns

import sklearn.metrics as metrics
from sklearn.metrics import classification_report
from sklearn.metrics import ConfusionMatrixDisplay

import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from keras.models import load_model
from tensorflow.random import set_seed



## Data Preparation

In [None]:
# Directory of each dataset split, relative to the notebook's working directory.
train_path, val_path, test_path = (
    'Vegetable Images/train',
    'Vegetable Images/validation',
    'Vegetable Images/test',
)

In [None]:
# Per-split containers, filled in by the directory scan that follows:
#   data_dicts[split]  -> list of image file paths
#   data_labels[split] -> list of integer class indices, parallel to the paths
# Each key gets its own fresh list so the splits never share state.
data_dicts = {split: [] for split in ('train', 'test', 'val')}
data_labels = {split: [] for split in ('train', 'test', 'val')}

In [None]:
# Scan every split once: collect image paths plus the integer index of the
# class folder each image sits in, then print a short per-split summary.
#
# NOTE: class_names, image_class, num_total (and the other loop variables)
# keep the values of the LAST split processed -- 'test' -- once the loop ends.
split_dirs = {'train': train_path, 'val': val_path, 'test': test_path}

for phase, data_dir in split_dirs.items():
    # One class per sub-directory; sorting fixes the class -> index mapping.
    class_names = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )
    num_class = len(class_names)

    # image_files[i] lists the files found in the i-th class folder.
    image_files = [
        [
            os.path.join(data_dir, name, file_name)
            for file_name in os.listdir(os.path.join(data_dir, name))
        ]
        for name in class_names
    ]
    num_each = [len(files) for files in image_files]

    # Flatten into parallel lists: image_files_list[j] has label image_class[j].
    image_files_list = []
    image_class = []
    for class_index, files in enumerate(image_files):
        image_files_list.extend(files)
        image_class.extend([class_index] * len(files))

    data_dicts[phase] = image_files_list
    data_labels[phase] = image_class
    num_total = len(image_class)

    print(f"{phase} information")
    if image_files_list:
        # Only the first image is inspected; the context manager releases the
        # file handle instead of leaving it open for the rest of the session.
        with PIL.Image.open(image_files_list[0]) as first_image:
            image_width, image_height = first_image.size
        print(f"Image dimensions: {image_width} x {image_height}")
    else:
        # An empty split used to fail with IndexError on image_files_list[0].
        print("Image dimensions: n/a (no images found)")
    print(f"Label names: {class_names}")
    print(f"Label counts: {num_each}")
    print(f"Total image count: {num_total}")
    print('--'*8)

In [None]:
# Class balance of the training split.
df = pd.DataFrame({'path': data_dicts['train'],'label': data_labels['train']})

# value_counts() orders by frequency, not by label, so pairing its output
# positionally with the alphabetical class_names list can attach a count to
# the wrong class. Sorting by label index and looking each name up from that
# index keeps every slice tied to its own class.
sorted_counts = df['label'].value_counts().sort_index()
# NOTE(review): class_names is whatever the directory scan left behind (its
# last split); this assumes the train split has the same class folders.
slice_names = [class_names[label] for label in sorted_counts.index]

fig = px.pie(names=slice_names, values=sorted_counts.to_numpy(),
             title="Class Distribution in Trainset", hole=0.3)
fig.show()

In [None]:
# Show nine random training images with their class names.
#
# Paths AND labels are both taken from the train split here. The notebook-level
# num_total / image_class variables belong to the last split scanned (test), so
# using them to index train paths mislabels images and restricts sampling to
# the first len(test) training files.
train_files = data_dicts['train']
train_labels = data_labels['train']

sample_fig, sample_axes = plt.subplots(3, 3, figsize=(8, 8))
sample_indices = np.random.randint(len(train_files), size=9)
for ax, k in zip(sample_axes.flat, sample_indices):
    # np.array() reads the pixel data, so the file can be closed right after.
    with PIL.Image.open(train_files[k]) as im:
        arr = np.array(im)
    # NOTE(review): class_names comes from the last split scanned; assumes the
    # train split uses the same class folders.
    ax.set_xlabel(class_names[train_labels[k]])
    ax.imshow(arr, vmin=0, vmax=255)
sample_fig.tight_layout()
plt.show()

## Data Loading

In [None]:
batch_size = 32

# One generator per split. Every split is rescaled to [0, 1]; random
# augmentation is applied to the training split only.
# Validation images are deliberately NOT shifted: random transforms would make
# val_loss change from epoch to epoch for reasons unrelated to the model, and
# val_loss is what checkpointing and early stopping are driven by.
data_generator = {
    'train': ImageDataGenerator(
        rescale=1/255.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        rotation_range=10,
    ),
    'val': ImageDataGenerator(rescale=1/255.),
    'test': ImageDataGenerator(rescale=1/255.),
}

In [None]:
# Directory iterators for the three splits.
# - class_mode='sparse' yields 1-D integer class indices, which is the label
#   format for a multi-class problem trained with
#   sparse_categorical_crossentropy ('binary' is meant for two classes).
# - target_size is spelled out (256x256 is also the Keras default) because the
#   model below is built with input_shape [256, 256, 3].
# - only the training split is shuffled; val/test keep directory order so that
#   predictions line up with the iterator's .labels.
data_sets = {
    split: data_generator[split].flow_from_directory(
        directory,
        target_size=(256, 256),
        shuffle=(split == 'train'),
        class_mode='sparse',
        batch_size=batch_size,
    )
    for split, directory in (
        ('train', train_path),
        ('val', val_path),
        ('test', test_path),
    )
}

In [None]:
# Pull a single batch from the training iterator to sanity-check label values
# and tensor shapes (replaces a for/enumerate loop that broke after one step).
images, labels = next(iter(data_sets['train']))
print('labels of first batch in trainset:',labels)
print(labels.shape)
print(images.shape)

## Data Visualization 


In [None]:
def show_images(GRID=(3,3), model=None, size=(8,8), data=data_sets['train']):
    """Plot a grid of images, one randomly chosen image per batch of `data`.

    Parameters
    ----------
    GRID : sequence of two ints
        (rows, columns) of the subplot grid.
    model : optional
        If given, each image is also run through ``model.predict`` and the
        arg-max class is shown next to the true label.
    size : tuple
        Figure size forwarded to ``plt.figure``.
    data : iterable of (images, labels) batches
        Defaults to the training iterator that exists when this cell is run.

    Class names are looked up in the notebook-level ``class_names`` list.
    """
    n_rows, n_cols = GRID[0], GRID[1]
    n_images = n_rows * n_cols

    plt.figure(figsize=size)
    # zip() with the range first stops after n_images panels without pulling
    # an extra batch from `data`.
    for position, (images, labels) in zip(range(1, n_images + 1), data):
        # Sample within the batch actually received: a batch may hold fewer
        # than 32 images (different batch_size, or a shorter final batch).
        pick = np.random.randint(len(images))
        image, label = images[pick], class_names[int(labels[pick])]

        plt.subplot(n_rows, n_cols, position)
        plt.imshow(image)

        if model is None:
            title = f"Class : {label}"
        else:
            # predict() expects a batch axis, hence the np.newaxis.
            pred = class_names[int(np.argmax(model.predict(image[np.newaxis, ...])))]
            title = f"Org : {label}, Pred : {pred}"

        plt.title(title)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
# Preview a 3x3 grid of training images with their true class (all defaults).
show_images()

## Model

In [None]:
# Transfer-learning classifier: frozen MobileNetV2 feature extractor followed
# by a small dense head.
#
# classifier_activation is not passed: per the Keras docs it is ignored unless
# include_top=True, so it had no effect on the headless backbone used here.
# NOTE(review): the generators feed pixels rescaled to [0, 1], while the Keras
# docs describe mobilenet_v2.preprocess_input as scaling inputs to [-1, 1] --
# confirm whether the backbone should receive that range instead.
pretrained = tf.keras.applications.mobilenet_v2.MobileNetV2(
    input_shape=[256,256,3], include_top=False,
)
pretrained.trainable = False  # train only the new head

# Output width follows the number of class folders the training iterator
# found, rather than a hard-coded 15.
num_classes = len(data_sets['train'].class_indices)

model = tf.keras.models.Sequential([
    pretrained,
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])

## Model Training 

In [None]:
# Write the model to final_model.h5, overwriting it only when the monitored
# quantity improves (save_best_only=True; no monitor is passed, so Keras'
# default applies).
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath="final_model.h5",
    save_best_only=True,
)

# Stop once val_loss has not improved for 5 consecutive epochs and roll the
# in-memory model back to its best weights.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
)

In [None]:
# Upper bound on training length; early stopping may end the run sooner.
epochs = 10

# Labels are integer class indices, hence the sparse cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# The returned History object carries the per-epoch metrics plotted below.
history = model.fit(
    data_sets['train'],
    epochs=epochs,
    validation_data=data_sets['val'],
    callbacks=[checkpoint_callback, early_stopping_callback],
)

In [None]:
# history.history is a dict of per-epoch metric lists; as a DataFrame each row
# is one epoch and each column one metric.
history_df = pd.DataFrame(history.history)

In [None]:
# Display the per-epoch training/validation metrics table.
history_df

In [None]:
# Training vs. validation accuracy per epoch (x axis = row index of history_df).
line = px.line(history_df, y=["accuracy", "val_accuracy"], markers=True)
line.update_xaxes(title="epochs", rangeslider_visible=False)
line.update_yaxes(title="Accuracy")
line.update_layout(
    showlegend=True,
    # Centered title placed slightly below the top edge of the figure.
    title=dict(
        text='train acc Vs. val acc',
        y=0.94,
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
)
line.show()

In [None]:
# Training vs. validation loss per epoch (x axis = row index of history_df).
line = px.line(history_df, y=["loss", "val_loss"], markers=True)
line.update_xaxes(title="epochs", rangeslider_visible=False)
line.update_yaxes(title="Loss")
line.update_layout(
    showlegend=True,
    # Centered title placed slightly below the top edge of the figure.
    title=dict(
        text='train loss Vs. val loss',
        y=0.94,
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
)
line.show()

## Model Saving 

In [None]:
# Persist the in-memory model after training.
# NOTE(review): the file is named 'vgg19.h5', but the model built in this
# notebook uses a MobileNetV2 backbone -- confirm whether the name is intended.
model.save('vgg19.h5')

## Model Evaluation 

In [None]:
# Reload the checkpoint written by the ModelCheckpoint callback
# (final_model.h5, saved with save_best_only=True). This rebinds `model`, so
# every evaluation cell that follows uses the reloaded checkpoint.
model_path = './final_model.h5'
model = load_model(model_path)
model.summary()

## Indicators Report 


In [None]:
# Build ground truth and predictions in this cell so it runs top-to-bottom on a
# fresh kernel: `labels_classes` was never defined anywhere, and `pred_classes`
# was only created by a later cell.
test_set = data_sets['test']
# assumes class_indices lists the class names in index order -- TODO confirm
classes = list(test_set.class_indices.keys())

# The test iterator is created with shuffle=False, so test_set.labels and the
# rows returned by predict() refer to the same images in the same order.
labels_classes = [classes[index] for index in test_set.labels]
pred_classes = [classes[index]
                for index in np.argmax(model.predict(test_set), axis=1)]

print(classification_report(labels_classes,pred_classes))

## Confusion Matrix

In [None]:
# Class names taken from the test iterator's name -> index mapping
# (assumes the mapping is listed in index order -- TODO confirm).
classes = list(data_sets['test'].class_indices)
# True class name of every test image, in iterator order.
real_classes = [classes[label] for label in data_sets['test'].labels]

# Predicted class = arg-max over the per-class outputs of the model.
pred = tf.argmax(model.predict(data_sets['test']), axis=1)
pred_classes = [classes[index] for index in pred]

## Using sns 

In [None]:
# labels=classes pins the row/column order of the matrix to the same list used
# for the tick labels, instead of relying on scikit-learn's own ordering of the
# label values happening to match.
cf_matrix = metrics.confusion_matrix(real_classes, pred_classes, labels=classes)
sns.set_theme(rc={'figure.figsize':(11,10)})
ax = sns.heatmap(
    cf_matrix,
    annot=True,
    cmap='Reds',
    fmt="g",  # plain integer counts, no scientific notation
    xticklabels=classes,
    yticklabels=classes,
    cbar=False,
)
ax.set_ylabel('True Labels')
ax.set_xlabel('Predicted Labels');

## Using plotly 

In [None]:
def plot_confusion_matrix(y_true, y_pred, class_names):
    """Render the confusion matrix of `y_true` vs `y_pred` as a Plotly heatmap.

    Parameters
    ----------
    y_true, y_pred :
        True and predicted labels, forwarded to
        ``metrics.confusion_matrix``.
    class_names :
        Tick labels used for both axes.

    No ``labels=`` argument is passed to ``confusion_matrix``, so the matrix
    uses scikit-learn's own label ordering; `class_names` must already be in
    that order for the ticks to line up.
    """
    counts = metrics.confusion_matrix(y_true, y_pred).astype(int)

    heatmap = go.Heatmap(
        z=counts,
        x=class_names,
        y=class_names,
        hoverongaps=False,
    )
    fig = go.Figure(
        data=heatmap,
        layout={
            "title": "Confusion Matrix",
            "xaxis": {"title": "Predicted value"},
            "yaxis": {"title": "Real value"},
        },
    )
    fig.show()

In [None]:
# Interactive (Plotly) version of the test-set confusion matrix.
plot_confusion_matrix(real_classes,pred_classes,classes)