In [None]:
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt 
import statsmodels.api as sm
import seaborn as sns
import sklearn

from sklearn.model_selection import train_test_split
sns.set()

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

print("TF version:", tf.__version__)
print("Done loading packages")

In [None]:
import os

def scanData(folder_path="./Flowers_cleaned/"):
    """Index an image dataset stored as one sub-directory per class.

    Parameters
    ----------
    folder_path : str, optional
        Root directory whose immediate sub-directories are the class folders.
        Defaults to "./Flowers_cleaned/".

    Returns
    -------
    data_count : pandas.DataFrame
        One row per class folder, columns "flower" (folder name) and
        "count" (number of files directly inside it, as integers).
    meta_data_df : pandas.DataFrame
        One row per file, columns "image_path" and "flower_type".

    Raises
    ------
    FileNotFoundError
        If ``folder_path`` is not an existing directory.
    """
    if not os.path.isdir(folder_path):
        raise FileNotFoundError(f"Dataset folder not found: {folder_path!r}")

    # Sort so that row order does not depend on the filesystem's listing order;
    # downstream seeded splits are only reproducible if the input order is.
    subdirectories = sorted(next(os.walk(folder_path))[1])
    print(subdirectories)

    # Collect plain Python lists and build each frame once at the end;
    # concatenating inside the loop is quadratic and upcasts the count column.
    count_rows = []
    image_paths = []
    flower_types = []
    for flower in subdirectories:
        class_dir = os.path.join(folder_path, flower)
        # Only files directly inside the class folder are indexed (no recursion).
        file_names = sorted(next(os.walk(class_dir), (class_dir, [], []))[2])

        image_paths.extend(os.path.join(class_dir, name) for name in file_names)
        flower_types.extend([flower] * len(file_names))
        count_rows.append({"flower": flower, "count": len(file_names)})
        print(flower, len(file_names))

    data_count = pd.DataFrame(count_rows, columns=["flower", "count"])
    meta_data_df = pd.DataFrame({"image_path": image_paths, "flower_type": flower_types})
    return data_count, meta_data_df

# Index the dataset once: img_count holds one row per class folder,
# meta_data one row per image file (path + class label).
img_count, meta_data = scanData()
# Bare last expression so the notebook renders the per-class count table.
img_count

In [None]:
# Display the per-image index built by scanData (columns: image_path, flower_type).
meta_data

In [None]:
# Bar chart of the number of images available for each flower class.
count_fig, count_ax = plt.subplots(figsize=(20, 10))
sns.barplot(data=img_count, x='flower', y='count', ax=count_ax)

In [None]:
# plt.imshow(new_image = tf.image.per_image_standardization(mpimg.imread(meta_data['image_path'][i])))

In [None]:
import matplotlib.image as mpimg
import random

# Show a 5x5 grid of randomly chosen images, each labelled with its class.
fig, ax = plt.subplots(5, 5, figsize=(20,20), sharex=False)
ax = ax.flatten()

# Draw row positions from the actual size of meta_data instead of a hard-coded
# upper bound, so the cell keeps working when the dataset grows or shrinks.
sample_positions = [random.randrange(len(meta_data)) for _ in range(len(ax))]

for k, position in enumerate(sample_positions):
    # .iloc is positional, so this does not depend on meta_data's index labels.
    sample = meta_data.iloc[position]
    # NOTE(review): standardized pixels are zero-mean floats and matplotlib clips
    # float RGB data to [0, 1], so the tiles will look high-contrast; this is kept
    # because the preview appears meant to show the network's input - confirm.
    ax[k].imshow(tf.image.per_image_standardization(mpimg.imread(sample['image_path'])))
    ax[k].set_xlabel(f"{sample['flower_type']}")

plt.show()

In [None]:
def split_dataset(data, test_ratio=0.2, random_state=100, stratify_col=None):
    """Split ``data`` into independent train and test sets.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to split.
    test_ratio : float, optional
        Fraction of rows assigned to the test set (default 0.2).
    random_state : int, optional
        Seed forwarded to ``train_test_split`` (default 100).
    stratify_col : str or None, optional
        Name of a column whose class proportions should be preserved in both
        splits. ``None`` (default) keeps the original unstratified behaviour.

    Returns
    -------
    (train_data, test_data)
        Copies of the selected rows, safe to modify without affecting ``data``.
    """
    stratify = data[stratify_col] if stratify_col is not None else None
    train_data, test_data = train_test_split(
        data, test_size=test_ratio, random_state=random_state, stratify=stratify)

    # The splitter hands back selections of `data`; copying makes later column
    # assignments on the results unambiguous (no SettingWithCopyWarning).
    train_data, test_data = train_data.copy(), test_data.copy()

    print("Train data : {}, Test Data: {}".format(
        train_data.shape[0], test_data.shape[0]))
    return train_data, test_data

# Hold out part of the per-image index for testing; the rest is used for training.
train_data, test_data = split_dataset(meta_data)

In [None]:

# Class distribution of the held-out test rows.
split_fig, split_ax = plt.subplots(figsize=(20, 10))
sns.histplot(data=test_data, x='flower_type', ax=split_ax)

In [None]:
# Class labels must be strings for class_mode='categorical'. .assign returns new
# frames, so this never writes into a slice of meta_data and is safe to re-run.
train_data = train_data.assign(flower_type=train_data['flower_type'].astype('str'))
test_data = test_data.assign(flower_type=test_data['flower_type'].astype('str'))


def standardize_image(image):
    """Scale one image to zero mean and unit variance; returns a NumPy array."""
    return tf.image.per_image_standardization(image).numpy()


# preprocessing_function is an ImageDataGenerator constructor argument.
# flow_from_dataframe only takes extra keywords as legacy **kwargs and does not
# apply them, so passing it there left the images unstandardized.
train_datagen = ImageDataGenerator(
        rotation_range=10, # rotation
        width_shift_range=0.2, # horizontal shift
        height_shift_range=0.2, # vertical shift
        zoom_range=0.2, # zoom
        horizontal_flip=True, # horizontal flip
        brightness_range=[0.2,1.2], # brightness
        preprocessing_function=standardize_image,
        rescale=1., data_format='channels_last')

# Held-out images get the same standardization but no random augmentation,
# so evaluation is deterministic and measured on the real images.
test_datagen = ImageDataGenerator(
        preprocessing_function=standardize_image,
        rescale=1., data_format='channels_last')

batch_size = 32

# Pin the label -> index mapping to the training labels so both generators
# encode classes identically. Sorted order matches Keras' own inference.
class_names = sorted(train_data['flower_type'].unique())

train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_data,
        directory='.',
        x_col="image_path",
        y_col="flower_type",
        classes=class_names,
        target_size=(64, 64),
        batch_size=batch_size,
        class_mode='categorical')

# shuffle=False keeps batches in dataframe order, which is required for
# model.predict(test_generator) rows to line up with test_generator.classes.
test_generator = test_datagen.flow_from_dataframe(
        dataframe=test_data,
        directory='.',
        x_col="image_path",
        y_col="flower_type",
        classes=class_names,
        target_size=(64, 64),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)



In [None]:
# Inspect the generator object attached to the training iterator (displays its repr).
train_generator.image_data_generator

In [None]:
def plot_learning_curve(train_loss, val_loss, train_metric, val_metric, metric_name='Accuracy'):
    """Draw training vs. validation curves for the loss and for one metric.

    The left panel shows the two loss curves, the right panel the two metric
    curves. On the right panel a solid horizontal line marks the maximum of
    each metric curve (red = train, blue = validation).

    Parameters
    ----------
    train_loss, val_loss : sequence of float
        Per-epoch loss values.
    train_metric, val_metric : sequence of float
        Per-epoch metric values; must be non-empty because ``max`` is taken.
    metric_name : str, optional
        Y-axis label of the right panel.
    """
    plt.figure(figsize=(10,5))

    panels = (
        (train_loss, val_loss, "Loss"),
        (train_metric, val_metric, metric_name),
    )
    for position, (train_curve, val_curve, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(train_curve, 'r--')
        plt.plot(val_curve, 'b--')
        plt.xlabel("epochs")
        plt.ylabel(y_label)
        plt.legend(['train', 'val'], loc='upper left')

    # The metric panel is still the current axes here, so the best-value
    # markers land on it.
    for curve, colour in ((train_metric, 'r'), (val_metric, 'b')):
        plt.axhline(y = max(curve), color = colour, linestyle = '-')

    plt.show()

In [None]:
# Input images are 64x64 RGB, matching the generators' target_size.
INPUT_DIM = (64,64,3)

# Widths of the fully connected layers that follow the convolutional stack.
HIDDEN_LAYER_1 = 256
HIDDEN_LAYER_2 = 64
HIDDEN_LAYER_3 = 8

# Number of flower classes = width of the output layer.
OUTPUT_CLASSES = 8

model = tf.keras.Sequential([
    #VGG block 1
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same',
                           input_shape=INPUT_DIM),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    #VGG block 2
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    #VGG block 3
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    # Classifier head
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(HIDDEN_LAYER_1, activation='relu'),
    tf.keras.layers.Dense(HIDDEN_LAYER_2, activation='relu'),
    # Hidden layers use relu. A softmax here would squash the features into a
    # probability simplex before the output layer and cripple learning.
    tf.keras.layers.Dense(HIDDEN_LAYER_3, activation='relu'),

    # No activation: the network emits one raw score (logit) per class.
    tf.keras.layers.Dense(OUTPUT_CLASSES)
])

model.summary()

In [None]:
from sklearn.utils import class_weight
# 'balanced' weights are inversely proportional to class frequency, so rare
# classes contribute as much to the loss as common ones.
train_labels = np.asarray(train_generator.classes)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_labels),
    y=train_labels,
)
class_weights

In [None]:
# Keras expects {class index: weight}; the position in class_weights is the class index.
class_weights_dict = dict(enumerate(class_weights))
    

In [None]:

# Multi-class classification with one-hot labels calls for categorical
# cross-entropy, not a regression loss. The model's final Dense layer has no
# activation, so it outputs logits and the loss must apply the softmax itself
# (from_logits=True).
model.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])


In [None]:
# NOTE(review): tensorflow.python.* is an internal module path, not public API;
# tf.config.list_physical_devices() looks like the supported way to query
# devices - confirm the counts agree before switching.
from tensorflow.python.client import device_lib
# Number of local devices TensorFlow reports on this machine.
len(device_lib.list_local_devices()) 

In [None]:
!nvidia-smi

In [None]:
# Train for 20 epochs, weighting classes by inverse frequency and scoring the
# held-out generator after every epoch; verbose=2 prints one line per epoch.
fit_options = dict(
    validation_data=test_generator,
    epochs=20,
    verbose=2,
    class_weight=class_weights_dict,
)
history = model.fit(train_generator, **fit_options)


In [None]:
# Per-epoch values recorded by model.fit, keyed by metric name.
curves = history.history
plot_learning_curve(
    curves['loss'],
    curves['val_loss'],
    curves['accuracy'],
    curves['val_accuracy'],
    metric_name='Accuracy',
)

In [None]:
# cm = sklearn.metrics.multilabel_confusion_matrix(test_generator.classes, model.predict(test_generator))
# Model outputs for everything the test generator yields; argmax(axis=1) below
# treats this as a 2-D array with one column per output unit.
# NOTE(review): these rows only line up with test_generator.classes if the
# generator yields images in dataframe order (i.e. it was built with
# shuffle=False) - confirm before trusting per-class comparisons.
prediction = model.predict(test_generator)
# Column index of the highest score in each row = predicted class id.
index_max = prediction.argmax(axis=1)
print(index_max.shape)
# print(prediction[index_max])

In [None]:
# Display the raw model outputs returned by model.predict.
prediction

In [None]:
# Dimensions of the prediction array.
prediction.shape

In [None]:

# fix, ax = plt.subplots(2, 4, sharex=True, figsize=(20,10))
# ax = ax.flatten()
# for i in range(7):
    # sns.histplot(data=new, ax=ax[i])

In [None]:
# Rows are the true class ids, columns the predicted ones. The matrix size is
# taken from the width of the prediction array (one column per model output)
# instead of a hard-coded 8, so it follows the model if the class count changes.
cm = tf.math.confusion_matrix(
    test_generator.classes,
    index_max,
    num_classes=prediction.shape[1],
)

print(cm)

In [None]:
# Order class names by their integer index so that tick i is labelled with
# class i explicitly, rather than relying on the dict's iteration order.
class_labels = sorted(train_generator.class_indices, key=train_generator.class_indices.get)

plt.figure(figsize=(20,15))
# The cells are integer counts: annotate them as integers, not as "12.00".
fx = sns.heatmap(np.asarray(cm), annot=True, fmt="d", cmap="GnBu",
                 xticklabels=class_labels, yticklabels=class_labels)
fx.set_title('Confusion Matrix \n')
fx.set_xlabel('\n Predicted Values\n')
fx.set_ylabel('Actual Values\n')
plt.show()
