In [None]:
import numpy as np
import pandas as pd
import os
import shutil
import cv2
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Activation, Dropout, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras import regularizers
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import seaborn as sns

# Fix the NumPy and TensorFlow seeds so repeated runs are comparable
np.random.seed(123)
tf.random.set_seed(123)

# Mount Google Drive (Colab runtime) so the dataset paths below can point at it
from google.colab import drive
drive.mount('/content/mydrive')

sdir = r''     # Update with your directory path to the train folder
csvpath = r''  # Update with your file path to the csv folder

# Load the CSV, assign the two expected column names, and prefix every
# file name with the training directory
df = pd.read_csv(csvpath)
df.columns = ['filepaths', 'labels']
df['filepaths'] = df['filepaths'].map(lambda rel_path: os.path.join(sdir, rel_path))

# Stratified split: 90% for training, then the remaining 10% is halved
# into a test set and a validation set
train_df, dummy_df = train_test_split(
    df,
    train_size=0.9,
    shuffle=True,
    random_state=123,
    stratify=df['labels'],
)
test_df, valid_df = train_test_split(
    dummy_df,
    test_size=0.5,
    shuffle=True,
    random_state=123,
    stratify=dummy_df['labels'],
)

# Define a function to trim dataset based on sample size
def trim(df, max_size, min_size, column):
    """Cap the number of rows per class and drop under-represented classes.

    Rows are grouped by the values in ``column``. A group with more than
    ``max_size`` rows is randomly down-sampled to ``max_size`` rows (with a
    fixed ``random_state`` so the selection is repeatable), a group with
    between ``min_size`` and ``max_size`` rows is kept whole, and a group
    with fewer than ``min_size`` rows is discarded.

    Args:
        df: DataFrame to trim.
        max_size: Maximum number of rows to keep per group.
        min_size: Minimum number of rows a group needs in order to be kept.
        column: Name of the column that holds the class labels.

    Returns:
        A new DataFrame with a fresh ``0..n-1`` index. When no group
        qualifies, an empty frame with the same columns as ``df`` is
        returned instead of letting ``pd.concat`` fail on an empty list.
    """
    kept = []
    # sort=False visits groups in order of first appearance, matching the
    # order df[column].unique() would give, and iterating the groupby
    # directly avoids a failing get_group() lookup for NaN labels.
    for _label, group in df.groupby(column, sort=False):
        sample_count = len(group)
        if sample_count > max_size:
            # train_test_split is used purely as a seeded sampler here; the
            # "test" remainder is thrown away.
            samples, _ = train_test_split(group, train_size=max_size, shuffle=True,
                                          random_state=123, stratify=group[column])
            kept.append(samples)
        elif sample_count >= min_size:
            kept.append(group)

    if not kept:
        # Every group was below min_size (or df was empty).
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(kept, axis=0).reset_index(drop=True)

# Trim the training dataset: at most max_samples rows per class, and drop
# classes with fewer than min_samples rows
max_samples = 500
min_samples = 0
train_df = trim(train_df, max_samples, min_samples, 'labels')

# Set image size and other parameters
img_size = (224, 224)
batch_size = 20

# Data generators
# The ImageNet-pretrained MobileNetV2 used below expects pixels scaled to
# [-1, 1]; without this preprocessing the backbone receives raw 0-255 values.
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

# Training images are randomly flipped, rotated, shifted and zoomed;
# validation images are only preprocessed.
train_gen = ImageDataGenerator(preprocessing_function=preprocess_input, horizontal_flip=True,
                               rotation_range=20, width_shift_range=0.2,
                               height_shift_range=0.2, zoom_range=0.2)
valid_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_data = train_gen.flow_from_dataframe(train_df, x_col='filepaths', y_col='labels', target_size=img_size,
                                           class_mode='categorical', color_mode='rgb', shuffle=True, batch_size=batch_size)
# Pass the training class list explicitly so the validation one-hot columns
# line up with the training ones even if a class is absent from valid_df.
valid_data = valid_gen.flow_from_dataframe(valid_df, x_col='filepaths', y_col='labels', target_size=img_size,
                                           class_mode='categorical', color_mode='rgb', shuffle=False,
                                           classes=list(train_data.class_indices), batch_size=batch_size)

# Backbone: MobileNetV2 with ImageNet weights and no classifier head;
# pooling='max' reduces its final feature map to a flat vector
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    pooling='max',
    input_shape=(*img_size, 3),
)
base_model.trainable = True  # backbone weights are updated during training

# Classification head: batch norm, then two regularized 1024-unit ReLU
# layers, each followed by dropout (rates 0.3 and 0.45)
x = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(base_model.output)
for drop_rate in (0.3, 0.45):
    x = Dense(
        1024,
        activation='relu',
        kernel_regularizer=regularizers.l2(0.016),
        activity_regularizer=regularizers.l1(0.006),
        bias_regularizer=regularizers.l1(0.006),
    )(x)
    x = Dropout(rate=drop_rate, seed=123)(x)

# One softmax unit per class found by the training generator
output = Dense(len(train_data.class_indices), activation='softmax')(x)

# Assemble and compile the model
model = Model(inputs=base_model.input, outputs=output)
lr = 0.001
model.compile(
    Adamax(learning_rate=lr),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Callbacks, both watching validation loss: halve the learning rate after
# 2 epochs without improvement, and stop training after 4 such epochs while
# restoring the best weights seen
lr_schedule = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss", factor=0.5, patience=2, verbose=1
)
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=4, verbose=1, restore_best_weights=True
)
callbacks = [lr_schedule, early_stop]

# Train for up to 25 epochs, evaluating on the validation generator
history = model.fit(
    train_data,
    validation_data=valid_data,
    epochs=25,
    callbacks=callbacks,
    verbose=1,
)

# Plot training history
def plot_training_history(history):
    """Plot training and validation accuracy and loss on a single figure.

    Args:
        history: Object whose ``history`` attribute maps the keys
            'accuracy', 'val_accuracy', 'loss' and 'val_loss' to per-epoch
            sequences of values (the object returned by ``model.fit``).
    """
    # (history key, legend label) for each curve, in drawing order
    curves = (
        ('accuracy', 'Training Accuracy'),
        ('val_accuracy', 'Validation Accuracy'),
        ('loss', 'Training Loss'),
        ('val_loss', 'Validation Loss'),
    )

    plt.figure(figsize=(12, 6))
    for metric_key, legend_label in curves:
        plt.plot(history.history[metric_key], label=legend_label)

    plt.title('Training and Validation Metrics')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy / Loss')
    plt.legend()
    plt.show()

plot_training_history(history)

# Make predictions on the test set
# Reuse the training class list so the label indices of the test generator
# match the model's output units even if a class is missing from test_df.
classes = list(train_data.class_indices.keys())
label_ids = list(range(len(classes)))

# shuffle=False keeps predictions in the same order as test_data.labels.
# The shared batch_size is used instead of len(test_df) so the whole test
# set is not loaded into memory as one batch.
test_data = valid_gen.flow_from_dataframe(test_df, x_col='filepaths', y_col='labels', target_size=img_size,
                                          class_mode='categorical', color_mode='rgb', shuffle=False,
                                          classes=classes, batch_size=batch_size)

y_pred = model.predict(test_data)
y_true = test_data.labels
y_pred_labels = np.argmax(y_pred, axis=1)  # most probable class per image

# Generate confusion matrix; labels= fixes its size to one row/column per
# training class, including classes that never occur in the test set
cm = confusion_matrix(y_true, y_pred_labels, labels=label_ids)

# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='g', cmap='Blues', cbar=False)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# Generate classification report; labels= keeps target_names aligned with
# the class indices regardless of which classes appear in the test set
clr = classification_report(y_true, y_pred_labels, labels=label_ids, target_names=classes)
print("Classification Report:\n", clr)


In [None]:
# Build a file name of the form "<subject>_<accuracy %>.h5" and save the model
subject = 'temp'
# NOTE(review): 0.87 is hard-coded — presumably meant to be replaced with
# the measured accuracy; confirm before relying on the file name.
acc = str((0.87) * 100)
# Keep at most two digits after the decimal point
index = acc.rfind('.')
acc = acc[:index + 3]
save_id = subject + '_' + str(acc) + '.h5'
save_modelpath = r''  # Update with path to save model
# Join the configured directory (not the literal text 'save_modelpath')
# with the file name
model_save_loc = os.path.join(save_modelpath, save_id)
model.save(model_save_loc)
print('model was saved as ', model_save_loc)