In [None]:
# Import required libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.applications import MobileNetV2, VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from lightgbm import LGBMClassifier
import matplotlib.pyplot as plt
import os
import cv2
import seaborn as sns

In [None]:
#/kaggle/input/dataset-of-40-words/Dataset_of_40words
#/kaggle/input/dataset-30-words/Processed-dataset-of-30-words
#/kaggle/input/dataset-2-30-words/Processed-dataset-of-30-words-3
#/kaggle/input/dataset-30-words-real/40words

In [None]:
# Define Dataset Paths and Preprocess
# Set dataset directory (change path as needed)
dataset_path = "/kaggle/input/dataset-30-words-real/40words"
class_path1 = dataset_path  # same location as dataset_path; name kept for existing references

# Every sub-directory of the dataset root is one class. Plain files lying next to
# the class folders are skipped: the image-loading loop calls os.listdir() on each
# entry, which raises NotADirectoryError for anything that is not a folder.
class1 = sorted(
    entry
    for entry in os.listdir(class_path1)
    if os.path.isdir(os.path.join(class_path1, entry))
)

# Canonical class names: spaces removed and lower-cased.
CLASSES = [class_name.replace(" ", "").lower() for class_name in class1]

# The label -> index mapping is keyed by these canonical names, so two folders that
# normalise to the same name would silently share one index while len(CLASSES)
# still counts both. Fail loudly instead of training on wrong labels.
if len(set(CLASSES)) != len(CLASSES):
    raise ValueError(
        f"Class folder names in {dataset_path} collide after removing spaces "
        f"and lower-casing: {class1}"
    )

IMG_SIZE = 64  # Resize images to 64x64
CHANNELS = 1  # Use 1 for Gray images
LEN = list(class1)  # Class names based on folder names (raw, un-normalised)

In [None]:
# Load dataset and preprocess
data = []
labels = []

# The models are built with input_shape=(IMG_SIZE, IMG_SIZE, CHANNELS), so the
# images must be decoded with the same number of channels. cv2.imread defaults to
# a 3-channel (BGR) decode, which does not match CHANNELS = 1.
if CHANNELS not in (1, 3):
    raise ValueError(f"CHANNELS must be 1 (grayscale) or 3 (colour), got {CHANNELS}")
read_flag = cv2.IMREAD_GRAYSCALE if CHANNELS == 1 else cv2.IMREAD_COLOR

for class_name in LEN:
    class_path = os.path.join(dataset_path, class_name)
    # os.listdir order is arbitrary; sort so the seeded train/test split sees the
    # samples in the same order on every run and every machine.
    for img_name in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_name)

        # cv2.imread does not raise on unreadable files; it returns None.
        img = cv2.imread(img_path, read_flag)
        if img is None:
            print(f"Error loading image {img_path}: file could not be decoded as an image")
            continue

        try:
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))  # Resize images
        except cv2.error as e:
            print(f"Error loading image {img_path}: {e}")
            continue

        # Normalize to [0, 1]; float32 up front avoids holding a float64 copy of
        # every image until the final array conversion.
        img = img.astype(np.float32) / 255.0
        if img.ndim == 2:
            # Grayscale decode is (H, W); the models expect (H, W, 1).
            img = img[..., np.newaxis]

        data.append(img)
        labels.append(class_name.strip())

In [None]:
# Preprocess labels
# Bring the per-image folder names into the same canonical form used for CLASSES
# (spaces removed, lower-cased) so they can be looked up in the mapping below.
labels = [raw_label.replace(" ", "").lower() for raw_label in labels]

# Canonical class name -> integer class index (position in CLASSES).
label_mapping = dict(zip(CLASSES, range(len(CLASSES))))

# Convert string labels to numeric labels using the mapping
numeric_labels = []
for canonical_label in labels:
    numeric_labels.append(label_mapping[canonical_label])

# Convert data and labels to NumPy arrays
# NOTE: `data` and `labels` are rebound here from lists to float32 arrays, so this
# cell cannot be re-run without re-running the loading cell first.
labels = np.array(numeric_labels, dtype=np.float32)
data = np.array(data, dtype=np.float32)

In [None]:
# Train-test split
# 80/20 split with a fixed seed; stratifying on the labels keeps the class
# proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

In [None]:
# One-hot encode labels for deep learning models
y_train_dl, y_test_dl = (
    tf.keras.utils.to_categorical(split_labels, num_classes=len(CLASSES))
    for split_labels in (y_train, y_test)
)

# Flatten data for machine learning models
# Each image becomes a single feature row: (n_samples, height * width * channels).
X_train_flattened = X_train.reshape(len(X_train), -1)
X_test_flattened = X_test.reshape(len(X_test), -1)

# The scikit-learn estimators are given integer class ids rather than the float
# array produced during label encoding.
y_train_flat = y_train.astype(int)
y_test_flat = y_test.astype(int)

In [None]:
def display_metrics(y_true, y_pred, model_name):
    """Score a set of predictions and show a metric bar chart plus a confusion matrix.

    Args:
        y_true: 1-D sequence of true class ids.
        y_pred: 1-D sequence of predicted class ids, same length as ``y_true``.
        model_name: Name used in both figure titles.

    Returns:
        Tuple ``(accuracy, weighted_recall, weighted_f1)``, each in [0, 1].
    """
    # Calculate metrics
    acc = accuracy_score(y_true, y_pred)
    # zero_division=0 yields the same values as the default but without a warning
    # for every class that is never predicted.
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    cm = confusion_matrix(y_true, y_pred)

    # Plot metrics as a bar chart
    metrics = {'Accuracy': acc, 'Recall': recall, 'F1 Score': f1}
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.bar(list(metrics.keys()), list(metrics.values()),
           color=['skyblue', 'lightgreen', 'salmon'])
    # Title the chart with the model name, consistent with the confusion matrix
    # below; otherwise charts from different models cannot be told apart.
    ax.set_title(f'{model_name} Metrics', fontsize=16)
    # Head-room above 1.0 so the value label of a near-perfect score stays inside
    # the axes instead of colliding with the title.
    ax.set_ylim(0, 1.1)
    ax.set_ylabel('Score', fontsize=14)
    for position, value in enumerate(metrics.values()):
        ax.text(position, value + 0.02, f'{value:.4f}', ha='center', fontsize=12)
    plt.show()

    # Plot confusion matrix as a heatmap
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
                xticklabels=True, yticklabels=True, ax=ax)
    ax.set_title(f'{model_name} Confusion Matrix', fontsize=16)
    ax.set_xlabel('Predicted Labels', fontsize=14)
    ax.set_ylabel('True Labels', fontsize=14)
    plt.show()

    return acc, recall, f1

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l2

# Improved CNN model
def build_cnn_model(input_shape, num_classes):
    """Build and compile the custom CNN classifier.

    The network is four Conv(3x3, relu) -> MaxPool(2x2) -> BatchNorm stages with
    32, 64, 128 and 256 filters, followed by global average pooling and a small
    regularised dense head ending in a softmax over ``num_classes``.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D layer.
        num_classes: Number of output classes.

    Returns:
        The compiled Sequential model (Adam at 1e-4, categorical cross-entropy
        with label smoothing 0.1, accuracy metric).
    """
    stage_filters = (32, 64, 128, 256)

    network_layers = []
    for stage_index, filters in enumerate(stage_filters):
        # Only the first convolution declares the input shape.
        extra_kwargs = {'input_shape': input_shape} if stage_index == 0 else {}
        network_layers.append(Conv2D(filters, (3, 3), activation='relu', **extra_kwargs))
        network_layers.append(MaxPooling2D((2, 2)))
        network_layers.append(BatchNormalization())

    # Classification head
    network_layers.extend([
        GlobalAveragePooling2D(),
        Dropout(0.3),  # Added dropout here
        Dense(64, activation='relu', kernel_regularizer=l2(0.001)),  # Smaller Dense + L2
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])

    model = Sequential(network_layers)
    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss=CategoricalCrossentropy(label_smoothing=0.1),  # Label smoothing
        metrics=['accuracy'],
    )
    return model


In [None]:
# Stop once validation accuracy has not improved for 10 epochs and roll the model
# back to its best weights.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Write the model to disk only when validation accuracy improves.
best_checkpoint = ModelCheckpoint(
    "best_cnn_model.keras",
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

callbacks = [early_stopping, best_checkpoint]


In [None]:
def build_mobilenet_model(input_shape, num_classes):
    """Build and compile a frozen MobileNetV2 feature extractor with a dense head.

    The ImageNet weights only exist for 3-channel inputs, so Keras rejects any
    other channel count. Single-channel (grayscale) inputs are therefore passed
    through a fixed 1x1 convolution that copies the gray channel into three
    identical channels before the backbone. 3-channel inputs are built exactly
    as before.

    Args:
        input_shape: ``(height, width, channels)`` with ``channels`` 1 or 3.
        num_classes: Number of output classes.

    Returns:
        The compiled Sequential model (Adam, categorical cross-entropy, accuracy).

    Raises:
        ValueError: If ``channels`` is neither 1 nor 3.
    """
    height, width, channels = input_shape
    if channels not in (1, 3):
        raise ValueError(
            f"MobileNetV2 with ImageNet weights needs 1 or 3 input channels, "
            f"got input_shape={tuple(input_shape)}"
        )

    # NOTE(review): the ImageNet weights were trained on inputs scaled by
    # mobilenet_v2.preprocess_input (to [-1, 1]); the notebook feeds [0, 1]
    # images. Confirm that this is intended.
    base_model = MobileNetV2(weights='imagenet', include_top=False,
                             input_shape=(height, width, 3))
    base_model.trainable = False  # Freeze base model

    layers = []
    if channels == 1:
        layers += [
            tf.keras.Input(shape=(height, width, 1)),
            # 1x1 kernel of ones, no bias, frozen: each of the 3 output channels
            # equals the single input channel.
            Conv2D(3, (1, 1), use_bias=False, kernel_initializer='ones',
                   trainable=False, name='gray_to_rgb'),
        ]
    layers += [
        base_model,
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ]

    model = Sequential(layers)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [None]:
def build_vgg16_model(input_shape, num_classes):
    """Build and compile a frozen VGG16 feature extractor with a dense head.

    The ImageNet weights only exist for 3-channel inputs, so Keras rejects any
    other channel count. Single-channel (grayscale) inputs are therefore passed
    through a fixed 1x1 convolution that copies the gray channel into three
    identical channels before the backbone. 3-channel inputs are built exactly
    as before.

    Args:
        input_shape: ``(height, width, channels)`` with ``channels`` 1 or 3.
        num_classes: Number of output classes.

    Returns:
        The compiled Sequential model (Adam, categorical cross-entropy, accuracy).

    Raises:
        ValueError: If ``channels`` is neither 1 nor 3.
    """
    height, width, channels = input_shape
    if channels not in (1, 3):
        raise ValueError(
            f"VGG16 with ImageNet weights needs 1 or 3 input channels, "
            f"got input_shape={tuple(input_shape)}"
        )

    # NOTE(review): the ImageNet weights were trained on inputs prepared by
    # vgg16.preprocess_input (mean-centred 0-255 values); the notebook feeds
    # [0, 1] images. Confirm that this is intended.
    base_model = VGG16(weights='imagenet', include_top=False,
                       input_shape=(height, width, 3))
    base_model.trainable = False  # Freeze base model

    layers = []
    if channels == 1:
        layers += [
            tf.keras.Input(shape=(height, width, 1)),
            # 1x1 kernel of ones, no bias, frozen: each of the 3 output channels
            # equals the single input channel.
            Conv2D(3, (1, 1), use_bias=False, kernel_initializer='ones',
                   trainable=False, name='gray_to_rgb'),
        ]
    layers += [
        base_model,
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ]

    model = Sequential(layers)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [None]:
cnn_model = build_cnn_model(input_shape=(IMG_SIZE, IMG_SIZE, CHANNELS), num_classes=len(CLASSES))

# Pass the EarlyStopping / ModelCheckpoint callbacks defined earlier. They were
# never handed to fit(), so training always ran the full 100 epochs and no
# checkpoint was written.
# NOTE(review): validation_data is the test split, so early stopping and
# checkpoint selection are tuned on the same data the final scores are reported
# on. Consider carving a separate validation split out of X_train.
history_cnn = cnn_model.fit(
    X_train,
    y_train_dl,
    validation_data=(X_test, y_test_dl),
    epochs=100,
    batch_size=32,
    callbacks=callbacks,
)

# evaluate() returns [loss, accuracy] because the model is compiled with a single
# 'accuracy' metric.
cnn_eval = cnn_model.evaluate(X_test, y_test_dl, verbose=0)
print(f"CNN Model - Test Accuracy: {cnn_eval[1]:.4f}")
# CNN Evaluation
cnn_predictions = cnn_model.predict(X_test)
display_metrics(y_test_dl.argmax(axis=1), cnn_predictions.argmax(axis=1), "Best CNN")

In [None]:
import matplotlib.pyplot as plt

# Plot accuracy graph
# Side-by-side training curves for the CNN: accuracy on the left, loss on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# (train key, validation key, train legend, validation legend, y-label, title)
cnn_panels = [
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Accuracy', 'Model Accuracy'),
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Loss', 'Model Loss'),
]

for ax, (train_key, val_key, train_legend, val_legend, y_label, panel_title) in zip(axes, cnn_panels):
    ax.plot(history_cnn.history[train_key], label=train_legend)
    ax.plot(history_cnn.history[val_key], label=val_legend)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)
    ax.legend()
    ax.grid()

# Show the plots
plt.tight_layout()
plt.show()


In [None]:
# Build the MobileNetV2-based classifier for the configured image shape.
mobilenet_model = build_mobilenet_model(
    input_shape=(IMG_SIZE, IMG_SIZE, CHANNELS),
    num_classes=len(CLASSES),
)

# Train for 50 epochs; the test split doubles as validation data.
history_mobilenet = mobilenet_model.fit(
    X_train,
    y_train_dl,
    validation_data=(X_test, y_test_dl),
    epochs=50,
    batch_size=32,
)

# evaluate() returns [loss, accuracy]; index 1 is the accuracy.
mobilenet_eval = mobilenet_model.evaluate(X_test, y_test_dl, verbose=0)
print(f"MobileNetV2 Model - Test Accuracy: {mobilenet_eval[1]:.4f}")

# MobileNetV2 Evaluation
mobilenet_predictions = mobilenet_model.predict(X_test)
display_metrics(
    y_test_dl.argmax(axis=1),
    mobilenet_predictions.argmax(axis=1),
    "Best MobileNetV2",
)

In [None]:
import matplotlib.pyplot as plt

# Plot accuracy graph
# Side-by-side training curves for MobileNetV2: accuracy on the left, loss on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# (train key, validation key, train legend, validation legend, y-label, title)
mobilenet_panels = [
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Accuracy', 'Model Accuracy'),
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Loss', 'Model Loss'),
]

for ax, (train_key, val_key, train_legend, val_legend, y_label, panel_title) in zip(axes, mobilenet_panels):
    ax.plot(history_mobilenet.history[train_key], label=train_legend)
    ax.plot(history_mobilenet.history[val_key], label=val_legend)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)
    ax.legend()
    ax.grid()

# Show the plots
plt.tight_layout()
plt.show()


In [None]:
# Build the VGG16-based classifier for the configured image shape.
vgg16_model = build_vgg16_model(
    input_shape=(IMG_SIZE, IMG_SIZE, CHANNELS),
    num_classes=len(CLASSES),
)

# Train for 50 epochs; the test split doubles as validation data.
history_vgg16 = vgg16_model.fit(
    X_train,
    y_train_dl,
    validation_data=(X_test, y_test_dl),
    epochs=50,
    batch_size=32,
)

# evaluate() returns [loss, accuracy]; index 1 is the accuracy.
vgg16_eval = vgg16_model.evaluate(X_test, y_test_dl, verbose=0)
print(f"VGG16 Model - Test Accuracy: {vgg16_eval[1]:.4f}")

# VGG16 Evaluation
vgg16_predictions = vgg16_model.predict(X_test)
display_metrics(
    y_test_dl.argmax(axis=1),
    vgg16_predictions.argmax(axis=1),
    "VGG16",
)

In [None]:
import matplotlib.pyplot as plt

# Plot accuracy graph
# Side-by-side training curves for VGG16: accuracy on the left, loss on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# (train key, validation key, train legend, validation legend, y-label, title)
vgg16_panels = [
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Accuracy', 'Model Accuracy'),
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Loss', 'Model Loss'),
]

for ax, (train_key, val_key, train_legend, val_legend, y_label, panel_title) in zip(axes, vgg16_panels):
    ax.plot(history_vgg16.history[train_key], label=train_legend)
    ax.plot(history_vgg16.history[val_key], label=val_legend)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)
    ax.legend()
    ax.grid()

# Show the plots
plt.tight_layout()
plt.show()


In [None]:
#Machine Learning Models
#KNeighborsClassifier
# The grid holds a single candidate (k=3, distance weighting); GridSearchCV still
# runs 5-fold cross-validation on it and refits on the full training set.
knn_params = dict(n_neighbors=[3], weights=['distance'])
knn = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=knn_params,
    cv=5,
    scoring='accuracy',
)
knn.fit(X_train_flattened, y_train_flat)
print(knn.best_params_)

# Score the refitted best estimator on the held-out test features.
knn_best_model = knn.best_estimator_
knn_predictions = knn_best_model.predict(X_test_flattened)
display_metrics(y_test_flat, knn_predictions, "KNeighborsClassifier")

In [None]:
#RandomForestClassifier
# Search over tree depth only (unlimited, 10, 20) with 100 trees, using 5-fold
# cross-validated accuracy to pick the winner.
rf_params = dict(n_estimators=[100], max_depth=[None, 10, 20])
rf = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=rf_params,
    cv=5,
    scoring='accuracy',
)
rf.fit(X_train_flattened, y_train_flat)
print(rf.best_params_)

# Score the refitted best estimator on the held-out test features.
rf_best_model = rf.best_estimator_
rf_predictions = rf_best_model.predict(X_test_flattened)
display_metrics(y_test_flat, rf_predictions, "RandomForestClassifier")

In [None]:
# LogisticRegression
# lr_params = {'C': [0.1, 1.0], 'solver': ['lbfgs']}
# lr = GridSearchCV(LogisticRegression(max_iter=50), lr_params, cv=5, scoring='accuracy')
# lr.fit(X_train_flattened, y_train_flat)
# print(lr.best_params_)
# lr_best_model = lr.best_estimator_
# lr_predictions = lr_best_model.predict(X_test_flattened)
# display_metrics(y_test_flat, lr_predictions, "LogisticRegression")

In [None]:
# SVM
# svm_params = {'kernel': ['linear', 'rbf'], 'C': [0.1, 1]}
# svm = GridSearchCV(SVC(probability=True), svm_params, cv=5, scoring='accuracy')
# svm.fit(X_train_flattened, y_train_flat)
# print(svm.best_params_)
# svm_best_model = svm.best_estimator_
# svm_predictions = svm_best_model.predict(X_test_flattened)
# display_metrics(y_test_flat, svm_predictions, "SVM")

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, recall_score

def create_performance_comparison_plots(models, y_true_list, y_pred_list, model_names):
    """
    Create three performance comparison plots (accuracy, F1-score, recall) for
    multiple models, stacked vertically in one figure.

    Args:
        models: List of model objects. Not used by this function; kept so
            existing callers do not have to change.
        y_true_list: List of true label arrays, one per model.
        y_pred_list: List of predicted label arrays, one per model.
        model_names: List of model names used as x-axis tick labels.

    Raises:
        ValueError: If the three per-model lists do not have the same length.
    """
    # zip() would silently drop trailing entries on a length mismatch, so bars
    # could end up under the wrong model name.
    if not (len(y_true_list) == len(y_pred_list) == len(model_names)):
        raise ValueError(
            "y_true_list, y_pred_list and model_names must have the same length, got "
            f"{len(y_true_list)}, {len(y_pred_list)} and {len(model_names)}"
        )

    # Score every model once; values are stored as percentages for display.
    # F1 and recall are support-weighted averages over the classes.
    scores = {'Accuracy': [], 'F1-Score': [], 'Recall': []}
    for y_true, y_pred in zip(y_true_list, y_pred_list):
        scores['Accuracy'].append(accuracy_score(y_true, y_pred) * 100)
        scores['F1-Score'].append(
            f1_score(y_true, y_pred, average='weighted', zero_division=0) * 100
        )
        scores['Recall'].append(
            recall_score(y_true, y_pred, average='weighted', zero_division=0) * 100
        )

    # Common settings for all plots
    bar_width = 0.6
    indices = np.arange(len(model_names))
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

    # One subplot per metric, top to bottom in the order of `scores`.
    fig, axes = plt.subplots(3, 1, figsize=(8, 15))
    for ax, (metric_name, values) in zip(axes, scores.items()):
        ax.bar(indices, values, bar_width, color=colors)
        ax.set_title(f'{metric_name} Comparison', fontsize=14, pad=20)
        ax.set_ylabel(f'{metric_name} (%)', fontsize=12)
        ax.set_xticks(indices)
        ax.set_xticklabels(model_names, rotation=45, ha='right', fontsize=10)
        ax.set_ylim(0, 100)
        ax.grid(axis='y', linestyle='--', alpha=0.7)

    plt.tight_layout()
    plt.show()

# Example usage with your models:
# One row per model: (display name, true class ids, predicted class ids).
# The deep-learning models are scored on argmax of the one-hot targets and of the
# softmax outputs; the scikit-learn models already work with integer class ids.
comparison_rows = [
    ("CNN", y_test_dl.argmax(axis=1), cnn_predictions.argmax(axis=1)),
    ("MobileNetV2", y_test_dl.argmax(axis=1), mobilenet_predictions.argmax(axis=1)),
    ("VGG16", y_test_dl.argmax(axis=1), vgg16_predictions.argmax(axis=1)),
    ("KNN", y_test_flat, knn_predictions),
    ("Random Forest", y_test_flat, rf_predictions),
]

# Split the rows into the three parallel lists the plotting function expects.
model_names = [row[0] for row in comparison_rows]
y_true_list = [row[1] for row in comparison_rows]
y_pred_list = [row[2] for row in comparison_rows]

# Create the comparison plots
# The first argument (models) is passed as an empty list.
create_performance_comparison_plots([], y_true_list, y_pred_list, model_names)