In [None]:
# Colab setup: attach Google Drive, then confirm the shared dataset folder is reachable.
import os

from google.colab import drive

drive.mount('/content/drive')

# The shared folder only resolves under this path once it has been added
# to "My Drive" as a shortcut.
shared_folder_path = "/content/drive/My Drive/Dl+project"

if not os.path.exists(shared_folder_path):
    # Missing folder: tell the user how to make it visible.
    print(f"Shared folder not found at {shared_folder_path}.")
    print("Ensure you have added the shared folder as a shortcut in 'My Drive'.")
else:
    print(f"Shared folder found: {shared_folder_path}")
    print("Contents of the shared folder:")
    # Show the top-level entries of the shared folder.
    print(os.listdir(shared_folder_path))

import os
import shutil
import cv2
import imghdr
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16, ResNet50, DenseNet121, MobileNetV2
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
from google.colab import drive
from tensorflow.keras.preprocessing import image


# Mount Google Drive into the Colab filesystem (the notebook already did this in its first cell).
drive.mount('/content/drive')

# Root of the image dataset; the functions below treat each sub-folder of it as one class.
data_dir = "/content/drive/My Drive/Dl+project"

def clean_dataset(data_dir):
    """Delete unreadable or unsupported image files from a class-per-folder dataset.

    Every sub-directory of ``data_dir`` is treated as one image class. A file
    inside a class folder is removed when ``imghdr`` does not recognise it as
    one of the accepted formats, when OpenCV cannot decode it (or decodes it
    to an empty array), or when inspecting it raises an exception.

    WARNING: matching files are permanently deleted from ``data_dir``.

    Args:
        data_dir: Path of the dataset root directory.
    """
    # NOTE: imghdr reports JPEG files as 'jpeg'; 'jpg' is kept from the original list.
    # NOTE(review): imghdr is deprecated since Python 3.11 and removed in 3.13.
    image_exts = {'jpeg', 'jpg', 'bmp', 'png'}
    cleaned_images = 0

    for image_class in os.listdir(data_dir):
        class_path = os.path.join(data_dir, image_class)
        # Stray files at the dataset root are not class folders; listing them would raise.
        if not os.path.isdir(class_path):
            continue

        images_to_remove = []
        for file_name in os.listdir(class_path):
            image_path = os.path.join(class_path, file_name)
            # Nested directories are not images and cannot be deleted with os.remove.
            if not os.path.isfile(image_path):
                continue

            try:
                detected_type = imghdr.what(image_path)
                decoded = cv2.imread(image_path)
                is_valid = (
                    detected_type in image_exts
                    and decoded is not None
                    and decoded.size > 0
                )
            except Exception as e:
                # Best-effort cleaning: anything that cannot even be inspected is dropped.
                print(f'Issue with image {image_path}: {e}')
                is_valid = False

            if not is_valid:
                images_to_remove.append(image_path)

        for invalid_image in images_to_remove:
            try:
                os.remove(invalid_image)
            except OSError as e:
                # One undeletable file should not abort the whole cleaning pass.
                print(f'Could not remove {invalid_image}: {e}')
            else:
                cleaned_images += 1

    print(f"Cleaned {cleaned_images} invalid images")

def create_data_generator(data_dir):
    """Build the training and validation iterators over ``data_dir``.

    Pixel values are rescaled by 1/255 and 20% of the images are reserved for
    the validation subset. Both iterators resize images to 256x256, use a
    batch size of 32 and produce binary labels.

    Args:
        data_dir: Dataset root passed to ``flow_from_directory``.

    Returns:
        A ``(train_generator, validation_generator)`` tuple.
    """
    splitter = ImageDataGenerator(rescale=1./255, validation_split=0.2)

    # Options common to both subsets; only `subset` differs between the two flows.
    flow_options = dict(
        target_size=(256, 256),
        batch_size=32,
        class_mode="binary",
    )

    train_generator = splitter.flow_from_directory(data_dir, subset="training", **flow_options)
    validation_generator = splitter.flow_from_directory(data_dir, subset="validation", **flow_options)

    return train_generator, validation_generator

def create_test_data_split(data_dir):
    """Create the iterator used to evaluate a trained model.

    Images are rescaled by 1/255, resized to 256x256 and served in batches of
    32 with binary labels. Shuffling is disabled so that the order of
    ``model.predict(test_generator)`` matches ``test_generator.labels``.

    NOTE(review): no split is applied here; when this is called with the same
    directory that was used for training, the resulting "test" metrics are
    not measured on held-out data.

    Args:
        data_dir: Dataset root passed to ``flow_from_directory``.

    Returns:
        The directory iterator over every image under ``data_dir``.
    """
    data_gen = ImageDataGenerator(rescale=1./255)
    test_generator = data_gen.flow_from_directory(
        data_dir,
        target_size=(256, 256),
        batch_size=32,
        class_mode="binary",
        # flow_from_directory shuffles by default, which would misalign
        # predictions with `.labels` in any label-based metric.
        shuffle=False,
    )
    return test_generator

def create_model(model_type='VGG16', input_shape=(256, 256, 3)):
    """Build a binary classifier on top of a frozen ImageNet backbone.

    The selected backbone is loaded without its top layers, all of its layers
    are marked non-trainable, and a head of global average pooling, a
    256-unit ReLU layer, 50% dropout and a single sigmoid unit is attached.

    Args:
        model_type: One of 'VGG16', 'ResNet50', 'DenseNet121', 'MobileNetV2'.
        input_shape: Input shape handed to the backbone constructor.

    Returns:
        The assembled (uncompiled) Keras ``Model``.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    supported = ('VGG16', 'ResNet50', 'DenseNet121', 'MobileNetV2')
    if model_type not in supported:
        raise ValueError("Invalid model type")

    # Name -> constructor table, in the same order as `supported`.
    backbones = dict(zip(supported, (VGG16, ResNet50, DenseNet121, MobileNetV2)))
    base_model = backbones[model_type](weights='imagenet', include_top=False, input_shape=input_shape)

    # Freeze the pretrained feature extractor; only the new head is trained.
    for backbone_layer in base_model.layers:
        backbone_layer.trainable = False

    pooled = GlobalAveragePooling2D()(base_model.output)
    hidden = Dense(256, activation='relu')(pooled)
    regularised = Dropout(0.5)(hidden)
    probability = Dense(1, activation='sigmoid')(regularised)

    return Model(inputs=base_model.input, outputs=probability)

def train_model(train_generator, validation_generator, model_type='VGG16', epochs=50):
    """Create, compile and fit a transfer-learning model.

    The model is compiled with Adam (learning rate 1e-4) and binary
    cross-entropy, and trained with early stopping, learning-rate reduction
    on plateau and checkpointing of the best validation accuracy.

    Args:
        train_generator: Iterator supplying training batches.
        validation_generator: Iterator supplying validation batches.
        model_type: Backbone name forwarded to ``create_model``.
        epochs: Maximum number of training epochs.

    Returns:
        A ``(model, history)`` tuple, where ``history`` is the object
        returned by ``model.fit``.
    """
    model = create_model(model_type)
    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # Stop once val_loss has not improved for 10 epochs and roll back to the best weights.
    stop_early = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    # Halve the learning rate after 5 stagnant epochs, never going below 1e-6.
    shrink_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
    # Keep the epoch with the best validation accuracy on disk.
    checkpoint = ModelCheckpoint(f'best_{model_type}_model.keras', save_best_only=True, monitor='val_accuracy')

    history = model.fit(
        train_generator,
        epochs=epochs,
        validation_data=validation_generator,
        callbacks=[stop_early, shrink_lr, checkpoint],
    )
    return model, history

def evaluate_model(model, test_generator):
    """Print loss, accuracy, confusion matrix and classification report.

    Predictions are thresholded at 0.5. Labels and predictions are gathered
    from the same batches, so they stay aligned even when the generator
    shuffles its samples.

    Args:
        model: Compiled Keras model with ``accuracy`` as its only metric.
        test_generator: Directory iterator exposing ``class_indices`` and
            supporting ``len()`` and integer indexing of batches.
    """
    test_loss, test_accuracy = model.evaluate(test_generator)
    print(f'\nTest Loss: {test_loss:.4f}')
    print(f'Test Accuracy: {test_accuracy:.4f}')

    # Pair every prediction with the label from the very same batch instead
    # of relying on `test_generator.labels`, which is in file order and does
    # not follow a shuffled iteration order.
    batch_labels, batch_scores = [], []
    for batch_idx in range(len(test_generator)):
        batch_x, batch_y = test_generator[batch_idx]
        batch_scores.append(np.asarray(model.predict_on_batch(batch_x)).reshape(-1))
        batch_labels.append(np.asarray(batch_y).reshape(-1))
    y_true = np.concatenate(batch_labels).astype(int)
    y_pred = (np.concatenate(batch_scores) > 0.5).astype(int)

    # Class names ordered by their numeric index, so names and ids line up.
    ordered_classes = sorted(test_generator.class_indices.items(), key=lambda item: item[1])
    class_names = [name for name, _ in ordered_classes]
    label_ids = [index for _, index in ordered_classes]

    # Passing `labels` keeps the matrix/report shape fixed even if a class is absent.
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    print("Confusion Matrix:")
    print(cm)

    cr = classification_report(y_true, y_pred, labels=label_ids, target_names=class_names)
    print("Classification Report:")
    print(cr)

    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

# Drop corrupt/unsupported files before any generator indexes the directory.
clean_dataset(data_dir)

train_generator, validation_generator = create_data_generator(data_dir)
# NOTE(review): the test iterator reads the same `data_dir` as the training
# and validation iterators, so test metrics are not from held-out images.
test_generator = create_test_data_split(data_dir)

# Train every candidate backbone and keep the one with the highest test accuracy.
models = ['VGG16', 'ResNet50', 'DenseNet121', 'MobileNetV2']
best_model = None
best_model_name = None  # name of the architecture behind `best_model`
best_accuracy = 0
histories = {}

for model_type in models:
    print(f"\nTraining {model_type} model...")
    model, history = train_model(train_generator, validation_generator, model_type=model_type, epochs=50)
    histories[model_type] = history
    evaluate_model(model, test_generator)

    # evaluate_model has already shown the progress bar, so keep this pass quiet.
    test_accuracy = model.evaluate(test_generator, verbose=0)[1]
    # `best_model is None` guarantees a model is selected even if accuracy is 0.
    if best_model is None or test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        best_model = model
        best_model_name = model_type

if best_model_name is not None:
    print(f"\nBest model by test accuracy: {best_model_name} ({best_accuracy:.4f})")

def predict_on_unseen_image(model, img_path, img_size=(256, 256)):
    """Classify one image file and print the predicted class.

    The image is loaded at ``img_size``, turned into a single-item batch and
    divided by 255 before being passed to ``model.predict``. A prediction
    above 0.5 is printed as "Class 1", anything else as "Class 0".

    Args:
        model: Model whose ``predict`` output is compared against 0.5.
        img_path: Path of the image file to classify.
        img_size: Target size used when loading the image.
    """
    pixels = image.img_to_array(image.load_img(img_path, target_size=img_size))
    # Prepend the batch axis, then scale the pixel values by 1/255.
    batch = pixels[np.newaxis, ...] / 255.0

    prediction = model.predict(batch)
    if prediction > 0.5:
        predicted_class = "Class 1"
    else:
        predicted_class = "Class 0"
    print(f"Predicted class: {predicted_class}")


# Mount Drive again (already mounted earlier in the notebook), then classify
# a single unseen image with the best model selected by the training loop.
drive.mount('/content/drive')
# NOTE(review): this path uses 'MyDrive' while the dataset paths above use 'My Drive' — confirm both resolve.
unseen_image_path = '/content/drive/MyDrive/unseendata.jpg'
predict_on_unseen_image(best_model, unseen_image_path)
# Predicting on all images with additional data
import os
from tensorflow.keras.preprocessing import image
import numpy as np

def predict_on_unseen_image_batch(model, folder_path, img_size=(256, 256), limit=100):
    """Classify up to ``limit`` images found directly inside ``folder_path``.

    Files are selected by extension (case-insensitive) and processed in
    sorted name order, so the same files are chosen on every run. Each image
    is loaded at ``img_size``, scaled by 1/255 and classified with a 0.5
    threshold.

    Args:
        model: Model whose ``predict`` yields one score per image.
        folder_path: Directory containing the images.
        img_size: Target size used when loading each image.
        limit: Maximum number of images to classify.

    Returns:
        A list of ``(file_name, predicted_class)`` tuples, where
        ``predicted_class`` is "Class 1" or "Class 0".
    """
    image_extensions = ('.jpeg', '.jpg', '.bmp', '.png')
    # lower(): also accept '.JPG', '.PNG', ...; sorted(): os.listdir order is arbitrary,
    # which would make the `limit` cut-off pick different files between runs.
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if f.lower().endswith(image_extensions)
    )[:limit]

    predictions = []

    for img_file in image_files:
        img_path = os.path.join(folder_path, img_file)

        img = image.load_img(img_path, target_size=img_size)
        img_array = np.expand_dims(image.img_to_array(img), axis=0) / 255.0

        # Take the single score explicitly instead of relying on the truth
        # value of a one-element array; verbose=0 avoids a progress bar per image.
        score = float(np.ravel(model.predict(img_array, verbose=0))[0])
        predicted_class = "Class 1" if score > 0.5 else "Class 0"

        predictions.append((img_file, predicted_class))

    return predictions

# Classify up to 100 images from the additional 'SAD' folder with the best model.
unseen_folder_path = '/content/drive/MyDrive/Dl+project1/SAD'
predictions = predict_on_unseen_image_batch(best_model, unseen_folder_path, limit=100)

# One line per image: file name and the class predicted for it.
for img_file, predicted_class in predictions:
    print(f"Image: {img_file} | Predicted Class: {predicted_class}")

#Predicting on max 100 images
import os
import random
from tensorflow.keras.preprocessing import image
import numpy as np

def predict_on_unseen_image_batch_from_two_folders(model, folder_path1, folder_path2, img_size=(256, 256), limit=100):
    """Classify a random sample of at most ``limit`` images drawn from two folders.

    Image files (matched by extension, case-insensitive) from both folders
    are pooled and sampled without replacement. Each file remembers the
    folder it came from, so identical file names in both folders are handled
    correctly.

    Args:
        model: Model whose ``predict`` yields one score per image.
        folder_path1: First directory containing images.
        folder_path2: Second directory containing images.
        img_size: Target size used when loading each image.
        limit: Maximum number of images to classify.

    Returns:
        A list of ``(file_name, predicted_class)`` tuples, where
        ``predicted_class`` is "Class 1" or "Class 0".
    """
    image_extensions = ('.jpeg', '.jpg', '.bmp', '.png')

    # Store (folder, file) pairs so the origin is never guessed from the
    # file name alone (the same name may exist in both folders).
    candidates = []
    for folder in (folder_path1, folder_path2):
        for file_name in sorted(os.listdir(folder)):
            if file_name.lower().endswith(image_extensions):
                candidates.append((folder, file_name))

    sampled = random.sample(candidates, min(len(candidates), limit))

    predictions = []

    for folder, img_file in sampled:
        img_path = os.path.join(folder, img_file)

        img = image.load_img(img_path, target_size=img_size)
        img_array = np.expand_dims(image.img_to_array(img), axis=0) / 255.0

        # Take the single score explicitly; verbose=0 avoids a progress bar per image.
        score = float(np.ravel(model.predict(img_array, verbose=0))[0])
        predicted_class = "Class 1" if score > 0.5 else "Class 0"

        predictions.append((img_file, predicted_class))

    return predictions

# Additional-data folders, one per class.
unseen_folder_path1 = '/content/drive/MyDrive/Dl+project1/SAD'
unseen_folder_path2 = '/content/drive/MyDrive/Dl+project1/HAPPY'

# Classify a random sample of at most 100 images pooled from both folders.
predictions = predict_on_unseen_image_batch_from_two_folders(best_model, unseen_folder_path1, unseen_folder_path2, limit=100)

# One line per sampled image: file name and the class predicted for it.
for img_file, predicted_class in predictions:
    print(f"Image: {img_file} | Predicted Class: {predicted_class}")

#Predicting on max 100 images and plotting them
import os
import random
from tensorflow.keras.preprocessing import image
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

def predict_on_unseen_image_batch_from_two_folders(model, folder_path1, folder_path2, img_size=(256, 256), limit=100):
    """Classify a random sample from two labelled folders and plot the confusion matrix.

    Images from ``folder_path1`` are given true label 1 and images from
    ``folder_path2`` true label 0. Up to ``limit`` images are sampled without
    replacement from the pooled files, classified with a 0.5 threshold, and
    compared against those labels.

    NOTE(review): the 1/0 assignment must match the class indices the model
    was trained with — confirm against the training generator's mapping.

    Args:
        model: Model whose ``predict`` yields one score per image.
        folder_path1: Directory whose images are treated as class 1.
        folder_path2: Directory whose images are treated as class 0.
        img_size: Target size used when loading each image.
        limit: Maximum number of images to classify.

    Returns:
        A ``(predictions, cm)`` tuple: the list of predicted labels (0/1) in
        sampling order and the 2x2 confusion matrix.
    """
    image_extensions = ('.jpeg', '.jpg', '.bmp', '.png')

    # Store (folder, file, label) triples so a file name that exists in both
    # folders can never be resolved to the wrong folder or the wrong label.
    candidates = []
    for folder, label in ((folder_path1, 1), (folder_path2, 0)):
        for file_name in sorted(os.listdir(folder)):
            if file_name.lower().endswith(image_extensions):
                candidates.append((folder, file_name, label))

    sampled = random.sample(candidates, min(len(candidates), limit))

    predictions = []
    true_labels = []

    for folder, img_file, true_label in sampled:
        img_path = os.path.join(folder, img_file)

        img = image.load_img(img_path, target_size=img_size)
        img_array = np.expand_dims(image.img_to_array(img), axis=0) / 255.0

        # Take the single score explicitly; verbose=0 avoids a progress bar per image.
        score = float(np.ravel(model.predict(img_array, verbose=0))[0])
        predicted_class = 1 if score > 0.5 else 0

        predictions.append(predicted_class)
        true_labels.append(true_label)

    # Fix the label set so the matrix is always 2x2 and matches the two tick
    # labels below, even when the sample contains a single class.
    cm = confusion_matrix(true_labels, predictions, labels=[0, 1])

    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Class 0', 'Class 1'], yticklabels=['Class 0', 'Class 1'])
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.title('Confusion Matrix')
    plt.show()

    return predictions, cm

# Additional-data folders: the first is treated as class 1, the second as class 0.
unseen_folder_path1 = '/content/drive/MyDrive/Dl+project1/SAD'
unseen_folder_path2 = '/content/drive/MyDrive/Dl+project1/HAPPY'

predictions, cm = predict_on_unseen_image_batch_from_two_folders(best_model, unseen_folder_path1, unseen_folder_path2, limit=100)

# The predictions come from a random sample pooled from BOTH folders and the
# function returns labels only, so they cannot be matched to the directory
# listing of the first folder. Number the samples instead of printing file
# names that do not belong to them.
for sample_number, predicted_class in enumerate(predictions, start=1):
    print(f"Sample {sample_number} | Predicted Class: {predicted_class}")

# Show which class name the training generator mapped to each numeric label (0 and 1).
print("Class Indices Mapping:", train_generator.class_indices)

# Invert the mapping so a numeric label can be looked up to get its class name.
class_mapping = {v: k for k, v in train_generator.class_indices.items()}
print("Numeric Label to Class Name Mapping:", class_mapping)

# Plot training and validation accuracy for every architecture, then report the best test accuracy.
plt.figure(figsize=(12, 8))

for model_type, history in histories.items():
    # Solid line: training accuracy; dashed line: validation accuracy.
    plt.plot(history.history['accuracy'], label=f'{model_type} - Training Accuracy')
    plt.plot(history.history['val_accuracy'], linestyle='--', label=f'{model_type} - Validation Accuracy')

plt.title('Model Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.show()

# `best_accuracy` is a single number, so np.argmax([best_accuracy]) is always
# 0 and the previous message always named models[0], whichever architecture
# actually won. Report only what is known at this point.
print(f"\nThe best model reached a test accuracy of {best_accuracy:.4f}")

# Further metrics
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns

# Evaluation
def evaluate_model(model, test_generator):
    """Print and plot an extended set of binary-classification metrics.

    Reports loss and accuracy, the confusion matrix, the classification
    report, and precision / recall / F1 at a 0.5 threshold. ROC-AUC, the ROC
    curve, the precision-recall curve and the F1-vs-threshold sweep are
    computed from the predicted probabilities, not from thresholded labels.
    Labels and probabilities are gathered from the same batches, so they stay
    aligned even when the generator shuffles its samples.

    Args:
        model: Compiled Keras model with ``accuracy`` as its only metric.
        test_generator: Directory iterator exposing ``class_indices`` and
            supporting ``len()`` and integer indexing of batches.
    """
    # Imported here because the file-level sklearn imports do not include it.
    from sklearn.metrics import precision_recall_curve

    test_loss, test_accuracy = model.evaluate(test_generator)
    print(f'\nTest Loss: {test_loss:.4f}')
    print(f'Test Accuracy: {test_accuracy:.4f}')

    # Predictions: pair every score with the label from the very same batch
    # instead of relying on `test_generator.labels`, which is in file order.
    batch_labels, batch_scores = [], []
    for batch_idx in range(len(test_generator)):
        batch_x, batch_y = test_generator[batch_idx]
        batch_scores.append(np.asarray(model.predict_on_batch(batch_x)).reshape(-1))
        batch_labels.append(np.asarray(batch_y).reshape(-1))
    y_true = np.concatenate(batch_labels).astype(int)
    y_prob = np.concatenate(batch_scores)
    y_pred = (y_prob > 0.5).astype(int)

    # Class names ordered by their numeric index, so names and ids line up.
    ordered_classes = sorted(test_generator.class_indices.items(), key=lambda item: item[1])
    class_names = [name for name, _ in ordered_classes]
    label_ids = [index for _, index in ordered_classes]

    # Confusion Matrix
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    print("Confusion Matrix:")
    print(cm)

    cr = classification_report(y_true, y_pred, labels=label_ids, target_names=class_names)
    print("Classification Report:")
    print(cr)

    # Precision, Recall and F1-Score at the 0.5 threshold (positive class = label 1).
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    # ROC-AUC is a ranking metric: it needs the continuous scores.
    roc_auc = roc_auc_score(y_true, y_prob)

    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1-Score: {f1:.4f}')
    print(f'ROC-AUC: {roc_auc:.4f}')

    # Plot Confusion Matrix
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

    # Plot ROC Curve (from probabilities, so the curve has more than one operating point).
    fpr, tpr, thresholds = roc_curve(y_true, y_prob)
    roc_auc_value = auc(fpr, tpr)
    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, color='b', label=f'ROC curve (area = {roc_auc_value:.2f})')
    plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc='lower right')
    plt.show()

    # Plot Precision-Recall Curve (real precision/recall pairs, not FPR/TPR).
    pr_precision, pr_recall, _ = precision_recall_curve(y_true, y_prob)
    plt.figure(figsize=(8, 6))
    plt.plot(pr_recall, pr_precision, label="Precision-Recall Curve", color='g')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.legend(loc='lower right')
    plt.show()

    # Plot F1-Score vs Threshold: sweep the threshold over the probabilities.
    thresholds_range = np.arange(0.0, 1.1, 0.1)
    f1_scores = [
        f1_score(y_true, (y_prob > thresh).astype(int), zero_division=0)
        for thresh in thresholds_range
    ]
    plt.figure(figsize=(8, 6))
    plt.plot(thresholds_range, f1_scores, label='F1-Score vs Threshold', color='r')
    plt.xlabel('Threshold')
    plt.ylabel('F1-Score')
    plt.title('F1-Score vs Threshold')
    plt.legend(loc='best')
    plt.show()

# Run the extended evaluation defined above on the model with the highest test accuracy.
evaluate_model(best_model, test_generator)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Shared folder found: /content/drive/My Drive/Dl+project
Contents of the shared folder:
['HAPPY', 'SAD']
