**Instructions:**

**Loading Data**
- first off, execute this .ipynb file in google colab
- load directory of images (e.g. 1a.jpg, 1b.jpg, etc.) like the SMILE directory that was given to us into a directory of google drive
- mount google drive by execute first cell and give permission for colab to access your files
- copy path of your directory and assign it to the variable "directory_path"

**Executing Code**
- make sure to connect to a T4 GPU by clicking EDIT -> NOTEBOOK SETTINGS, then clicking T4 GPU
- You can then run all cells by clicking RUNTIME -> RUN ALL

In [27]:
# WHEN USING COLAB MAKE SURE TO USE T4 GPU FOR MUCH FASTER PERFORMANCE AND LESS WAIT TIME

import os
import csv
import random
import numpy as np
from PIL import Image
import tensorflow as tf
from google.colab import drive
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.metrics import roc_auc_score, confusion_matrix, precision_score, recall_score, f1_score

# Mount Google Drive
drive.mount('/content/drive')



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [26]:
class ImageLoader:
    def __init__(self, directory):
        self.directory = directory
        self.images = []
        self.labels = []

    def open_images_from_directory(self):
        """
        Opens all JPEG images from the specified directory on Google Drive.

        """
        for filename in os.listdir(self.directory):
            if filename.endswith('.jpg'):  # Check if the file is a JPEG image
                image_path = os.path.join(self.directory, filename)
                label = "neutral" if filename.endswith('a.jpg') else "happy" if filename.endswith('b.jpg') else "unknown"
                try:
                    im = Image.open(image_path).convert('RGB')
                    im = im.resize((224, 224))
                    im_array = img_to_array(im)  # Convert PIL Image to NumPy array
                    im_array = preprocess_input(im_array)  # Preprocess input for VGG16
                    self.images.append(im_array)
                    self.labels.append(label)
                except Exception as e:
                    print(f"Error loading image '{filename}': {e}")

        self.images = np.array(self.images)
        self.labels = np.array(self.labels)
        return self.images, self.labels


# We imported the SMILE folder into MyDrive and called the directory "data"
directory_path = '/content/drive/MyDrive/data' # Insert path to directory
image_loader = ImageLoader(directory_path)
images, labels = image_loader.open_images_from_directory()


# Accessing loaded images and labels
print("Number of images loaded:", len(image_loader.images))
print("Number of labels loaded:", len(image_loader.labels))

print("\n")

print(type(images), type(labels))

Number of images loaded: 500
Number of labels loaded: 500


<class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [23]:
# Assuming 'labels' is a list of string labels ('neutral', 'happy')
# Happy is 0 and Neutral is 1
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

In [17]:
X_train, X_test, y_train, y_test = train_test_split(images, encoded_labels, test_size=0.2, random_state=42)

In [28]:
class VGGClassifier:
    def __init__(self, input_shape=(224, 224, 3)):
        self.input_shape = input_shape
        self.vgg_model = VGG16(weights='imagenet', input_shape=input_shape, include_top=False)
        self.vgg_model.trainable = False

    def build_model(self):
        model = Sequential([
            self.vgg_model,
            Flatten(),
            Dense(512, activation='relu'),
            Dense(1, activation='sigmoid')
        ])
        return model

    def compile_model(self, model):
        model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['accuracy'])
        return model

    def train_model(self, model, X_train, y_train, X_test, y_test, batch_size=32, epochs=10):
        history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test))
        return history

    def evaluate_model(self, model, X_test, y_test):
        loss, accuracy = model.evaluate(X_test, y_test)
        print("Test Loss:", loss)
        print("Test Accuracy:", accuracy)
        return loss, accuracy

    def predict(self, model, X_test):
        predictions = model.predict(X_test)
        return predictions

    def calculate_f1_score(self, y_true, y_pred):
        f1score = f1_score(y_true, y_pred)
        print("F1 Score:", f1score)
        return f1score

# Executing model
vgg_classifier = VGGClassifier()
model = vgg_classifier.build_model()
model = vgg_classifier.compile_model(model)
history = vgg_classifier.train_model(model, X_train, y_train, X_test, y_test)
loss, accuracy = vgg_classifier.evaluate_model(model, X_test, y_test)
predictions = vgg_classifier.predict(model, X_test)
binary_predictions = (predictions > 0.5).astype('int32')
f1score = vgg_classifier.calculate_f1_score(y_test, binary_predictions)





Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.07633598893880844
Test Accuracy: 0.9800000190734863
F1 Score: 0.98


In [22]:
# Define the number of folds for cross-validation
k_folds = 10
kf = KFold(n_splits=k_folds)

fpr_list = []
precision_list = []
recall_list = []
f_measure_list = []
roc_auc_list = []
confusion_matrices = []

# Perform cross-validation
for fold, (train_indices, val_indices) in enumerate(kf.split(images)):
    train_images = images[train_indices]
    val_images = images[val_indices]

    train_labels = encoded_labels[train_indices]
    val_labels = encoded_labels[val_indices]

    # Train the model
    model.fit(train_images, train_labels, epochs=5, batch_size=32, validation_data=(val_images, val_labels))

    # Evaluate the model
    val_predictions = model.predict(val_images)
    val_predictions_binary = (val_predictions > 0.5).astype('int32')

    # Calculate evaluation metrics
    confusion_matrix_val = confusion_matrix(val_labels, val_predictions_binary)
    tn, fp, fn, tp = confusion_matrix_val.ravel()
    fpr = fp / (fp + tn)  # Calculate False Positive Rate

    # Calculate evaluation metrics
    precision = precision_score(val_labels, val_predictions_binary)
    recall = recall_score(val_labels, val_predictions_binary)
    f_measure = f1_score(val_labels, val_predictions_binary)
    roc_auc = roc_auc_score(val_labels, val_predictions)
    confusion_matrix_val = confusion_matrix(val_labels, val_predictions_binary)

    # Append metrics to lists
    fpr_list.append(fpr)
    precision_list.append(precision)
    recall_list.append(recall)
    f_measure_list.append(f_measure)
    roc_auc_list.append(roc_auc)
    confusion_matrices.append(confusion_matrix_val)

    # Optionally, print or store individual fold metrics
    print(f"Fold {fold+1} - FPR: {fpr}, Precision: {precision}, Recall: {recall}, F-Measure: {f_measure}, ROC AUC: {roc_auc}")
    print("Confusion Matrix:")
    print(confusion_matrix_val)

# Optionally, compute average or aggregate metrics across folds
avg_fpr = np.mean(fpr_list)
avg_precision = np.mean(precision_list)
avg_recall = np.mean(recall_list)
avg_f_measure = np.mean(f_measure_list)
avg_roc_auc = np.mean(roc_auc_list)
avg_confusion_matrix = np.mean(confusion_matrices, axis=0)

print("\nAverage Metrics Across Folds:")
print(f"Average FPR: {avg_fpr}, Average Precision: {avg_precision}, Average Recall: {avg_recall}, Average F-Measure: {avg_f_measure}, Average ROC AUC: {avg_roc_auc}")
print("Average Confusion Matrix:")
print(avg_confusion_matrix)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fold 1 - FPR: 0.0, Precision: 1.0, Recall: 1.0, F-Measure: 1.0, ROC AUC: 1.0
Confusion Matrix:
[[25  0]
 [ 0 25]]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fold 2 - FPR: 0.0, Precision: 1.0, Recall: 1.0, F-Measure: 1.0, ROC AUC: 1.0
Confusion Matrix:
[[24  0]
 [ 0 26]]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fold 3 - FPR: 0.0, Precision: 1.0, Recall: 1.0, F-Measure: 1.0, ROC AUC: 1.0
Confusion Matrix:
[[26  0]
 [ 0 24]]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fold 4 - FPR: 0.0, Precision: 1.0, Recall: 1.0, F-Measure: 1.0, ROC AUC: 1.0
Confusion Matrix:
[[25  0]
 [ 0 25]]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fold 5 - FPR: 0.0, Precision: 1.0, Recall: 1.0, F-Measure: 1.0, ROC AUC: 1.0
Confusion Matrix:
[[24  0]
 [ 0 26]]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Fold 6 - FPR: 0.0, Precision: 1.0, Recall: 1.0, F-Measure: 1.0, ROC AUC: 1.0
Confusion Matrix:
[[26  0]
 [ 0 24]]
Epoch 1/5
Epoch 