## Install DeepFace

In [None]:
# Install DeepFace, which the "Import Libraries" cell below imports for face extraction.
# Re-running this cell is harmless when the package is already present in the runtime.
!pip install deepface

## Connect Drive

In [None]:
from google.colab import drive  # Colab helper used to mount Google Drive inside the runtime
drive.mount('/content/drive')  # Mount Drive at /content/drive; the dataset paths used later live under /content/drive/MyDrive

## Import Libraries

In [None]:
import cv2  # OpenCV library for image and video processing
import os  # Provides functions to interact with the operating system
import matplotlib.pyplot as plt  # Library for creating visualizations and plots
import pandas as pd  # Data manipulation and analysis library
import pickle  # For serializing and deserializing Python object structures
import joblib  # Library for saving and loading machine learning models
import shutil  # High-level file operations like copying and removal
import numpy as np  # Library for numerical computations and array operations

from sklearn.svm import SVC  # Support Vector Classification
from copy import deepcopy  # Create deep copies of objects
from sklearn.model_selection import train_test_split  # Split arrays or matrices into random train and test subsets
from sklearn.metrics import accuracy_score  # For calculating accuracy classification score
from sklearn.preprocessing import StandardScaler  # For standardizing features by removing the mean and scaling to unit variance
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis  # Linear Discriminant Analysis
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img  # Utilities for image preprocessing and augmentation
from skimage import exposure  # For histogram equalization and other exposure adjustment techniques
from deepface import DeepFace  # Deep learning library for face recognition and facial attribute analysis
from sklearn.pipeline import Pipeline  # Pipeline utility for simplifying the creation of machine learning workflows
from sklearn.decomposition import PCA  # Principal Component Analysis for dimensionality reduction

## Image Augmentation

In [None]:
# # Define root directory and subdirectories
# root_dir = "/content/drive/MyDrive/SP24-CSEN240-2/Project/Train"
# data_dir = root_dir
# #data_dir = root_dir + "/Training"
# label_file = data_dir + "/labels.txt"
# output_dir = root_dir + "/Augmented_images"

# # Load label mappings from a space-separated text file
# label_mapping = {}
# with open(label_file, 'r') as file:
#     for line in file:
#         # Read filename and label from each line, and store in dictionary
#         filename, label = line.strip().split(' ')
#         label_mapping[filename + ".jpg"] = label
# print(label_mapping)

# # Initialize the ImageDataGenerator with desired augmentations
# datagen = ImageDataGenerator(
#     rotation_range=5,
#     rescale=1./255,
#     zoom_range=0.2,
#     brightness_range=(0.5, 1.0),
#     horizontal_flip=True,
#     fill_mode='nearest'
# )

# # Function to save augmented images in label-specific folders
# def save_augmented_images(directory, output_directory, label_mapping, num_augmented_images=5):
#     for filename in os.listdir(directory):
#         if filename in label_mapping:  # Check if the file has a mapping
#             label = label_mapping[filename]  # Get the label for the current file
#             label_dir = os.path.join(output_directory, label)  # Define label-specific directory path

#             if not os.path.exists(label_dir):
#                 os.makedirs(label_dir)  # Create the directory if it doesn't exist

#             file_path = os.path.join(directory, filename)
#             image = load_img(file_path)  # Load the image
#             image_array = img_to_array(image)  # Convert image to array
#             image_array = image_array.reshape((1,) + image_array.shape)

#             # Generate and save augmented images
#             i = 0
#             save_prefix = os.path.splitext(filename)[0]  # Use filename without extension as save prefix
#             for batch in datagen.flow(image_array, batch_size=1, save_to_dir=label_dir, save_prefix=save_prefix, save_format='jpeg'):
#                 i += 1
#                 if i >= num_augmented_images:
#                     break  # Limit the number of augmented images generated per original image

In [None]:
# Call the function to start the augmentation process
#save_augmented_images(data_dir, output_dir, label_mapping)

## Data Cleaning

In [None]:
def detect_faces(img: np.ndarray) -> np.ndarray:
    """Crop the first face that DeepFace finds in ``img``.

    Returns:
        The ``'face'`` entry of the first detection, or an empty array of
        shape ``(0,)`` when ``DeepFace.extract_faces`` raises ``ValueError``.
    """
    try:
        detections = DeepFace.extract_faces(img)
        first_face = detections[0]['face']
    except ValueError:
        # Empty sentinel: callers recognise a failed crop through ``.size == 0``.
        return np.empty(shape=(0,))
    return first_face

## Load images from folders

In [None]:
def load_images_from_folder(folder: str, file_map: dict) -> tuple:
    """Read every labelled image that sits directly inside ``folder``.

    Args:
        folder: Directory to scan; sub-directories are ignored.
        file_map: Mapping of file name (including extension) to its label.

    Returns:
        ``(images, labels)`` - two parallel lists holding the arrays returned
        by ``cv2.imread`` and the matching labels taken from ``file_map``.
    """
    print("loading images from folder")

    images = []
    labels = []
    unlabelled = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and anything written out in that order) reproducible.
    for filename in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, filename)
        if not os.path.isfile(img_path):
            continue
        if filename not in file_map:
            # No label available: skip the file instead of failing with a
            # KeyError halfway through the folder.
            unlabelled.append(filename)
            continue
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable / non-image file with None.
            continue
        images.append(img)
        labels.append(file_map[filename])

    if unlabelled:
        print(f"skipped {len(unlabelled)} file(s) without a label: {unlabelled}")

    return images, labels

def preprocess_images(images: list, face_detection: bool) -> list:
    """Optionally crop each image to its first detected face, then resize to 128x128.

    Args:
        images: Images as returned by ``cv2.imread``.
        face_detection: When true, every image is passed through
            ``detect_faces``; images without a usable crop fall back to the
            full frame.

    Returns:
        A list with one 128x128 image per input, in input order.
    """
    processed_images = []
    print("pre - processing images")
    wrongCrop = 0
    for img in images:
        if face_detection:
            # Keep an independent copy so the fallback is unaffected by
            # whatever the detector does with its input.
            backup = deepcopy(img)
            img = detect_faces(img)
            if img.size == 0:  # empty array means no face could be cropped
                wrongCrop += 1
                # Per the DeepFace documentation, extract_faces returns RGB
                # crops scaled to [0, 1], while cv2.imread yields BGR uint8 in
                # [0, 255]. Convert the fallback frame so every sample shares
                # one colour order and value range.
                img = cv2.cvtColor(backup, cv2.COLOR_BGR2RGB) / 255.0
        img = cv2.resize(img, (128, 128))
        processed_images.append(img)
    if face_detection:
        # Surface the fallback count that was previously computed but never shown.
        print(f"face detection failed on {wrongCrop} of {len(images)} image(s); full frame used instead")
    return processed_images

def load_labels_from_file(mapping_file):
    """Parse a ``<name> <label>`` text file into a ``{"<name>.jpg": label}`` dict.

    Blank lines are ignored, and the name and label may be separated by any
    run of whitespace (spaces or tabs).

    Raises:
        ValueError: If a non-blank line does not consist of exactly a name
            and a label.
    """
    label_map = {}
    with open(mapping_file, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            fields = line.split()
            if not fields:
                continue  # tolerate empty / trailing lines
            if len(fields) != 2:
                raise ValueError(
                    f"{mapping_file}:{line_number}: expected '<name> <label>', got {line.strip()!r}"
                )
            filename, label = fields
            # Names are listed without extension; the image files are .jpg.
            label_map[filename + ".jpg"] = label
    return label_map

def load_keras_images(path):
    """Load images from a directory that holds one sub-folder per class.

    Each sub-folder name is used as the label of every image inside it.
    Entries of ``path`` that are not directories and files that
    ``cv2.imread`` cannot decode are skipped.

    Returns:
        ``(images, labels)`` - two parallel lists.
    """
    labels = []
    images = []
    unreadable = 0
    # Sorted listings keep the sample order reproducible, so a fixed
    # random_state downstream always yields the same split.
    for name in sorted(os.listdir(path)):
        folder_path = os.path.join(path, name)
        if not os.path.isdir(folder_path):
            continue  # stray file next to the class folders
        for img_name in sorted(os.listdir(folder_path)):
            image = cv2.imread(os.path.join(folder_path, img_name))
            if image is None:
                # cv2.imread returns None for files it cannot decode.
                unreadable += 1
                continue
            images.append(image)
            labels.append(name)
    if unreadable:
        print(f"skipped {unreadable} unreadable file(s) under {path}")
    return images, labels

def load_images(path: str, Face_detect=False, keras=False) -> tuple:
    """Load, preprocess and flatten all images found under ``path``.

    Args:
        path: Image directory. With ``keras=False`` it must contain a
            ``labels.txt`` mapping file next to the images; with
            ``keras=True`` it must contain one sub-folder per class.
        Face_detect: Forwarded to ``preprocess_images`` to enable face cropping.
        keras: Selects the one-folder-per-class layout.

    Returns:
        ``(data, labels)`` where ``data`` is a 2-D array with one flattened
        image per row and ``labels`` is the matching list of labels.

    Raises:
        ValueError: If no readable image was found.
    """
    if keras:
        images, labels = load_keras_images(path)
    else:
        # Labels come from the mapping file stored alongside the images.
        mapping_file_path = os.path.join(path, "labels.txt")
        label_map = load_labels_from_file(mapping_file_path)
        images, labels = load_images_from_folder(path, label_map)

    if not images:
        # Reshaping an empty array below would also raise ValueError, but with
        # a message that says nothing about the actual cause.
        raise ValueError(f"no readable images found under {path!r}")

    preprocessed_images = preprocess_images(images, Face_detect)
    data = np.array(preprocessed_images)
    # One row per image, all pixel values flattened into a single feature vector.
    data = data.reshape(len(data), -1)
    return data, labels

## Load Data

In [None]:
# Crop every image to its detected face before resizing
Face_detect = True

# Project root on the mounted Drive
root_path = "/content/drive/MyDrive/SP24-CSEN240-2/Project/Temp/Group-12"

# Folder with the training images
train_path = f"{root_path}/Augmented_images"

# Folder with the validation images (currently disabled)
#validation_path = root_path + "/Validation"

# keras=True selects the one-sub-folder-per-label layout, so the folder names become the labels
data, labels = load_images(path=train_path, Face_detect=Face_detect, keras=True)

# Validation images would be loaded through the labels.txt layout (currently disabled)
#validation_data, validation_labels = load_images(validation_path, Face_detect)

## Save the training and validation data to disk for easy reloading

In [None]:
# # Save Data
# root_path = "/content/drive/MyDrive/ML_Project"

# # Save training data as a .npy file
# np.save(root_path + '/Training_data.npy', data)

# # Save validation data as a .npy file
# np.save(root_path + '/Validation_data.npy', validation_data)

# # Save training data labels as a .pkl file
# with open(root_path + '/Training_data_labels.pkl', 'wb') as f:
#     pickle.dump(labels, f)

# # Save validation data labels as a .pkl file
# with open(root_path + '/Validation_data_labels.pkl', 'wb') as f:
#     pickle.dump(validation_labels, f)

In [None]:
# # Load Data
# # Load training and testing data from .npy files
# train_test_data_load = np.load(root_path + '/Training_data.npy')

# # Load validation data from .npy file
# validation_data_load = np.load(root_path + '/Validation_data.npy')

# # Load training and testing labels from .pkl file
# with open(root_path + '/Training_data_labels.pkl', 'rb') as f:
#     train_test_labels_load = pickle.load(f)

# # Load validation labels from .pkl file
# with open(root_path + '/Validation_data_labels.pkl', 'rb') as f:
#     validation_labels_load = pickle.load(f)

## ML Model

In [None]:
# Hold out 20% of the samples for testing.
# The split is stratified so that every class appears in the training fold:
# the LDA step below asks for 34 components, which scikit-learn only allows
# when the training data contains at least 35 distinct classes.
# (Stratification needs at least two samples per class.)
# NOTE(review): `data` is loaded from the augmented-image folder; if augmented
# variants of one source photo land on both sides of this split, the test score
# will be optimistic - confirm, and consider splitting by source image.
data_train, data_test, labels_train, labels_test = train_test_split(
    data, labels, test_size=0.2, random_state=42, stratify=labels
)

# Create the pipeline (the name `pip` is referenced by the scoring and prediction cells)
pip = Pipeline([
    ('scaler', StandardScaler()),  # Apply standard scaling to the data
    ('pca', PCA(n_components=600)),  # Perform PCA with 600 components
    ('lda', LinearDiscriminantAnalysis(n_components=34)),  # Perform LDA with 34 components
    ('svm', SVC(C=32))  # Support Vector Classification with C=32
])

# Fit the pipeline on the training data
pip.fit(data_train, labels_train)

# Evaluate the model on the validation data
#score = pip.score(validation_data, validation_labels)

# Print the score
#print(score)

In [None]:
# Accuracy of the fitted pipeline on the held-out test split
score = accuracy_score(labels_test, pip.predict(data_test))

# Show the accuracy
print(score)

## Save and Load Model

In [None]:
# # Save the trained pipeline
# joblib.dump(pip, '/content/drive/MyDrive/ML_Project/trained_model.pkl')

In [None]:
# # Load the trained pipeline
# Fr_model = joblib.load('/content/drive/MyDrive/ML_Project/trained_model.pkl')

## Predict

In [None]:
# # Run the cell if there is no labels file.

# # Define the folder path
# folder_path = "/content/drive/MyDrive/SP24-CSEN240-2/Project/Test"

# # Initialize an empty list to store file names
# file_list = []

# # Iterate through all files in the folder
# for filename in os.listdir(folder_path):
#     # Add filename and "None" to the list as a tab-separated string
#     file_list.append(os.path.splitext(filename)[0] + " none")

# # Define the output file path
# output_file = "/content/drive/MyDrive/SP24-CSEN240-2/Project/Test/labels.txt"

# # Write the file names and "None" to the output file
# with open(output_file, 'w') as f:
#     f.write("\n".join(file_list))

In [None]:
# Folder with the test images; load_images reads the labels.txt stored in it
prediction_path = "/content/drive/MyDrive/SP24-CSEN240-2/Project/Test"

# Same face-detection setting as for training; keras keeps its default, so labels come from labels.txt
prediction_data, prediction_labels = load_images(path=prediction_path, Face_detect=Face_detect)

In [None]:
# Predict a label for every test image loaded from prediction_path
predictions = pip.predict(prediction_data)

## Output the predictions

In [None]:
# Define the output file path
output_file = "/content/drive/MyDrive/SP24-CSEN240-2/Project/Prediction/group-12.txt"
# Create the destination folder if it is missing, so the write cannot fail
# on a non-existent directory after all the expensive steps have finished
os.makedirs(os.path.dirname(output_file), exist_ok=True)
# Write one predicted label per line, in the order the test images were loaded
with open(output_file, 'w') as f:
    f.writelines(f"{name}\n" for name in predictions)
print("Array saved to:", output_file)

#END