In [None]:
import cv2 as cv
import pandas as pd
import glob
import torch
import random
import time
import os
import csv
import numpy as np
import tensorflow as tf
import pdb
import matplotlib.pyplot as plt
import codecs

from facenet_pytorch import MTCNN
from tensorflow import keras
from matplotlib import pyplot as plt
from tqdm import tqdm
from PIL import Image

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,ReLU,BatchNormalization, SeparableConv2D 
from facenet_pytorch import MTCNN, InceptionResnetV1
from tensorflow.keras.models import load_model

In [None]:
# Show every compute device TensorFlow reports for this machine
# (useful to confirm whether a GPU is visible before training).
physical_devices = tf.config.list_physical_devices()
print(physical_devices)

In [None]:
# train.csv: per the original note, refers to training images whose file
# names have the form n.jpg.
train_small_csv_paths = 'C:\\Users\\96291\\Desktop\\Mini_challenge\\un_ziped\\purdue-face-recognition-challenge-2024\\train.csv'
# category.csv: one row per category; its unnamed first column is used below
# as the numerical label.
category_path = 'C:\\Users\\96291\\Desktop\\Mini_challenge\\un_ziped\\purdue-face-recognition-challenge-2024\\category.csv'

# 'latin1' and 'ISO-8859-1' are aliases of the same codec; use a single name
# so both files are visibly decoded the same way and category strings match.
csv_encoding = 'latin1'

train_data = pd.read_csv(train_small_csv_paths, encoding=csv_encoding)
numerical_labels = pd.read_csv(category_path, encoding=csv_encoding)
print(numerical_labels)

# Build the category -> numerical label lookup. The category name must be the
# key because it is what we look up for every training row.
category_to_label = dict(zip(numerical_labels['Category'], numerical_labels['Unnamed: 0']))

# Replace the string labels of the training rows with numerical labels.
train_data['NumericalLabel'] = train_data['Category'].map(category_to_label)

# Series.map silently yields NaN for categories missing from the lookup; such
# rows would reach the loss function as invalid labels, so fail loudly here.
unmapped_categories = train_data.loc[train_data['NumericalLabel'].isna(), 'Category'].unique()
if len(unmapped_categories) > 0:
    raise ValueError(
        f"{len(unmapped_categories)} categories in train.csv have no entry in "
        f"category.csv, e.g. {list(unmapped_categories[:5])}"
    )

print(train_data)


In [None]:
# Pull out the numerical label of every row listed in the CSV file.
train_y = train_data.loc[:, 'NumericalLabel']
print(train_y)

In [None]:
# Split the rows of 'train_data' into training (80%) and validation (20%)
# subsets; the fixed random_state keeps the split repeatable.
train_df, val_df = train_test_split(train_data, test_size=0.2, random_state=42)

# Random geometric augmentation applied to training images only.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Both generators multiply pixel values by 1/255; only the training one augments.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
val_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by the training and validation generators.
# NOTE(review): the image directory is named 'test_croped_face' although it
# feeds training — confirm it really holds the cropped training faces.
# NOTE(review): shuffle=True is also used for validation; harmless for the
# metrics reported by fit(), but confirm it is intended.
flow_options = dict(
    directory='C:\\Users\\96291\\Desktop\\Mini_challenge\\un_ziped\\test_croped_face',
    x_col='File Name',
    y_col='NumericalLabel',
    target_size=(128, 128),
    batch_size=32,
    class_mode='raw',  # labels are passed through as the raw column values
    shuffle=True,
)

train_generator = train_datagen.flow_from_dataframe(dataframe=train_df, **flow_options)

val_generator = val_datagen.flow_from_dataframe(dataframe=val_df, **flow_options)


In [None]:
# Number of filters of each convolutional stage, in order. Every stage is
# Conv2D(3x3, 'same') -> ReLU -> BatchNormalization -> MaxPooling2D(2x2).
# (Further 512- and 1024-filter stages and a Dense(512) layer are left disabled.)
conv_filters = [64, 64, 128, 128, 256, 256, 512]

layer_stack = []
for stage_idx, n_filters in enumerate(conv_filters):
    if stage_idx == 0:
        # Only the first layer declares the input shape: 128x128 images, 3 channels.
        conv = Conv2D(n_filters, (3, 3), padding = 'same', input_shape=(128, 128, 3))
    else:
        conv = Conv2D(n_filters, (3, 3), padding = 'same')
    layer_stack.append(conv)
    layer_stack.append(ReLU())
    layer_stack.append(BatchNormalization())
    layer_stack.append(MaxPooling2D(2, 2))

# Classification head on top of the flattened feature map.
layer_stack.extend([
    Flatten(),
    ReLU(),
    Dropout(0.5),
    BatchNormalization(),
    Dense(100, activation='softmax'),  # Assuming 100 classes from 0-99
])

face_recg = Sequential(layer_stack)

# Integer labels -> sparse categorical cross-entropy; track accuracy.
face_recg.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
face_recg.fit(train_generator, epochs=70, validation_data= val_generator)

In [None]:
# Classify every test image: detect a face with MTCNN, crop and resize it to
# the network's 128x128 input, and record the predicted category per file id.

# Reuse the network trained in the previous cell (nothing is loaded from disk).
model = face_recg

# Initialize MTCNN for face detection, on the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
mtcnn = MTCNN(keep_all=False, device=device)

# Directory containing test images
test_images_folder = 'C:\\Users\\96291\\Desktop\\Mini_challenge\\un_ziped\\test'

# Map numerical label -> category name from the category mapping file.
# NOTE(review): this read uses pandas' default encoding, whereas the loading
# cell decodes the same file as ISO-8859-1 — confirm which one is correct so
# the emitted category names match the expected spelling.
category_df = pd.read_csv('C:\\Users\\96291\\Desktop\\Mini_challenge\\un_ziped\\purdue-face-recognition-challenge-2024\\category.csv')
category_mapping = dict(zip(category_df['Unnamed: 0'], category_df['Category']))

# Process images and classify
results = []
# Sorted for a stable order; '._*' entries (e.g. macOS resource forks) are skipped.
test_images = sorted([file for file in os.listdir(test_images_folder) if not file.startswith('._')])

for img_name in tqdm(test_images, desc='classifying'):
    if not img_name.lower().endswith('.jpg'):
        continue

    # File names are expected to be '<integer>.jpg'; parse the id first so a
    # malformed name fails (ValueError) before any detection work is done.
    file_id = int(os.path.splitext(img_name)[0])

    img_path = os.path.join(test_images_folder, img_name)
    # convert() returns an independent in-memory copy, so the file handle can
    # be released immediately.
    with Image.open(img_path) as raw_img:
        img = raw_img.convert('RGB')

    # Detect the face
    boxes, _ = mtcnn.detect(img)
    if boxes is not None and len(boxes) > 0:
        # Crop the first returned box and bring it to the network input size.
        face = img.crop(boxes[0])
        face = face.resize((128, 128))
        # Same 1/255 scaling as the training generators; add the batch axis.
        face_array = np.asarray(face, dtype=np.float32) / 255.0
        face_array = np.expand_dims(face_array, axis=0)

        # Classify the face. Calling the model directly avoids the per-call
        # overhead and progress output of model.predict() for a single sample.
        prediction = np.asarray(model(face_array, training=False))
        predicted_index = int(np.argmax(prediction, axis=1)[0])
        predicted_category = category_mapping.get(predicted_index, 'Unknown')
    else:
        # No face found: the image is reported as 'Unknown'.
        predicted_category = 'Unknown'

    results.append({'Id': file_id, 'Category': predicted_category})



In [None]:
# Order the predictions by their numeric 'Id', then store the ids as strings.
results_df = (
    pd.DataFrame(results)
    .sort_values(by='Id')
    .assign(Id=lambda frame: frame['Id'].astype(str))
)

# Write the predictions to a CSV file without the index column.
output_csv = 'C:\\Users\\96291\\Desktop\\Mini_challenge\\un_ziped\\predict.csv'
results_df.to_csv(output_csv, index=False)