## importing required libraries

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# data processing

## function for reading and preprocessing images

In [2]:
# Load OpenCV's bundled Haar cascade for frontal-face detection; the classifier
# locates faces in a grayscale image and returns their bounding-box coordinates.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
# A missing or corrupt cascade file leaves the classifier empty rather than
# raising, so fail here with a clear message instead of later in detectMultiScale.
if face_cascade.empty():
    raise IOError("Could not load 'haarcascade_frontalface_default.xml' from cv2.data.haarcascades")
# (width, height) every face crop is resized to; 224x224 is the default VGG16 input size.
image_size = (224,224)
def preprocess_image(image_path):
    """Detect a face in an image file and turn it into a VGG16-ready array.

    The first face reported by the Haar cascade is cropped, resized to
    ``image_size``, its lower half is blacked out so that a downstream model
    has to rely on the upper facial features, and the result is passed through
    the VGG16 ``preprocess_input``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A float array of shape ``(image_size[1], image_size[0], 3)`` as
        produced by ``preprocess_input``, or ``None`` when the file cannot be
        read as an image or no face is detected.

    Note:
        Arrays cached by an earlier version of this function were fed to
        ``preprocess_input`` in BGR order; regenerate any saved ``.npy`` files
        so training and inference use the same channel order.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports an unreadable / non-image file by returning None
    # instead of raising, which would otherwise crash cvtColor below.
    if image is None:
        return None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
    if len(faces) == 0:
        return None

    # Only the first detection is used, even if several faces are found.
    x, y, w, h = faces[0]
    face = image[y:y+h, x:x+w]
    face_resized = cv2.resize(face, image_size)

    # OpenCV loads images as BGR, while the VGG16 preprocess_input expects RGB
    # (it performs the RGB->BGR flip itself before subtracting the ImageNet means).
    face_rgb = cv2.cvtColor(face_resized, cv2.COLOR_BGR2RGB)

    # Black out the lower half of the crop (rows are the height axis, i.e.
    # image_size[1]) so the model focuses on the top features of the face.
    face_rgb[image_size[1] // 2:, :] = 0

    # Hand over a float copy so preprocess_input works on floating-point data.
    return preprocess_input(face_rgb.astype(np.float32))

## loading and processing images and saving them as numpy files

In [2]:
# Set paths and parameters
# The dataset location can be overridden with the GENDER_DATA_DIR environment
# variable; the original local path remains the default so existing runs still work.
data_dir = os.environ.get(
    'GENDER_DATA_DIR',
    '/Users/saatvik/Desktop/secytasks/CVtask/genderclassifier/training_dataset_uploaded',
)

# Load dataset and preprocess images
data = []
labels = []
for label in ['Male', 'Female']:
    label_dir = os.path.join(data_dir, label)
    # os.listdir returns entries in arbitrary order; sorting makes the saved
    # arrays (and therefore the later seeded train/validation split) reproducible.
    for image_name in sorted(os.listdir(label_dir)):
        image_path = os.path.join(label_dir, image_name)
        # Skip hidden files (e.g. macOS .DS_Store) and sub-directories, which are not images.
        if image_name.startswith('.') or not os.path.isfile(image_path):
            continue
        preprocessed_image = preprocess_image(image_path)
        # preprocess_image returns None when no face was found; such images are dropped.
        if preprocessed_image is not None:
            data.append(preprocessed_image)
            # Label encoding: Male -> 0, Female -> 1
            labels.append(0 if label == 'Male' else 1)

data = np.array(data)
labels = np.array(labels)

# Save the arrays so this cell does not have to be re-run if the kernel restarts
np.save('data.npy',data)
np.save('labels.npy',labels)
print("phase 1 done")

phase 1 done


## loading data from saved state

In [3]:
# Restore the preprocessed images and their labels from the .npy files in the
# working directory, so the preprocessing cell can be skipped after a kernel restart.
loaded_data, loaded_labels = (np.load(npy_file) for npy_file in ('data.npy', 'labels.npy'))

## splitting data into train and validation sets and using data augmentation

In [4]:
# Not used in this cell; presumably consumed by the training code that follows.
batch_size = 32
# Split dataset into training and validation sets (80% / 20%).
# random_state fixes the shuffle so the split is reproducible, and stratify
# keeps the class ratio approximately the same in both subsets, so validation
# accuracy is not skewed by an unlucky draw.
X_train, X_val, y_train, y_val = train_test_split(
    loaded_data,
    loaded_labels,
    test_size=0.2,
    random_state=42,
    stratify=loaded_labels,
)


# building the model: as I couldn't download VGG16, I am using Colab and will upload that in a separate .ipynb file