<a href="https://colab.research.google.com/github/muhammadfawad538/CNN/blob/main/MaleVsFemale.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Female VS Male Prediction**

# **Defining Libraries**

In [None]:
import os
#  shutil ==> shell utility -> for file handling like copying,deleting,move etc
import shutil
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.utils import plot_model
from zipfile import ZipFile

# **Downloading Dataset**

In [None]:
# Open Colab's file-upload widget so the Kaggle API token file (kaggle.json)
# can be uploaded into the runtime.
# NOTE(review): the cell output echoes the uploaded file's contents, including
# the API key -- clear this cell's output before sharing the notebook.
from google.colab import files
files.upload()


Saving kaggle.json to kaggle (2).json


{'kaggle (2).json': b'{"username":"<REDACTED>","key":"<REDACTED>"}'}

In [None]:
# Copy the uploaded Kaggle API token into ~/.kaggle (presumably the location the
# kaggle CLI reads its credentials from -- confirm against the Kaggle API docs).
# NOTE(review): the upload output shows the file was saved as "kaggle (2).json",
# so this copies an earlier "kaggle.json" rather than the file just uploaded.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json


We can use dataset in 3 ways :
*   By using CLI
*   By using python library
*   By downloading dataset directly from the kaggle to browser


In [None]:
# Option 1: download the dataset archive with the Kaggle command-line interface (CLI).

!kaggle datasets download -d ashwingupta3012/male-and-female-faces-dataset

# Option 2 (disabled): download with the kagglehub Python library instead. Per the original author's note, it returns already-unzipped files, so the unzip step would not be needed.

# import kagglehub

# path = kagglehub.dataset_download("ashwingupta3012/male-and-female-faces-dataset")

# print("Path to dataset files:", path)


Dataset URL: https://www.kaggle.com/datasets/ashwingupta3012/male-and-female-faces-dataset
License(s): CC0-1.0
male-and-female-faces-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


**Unzip Dataset**

In [None]:
!unzip male-and-female-faces-dataset.zip -d male_female_faces

Archive:  male-and-female-faces-dataset.zip
replace male_female_faces/Male and Female face dataset/Female Faces/0 (1).jpeg? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

# **Splitting the dataset**
This step splits the data into a training set and a testing set: 80% of the images are used for training and 20% for testing.

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Base directory that holds one sub-folder of images per class
base_dir = "/content/male_female_faces/Male and Female face dataset"
classes = ["Male Faces", "Female Faces"]

# Target directories for the split data: <target_dir>/<Training|Testing>/<class>/
target_dir = "/content/split_faces"
train_dir = os.path.join(target_dir, "Training")
test_dir = os.path.join(target_dir, "Testing")

# Split each class separately so both splits keep the same 80/20 ratio per class
for category in classes:
    category_path = os.path.join(base_dir, category)

    # os.listdir() returns entries in arbitrary order and may include
    # sub-directories (e.g. ".ipynb_checkpoints"). Keep regular files only and
    # sort them, so random_state=42 yields the same split on every run and a
    # re-run cannot place the same image in both Training and Testing.
    images = sorted(
        name
        for name in os.listdir(category_path)
        if os.path.isfile(os.path.join(category_path, name))
    )

    # 80% of the files go to training, 20% to testing
    train_imgs, test_imgs = train_test_split(images, test_size=0.2, random_state=42)

    # Copy each subset into its own <split>/<class> folder
    for split_dir, split_imgs in ((train_dir, train_imgs), (test_dir, test_imgs)):
        destination = os.path.join(split_dir, category)
        os.makedirs(destination, exist_ok=True)
        for img in split_imgs:
            shutil.copy(os.path.join(category_path, img), os.path.join(destination, img))

print(" Dataset successfully split into train/test sets.")


This step is for converting all the images to RGB

In [None]:
from PIL import Image

def convert_all_images_to_rgb(folder):
    """Convert every non-RGB image under *folder* to RGB, in place.

    Walks *folder* recursively and looks at files whose name ends in
    .png, .jpg or .jpeg (case-insensitive). Images whose mode is not
    'RGB' are converted and written back to the same path; images that
    are already RGB are left untouched.

    The conversion is best-effort: any error for a single file is printed
    and the walk continues with the next file.

    Args:
        folder: Root directory to scan. A missing directory yields no files.

    Returns:
        None.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    for root, _dirs, filenames in os.walk(folder):
        for filename in filenames:
            if not filename.lower().endswith(image_suffixes):
                continue

            path = os.path.join(root, filename)
            try:
                # The context manager closes the file handle even when the
                # image is already RGB and never gets loaded or converted.
                with Image.open(path) as img:
                    rgb_img = img.convert('RGB') if img.mode != 'RGB' else None

                # Write back only after the source handle has been released.
                if rgb_img is not None:
                    rgb_img.save(path)
            except Exception as e:
                # Deliberately broad: one unreadable file must not abort the run.
                print(f"Failed to convert {path}: {e}")

# Normalise the colour mode of every image in both splits
for split_folder in ("/content/split_faces/Training", "/content/split_faces/Testing"):
    convert_all_images_to_rgb(split_folder)


This part is used to check the image size

In [None]:
from PIL import Image
import os

# Root folder written by the split step; images live under
# <root>/<Training|Testing>/<class name>/
dataset_folder = '/content/split_faces'

# The previous path skipped the "Training"/"Testing" level, so it pointed at a
# folder the split step never creates, and the image was never opened.
sample_folder = os.path.join(dataset_folder, 'Training', 'Male Faces')
if os.path.isdir(sample_folder):
    sample_names = sorted(
        name
        for name in os.listdir(sample_folder)
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
else:
    sample_names = []

# Path of the first image in the folder (None when the folder is missing or empty)
first_image_path = os.path.join(sample_folder, sample_names[0]) if sample_names else None

if first_image_path is None:
    print(f"No images found in {sample_folder}")
else:
    # Report the pixel dimensions (width, height) and colour mode of the sample
    with Image.open(first_image_path) as img:
        print(f"{first_image_path}: size={img.size}, mode={img.mode}")




# **Image Data generator**

Before this step, the training and testing folders contained **3 classes** instead of the expected 2. This step removes the spurious third class, a folder named '.ipynb_checkpoints'.

In [None]:
import shutil

# Drop any ".ipynb_checkpoints" folder from both splits; ignore_errors makes
# this a no-op when the folder does not exist.
for split_dir in (train_dir, test_dir):
    checkpoint_dir = os.path.join(split_dir, ".ipynb_checkpoints")
    shutil.rmtree(checkpoint_dir, ignore_errors=True)


This step **normalizes** the images and applies augmentation to help the model **generalize**

In [None]:
# Size every image is resized to, and number of images per batch
image_size = (64, 64)
batch_size = 32

# Both generators multiply pixel values by 1/255 (normalization); only the
# training generator additionally applies horizontal flips as augmentation.
train_datagen = ImageDataGenerator(horizontal_flip=True, rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by the two directory iterators
flow_options = dict(
    target_size=image_size,
    batch_size=batch_size,
    class_mode='binary',
)

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
test_generator = test_datagen.flow_from_directory(test_dir, **flow_options)

# **Define CNN Model**

In [None]:
# Feature extractor: three Conv2D + MaxPooling2D stages with 32, 64 and 128 filters.
# Only the first convolution declares the 64x64 RGB input shape.
feature_layers = [
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    layers.MaxPooling2D(2, 2),
]
for n_filters in (64, 128):
    feature_layers.append(layers.Conv2D(n_filters, (3, 3), activation='relu'))
    feature_layers.append(layers.MaxPooling2D(2, 2))

# Classifier head: flatten, one hidden dense layer, dropout, single sigmoid unit
classifier_layers = [
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    # Dropout randomly disables half of the hidden units during training so the
    # model cannot lean on any single neuron, which reduces overfitting.
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),  # binary classification
]

model = models.Sequential(feature_layers + classifier_layers)


In [None]:
# Adam optimizer with binary cross-entropy loss; accuracy is the tracked metric
compile_options = {
    'optimizer': 'adam',
    'loss': 'binary_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

# Print the layer-by-layer overview of the network
model.summary()

# **Setup early stopping**

In [None]:
# Early stopping is a technique used during model training to stop training early if the model's performance stops improving on the validation set. This helps avoid overfitting
# early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# **Train Model**

In [None]:
# Stop training once val_loss has not improved for 3 consecutive epochs and roll
# the model back to the weights of its best epoch. This limits overfitting.
early_stop = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# NOTE(review): the test generator doubles as the validation set, so early
# stopping picks weights using the same data that is later passed to
# model.evaluate() -- consider holding out a separate validation split.
history = model.fit(
    train_generator,
    validation_data=test_generator,
    epochs=10,
    callbacks=[early_stop],
)

# **Evaluate Model**

In [None]:
# evaluate() yields the loss followed by the compiled metric (accuracy)
evaluation = model.evaluate(test_generator)
loss, acc = evaluation
print(f"✅ Test Accuracy: {acc * 100:.2f}%")

# **Save Model**

In [None]:
from google.colab import files

# Write the trained model to an .h5 file in the Colab runtime...
saved_model_path = "/content/male_female_cnn_model.h5"
model.save(saved_model_path)

# ...then hand the same file to the browser so it is downloaded locally
files.download(saved_model_path)

In [None]:
def _plot_curves(train_key, val_key, train_label, val_label, y_label, title):
    """Draw the per-epoch training and validation series of one metric in its own figure."""
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.xlabel("Epochs")
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.show()


# Accuracy curves
_plot_curves('accuracy', 'val_accuracy', 'Train Acc', 'Val Acc',
             "Accuracy", "Training vs Validation Accuracy")

# Loss curves
_plot_curves('loss', 'val_loss', 'Train Loss', 'Val Loss',
             "Loss", "Training vs Validation Loss")

# **Testing**

This step **tests the model** on images from the test data

In [None]:
# Predict on the first 5 test images and show each one with its predicted label,
# actual label and confidence.

# `image` was never imported anywhere in the notebook; bring it in here so this
# cell runs on its own.
from tensorflow.keras.preprocessing import image

# `class_names` was never defined either. Derive it from the generator's
# name -> index mapping, ordered by index, so class_names[i] is the class the
# model was trained to output as label i.
class_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)

max_images = 5
test_root = '/content/split_faces/Testing'
image_suffixes = ('.png', '.jpg', '.jpeg')

plt.figure(figsize=(12, 10))

# Number of images successfully predicted and plotted so far
processed_images_count = 0

# Walk the class sub-folders of the test set in label order
for class_name in class_names:
    if processed_images_count >= max_images:
        break

    class_folder = os.path.join(test_root, class_name)
    if not os.path.isdir(class_folder):
        continue

    # Sorted listing keeps the selection of images stable between runs
    for img_name in sorted(os.listdir(class_folder)):
        if processed_images_count >= max_images:
            break

        img_path = os.path.join(class_folder, img_name)
        if not (os.path.isfile(img_path) and img_name.lower().endswith(image_suffixes)):
            continue

        try:
            # Load at the network's input size and scale pixels to [0, 1],
            # matching the rescale=1./255 used by the data generators
            img = image.load_img(img_path, target_size=(64, 64))
            img_array = image.img_to_array(img) / 255.0
            img_array_expanded = np.expand_dims(img_array, axis=0)  # add batch dimension

            # The single sigmoid output is the score for the class with index 1
            prediction = model.predict(img_array_expanded)
            confidence = float(prediction[0][0])

            predicted_index = 1 if confidence >= 0.5 else 0
            predicted_label = class_names[predicted_index]
            if predicted_index == 0:
                confidence = 1 - confidence  # confidence for the index-0 class

            # 2x3 grid has room for the 5 images
            plt.subplot(2, 3, processed_images_count + 1)
            plt.imshow(img_array)
            plt.axis('off')

            # The actual label is the name of the folder the image sits in
            actual_label = class_name

            plt.title(f"Pred: {predicted_label}\nActual: {actual_label}\nConf: {confidence*100:.2f}%")

            processed_images_count += 1

        except Exception as e:
            # Best-effort display: report the failing image and move on
            print(f"Error processing image {img_path}: {e}")
            continue

plt.tight_layout()
plt.show()