In [4]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
maciejgronczynski_biggest_genderface_recognition_dataset_path = kagglehub.dataset_download('maciejgronczynski/biggest-genderface-recognition-dataset')
trainingdatapro_gender_detection_and_classification_image_dataset_path = kagglehub.dataset_download('trainingdatapro/gender-detection-and-classification-image-dataset')

print('Data source import complete.')

Using Colab cache for faster access to the 'biggest-genderface-recognition-dataset' dataset.
Using Colab cache for faster access to the 'gender-detection-and-classification-image-dataset' dataset.
Data source import complete.


In [5]:
import os

# The correct path is in the output of the cell above
data_dir = maciejgronczynski_biggest_genderface_recognition_dataset_path + '/faces'

os.listdir(data_dir)

['man', 'woman']

In [6]:
import tensorflow as tf
import numpy as np
import random
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


In [7]:
SIZE = 128
BATCH_SIZE = 16
AUTOTUNE = tf.data.AUTOTUNE

In [8]:
# The correct path is in the output of the cell above
data_dir = maciejgronczynski_biggest_genderface_recognition_dataset_path + '/faces'
data_test = trainingdatapro_gender_detection_and_classification_image_dataset_path + '/test'

def load_dataset(directory, validation_split=None, subset=None):
    return tf.keras.utils.image_dataset_from_directory(
        directory,
        validation_split=validation_split,
        subset=subset,
        seed=123,
        image_size=(SIZE, SIZE),
        batch_size=BATCH_SIZE)

train_ds = load_dataset(data_dir, validation_split=0.2, subset="training")
val_ds = load_dataset(data_dir, validation_split=0.2, subset="validation")
ds_test = load_dataset(data_test)

class_names = train_ds.class_names
print("Classes:", class_names)

Found 27167 files belonging to 2 classes.
Using 21734 files for training.
Found 27167 files belonging to 2 classes.
Using 5433 files for validation.
Found 80 files belonging to 2 classes.
Classes: ['man', 'woman']


In [9]:
train_size = len(train_ds) * BATCH_SIZE
val_size = len(val_ds) * BATCH_SIZE
test_size = len(ds_test) * BATCH_SIZE

print(f"Training set size: {train_size} images")
print(f"Validation set size: {val_size} images")
print(f"Test set size: {test_size} images")

num_classes = len(class_names)
print(f"Number of classes: {num_classes}")

Training set size: 21744 images
Validation set size: 5440 images
Test set size: 80 images
Number of classes: 2


In [10]:
# Dataset balancing
men, women = [], []
for batch, labels in train_ds:
    for img, label in zip(batch, labels):
        (men if label == 0 else women).append((img, label))

smaller_class_size = min(len(men), len(women))
men_reduced = random.sample(men, smaller_class_size)
balanced_data = men_reduced + women
random.shuffle(balanced_data)

imgs, labels = zip(*balanced_data)
imgs_tensor, labels_tensor = tf.stack(imgs), tf.convert_to_tensor(labels)

balanced_train_ds = tf.data.Dataset.from_tensor_slices((imgs_tensor, labels_tensor))
balanced_train_ds = balanced_train_ds.batch(BATCH_SIZE).cache().prefetch(buffer_size=AUTOTUNE)

In [11]:
# Checking the number of images for each class before balancing
print(f"Number of men before balancing: {len(men)}")
print(f"Number of women before balancing: {len(women)}")

# Quantity after balancing
print(f"Number of men after balancing: {len(men_reduced)}")
print(f"Number of women after balancing: {len(women)}")

# We can also check in the final balanced dataset
balanced_men = sum(1 for _, label in balanced_data if label == 0)
balanced_women = sum(1 for _, label in balanced_data if label == 1)

print(f"Number of men in the balanced dataset: {balanced_men}")
print(f"Number of women in the balanced dataset: {balanced_women}")

Number of men before balancing: 14173
Number of women before balancing: 7561
Number of men after balancing: 7561
Number of women after balancing: 7561
Number of men in the balanced dataset: 7561
Number of women in the balanced dataset: 7561


In [None]:
import cv2
import numpy as np
from IPython.display import display, Image
import random

def plot_images(dataset, class_names):
    # Convert the dataset to a list
    images_list = []
    labels_list = []

    # Populate lists with images and labels
    for images, labels in dataset:
        images_list.append(images.numpy())
        labels_list.append(labels.numpy())

    # Flatten lists for easy random selection
    all_images = np.concatenate(images_list, axis=0)
    all_labels = np.concatenate(labels_list, axis=0)

    # Randomly select 9 images
    random_indices = random.sample(range(len(all_images)), 9)

    for i in random_indices:
        img = all_images[i].astype("uint8")  # Convert image to uint8
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # Convert from RGB to BGR (OpenCV uses BGR)

        # Convert the image to a format that IPython can display
        _, img_encoded = cv2.imencode('.png', img)
        img_display = img_encoded.tobytes()  # Convert to bytes

        # Display the image with IPython
        display(Image(data=img_display, format='png', width=200))

        # Show the class title
        print(f"Class: {class_names[all_labels[i]]}")

plot_images(balanced_train_ds, class_names)

In [1]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
# Create EfficientNetB0 model
base_model = tf.keras.applications.EfficientNetB0(
    weights='imagenet', include_top=False, input_shape=(SIZE, SIZE, 3)
)
print(f'Total layers in base model: {len(base_model.layers)}')
# Freeze the base initially
base_model.trainable = False
inputs = layers.Input(shape=(SIZE, SIZE, 3))
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.20)(x)
x = layers.Dense(1024, activation='relu')(x)
outputs = layers.Dense(1, activation='sigmoid', kernel_regularizer=tf.keras.regularizers.l1_l2(l1=1e-4, l2=1e-3))(x)
model = tf.keras.Model(inputs, outputs)
# Initial compilation
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC()]
)
# Define callbacks
early_stopping = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.5,
                              patience=5,
                              verbose=1)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    '/content/best_model.keras', save_best_only=True, monitor='val_loss', mode='min')
# Initial training (only the model head)
history = model.fit(
    balanced_train_ds,
    validation_data=val_ds,
    epochs=5,
    callbacks=[reduce_lr, model_checkpoint]
)
# Fine-Tuning: unfreeze part of the base
print("\nFine Tuning\n")
for layer in base_model.layers[-30:]:
    layer.trainable = True
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC()]
)
# Training with Fine-Tuning
history_fine = model.fit(
    balanced_train_ds,
    validation_data=val_ds,
    epochs=5,
    callbacks=[reduce_lr, early_stopping, model_checkpoint]
)
# After training, save the final model in .h5 format
model.save('/content/final_model.h5', include_optimizer=False)

NameError: name 'SIZE' is not defined

In [None]:
# Evaluation
val_results = model.evaluate(val_ds)
test_results = model.evaluate(ds_test)
print(f"Validation - Loss: {val_results[0]}, Accuracy: {val_results[1]}, AUC: {val_results[2]}")
print(f"Test - Loss: {test_results[0]}, Accuracy: {test_results[1]}, AUC: {test_results[2]}")
# Prediction test
from tensorflow.keras.applications.efficientnet import preprocess_input

def predict_sample(model, dataset, class_names):
    for image_batch, labels_batch in dataset.take(1):
        test_image = image_batch[0]
        test_label = labels_batch[0]

    test_image = preprocess_input(test_image)  # Normalization correction
    test_image = np.expand_dims(test_image, axis=0)
    prediction = model.predict(test_image)

    predicted_class = "Woman" if prediction >= 0.5 else "Man"
    true_class = class_names[int(test_label)]  # True class correction

    plt.imshow(image_batch[0].numpy().astype("uint8"))  # Display image correctly
    plt.axis('off')
    plt.show()

    print(f"Prediction: {predicted_class} (Probability: {prediction[0][0]:.4f})")
    print(f"True label: {true_class}")

predict_sample(model, train_ds, class_names)

In [None]:
predict_sample(model, train_ds, class_names)


In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.applications.efficientnet import preprocess_input
from PIL import Image
# 📌 Load the trained model
model = tf.keras.models.load_model("/content/best_model.keras")
# 📌 Function to load and preprocess the image
def preprocess_image(image_path, target_size=(SIZE, SIZE)):
    image = Image.open(image_path)  # Open image
    image = image.resize(target_size)  # Resize
    image = np.array(image)  # Convert to array
    image = preprocess_input(image)  # Apply EfficientNet preprocessing
    image = np.expand_dims(image, axis=0)  # Add batch dimension

    return image

ValueError: File not found: filepath=/kaggle/working/best_model.keras. Please ensure the file is an accessible `.keras` zip file.

In [3]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.applications.efficientnet import preprocess_input
from PIL import Image
# 📌 Load the trained model
model = tf.keras.models.load_model("/content/best_model.keras")
# 📌 Function to load and preprocess the image
def preprocess_image(image_path, target_size=(SIZE, SIZE)):
    image = Image.open(image_path)  # Open image
    image = image.resize(target_size)  # Resize
    image = np.array(image)  # Convert to array
    image = preprocess_input(image)  # Apply EfficientNet preprocessing
    image = np.expand_dims(image, axis=0)  # Add batch dimension

    return image
# 📌 Path of the image you want to test
image_path = trainingdatapro_gender_detection_and_classification_image_dataset_path + "/test/men/120.jpg"
#image_path = trainingdatapro_gender_detection_and_classification_image_dataset_path + "/test/women/139.jpg"
# 📌 Load and process the image
image = preprocess_image(image_path)
# 📌 Make the prediction
prediction = model.predict(image)
# 📌 Determine the predicted class
predicted_class = "Woman" if prediction >= 0.5 else "Man"
# 📌 Display the image and result
plt.imshow(Image.open(image_path))
plt.axis('off')
plt.show()
print(f"Prediction: {predicted_class} (Probability: {prediction[0][0]:.4f})")

ValueError: File not found: filepath=/kaggle/working/best_model.keras. Please ensure the file is an accessible `.keras` zip file.

In [4]:
import keras
model_path  = '/content/final_model.h5'
model_test = keras.models.load_model(model_path, compile=False)
model_test.summary()

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = '/kaggle/working/final_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)