# 1. Mount Google Drive and Set the Path






In [None]:
# Mount Google Drive at /content/gdrive so the dataset and the saved models
# stored under MyDrive can be reached through ordinary file paths.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Dataset locations on the mounted Drive. The "//" separators are kept
# exactly as before so the resulting path strings are unchanged.
ROOT = "/content/gdrive/MyDrive/IndonesiaAI/Project 1 - Face Recognition/Dataset"
IMG_PATH = f"{ROOT}//Dataset//Images"
ATTR_PATH = f"{ROOT}//list_attr_celeba.csv"

# Random seed shared by the train/test split and the data generators.
seed = 46

# 2. Setting Environment

In [None]:
import os
import pandas as pd
import numpy as np

import seaborn as sns
import cv2
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras

In [None]:
# Stop right away when the runtime has no GPU at the expected device name.
device_name = tf.test.gpu_device_name()
gpu_found = device_name == '/device:GPU:0'
if not gpu_found:
  raise SystemError('GPU device not found')
print(f'Found GPU at: {device_name}')

# 3. Data Cleaning

In [None]:
# Read the attribute table from ATTR_PATH and preview its first rows.
attr_df = pd.read_csv(ATTR_PATH)
attr_df.head()

In [None]:
attr_df.info()

In [None]:
attr_df.index

In [None]:
# Keep only the image file name column and the "Male" attribute.
male_df = attr_df.loc[:, ["image_id", "Male"]]
male_df.head()

In [None]:
# Recode -1 as 0 in place so "Male" becomes a 0/1 label
# (the plots below caption 0 as Female and 1 as Male).
male_df.replace(-1, 0, inplace=True)

In [None]:
male_df.head()

In [None]:
male_df.describe()

In [None]:
# Keep only the rows whose image file is actually present in IMG_PATH.
image_files = set(os.listdir(IMG_PATH))
has_image_on_disk = male_df['image_id'].isin(image_files)
filtered_Male_df = male_df[has_image_on_disk]

In [None]:
filtered_Male_df.head()

In [None]:
print("Filtered image :", filtered_Male_df['Male'].count())

In [None]:
# Tally how many images carry each value of the "Male" label.
images_per_label = filtered_Male_df.groupby('Male')['image_id'].count()
gender_count = images_per_label.reset_index(name='count')

# Bar chart of the tallies, with the 0/1 tick labels spelled out.
sns.barplot(data=gender_count, x='Male', y='count')
plt.xticks([0, 1], ['Female', 'Male'])

# Title and axis captions.
plt.title('Count of Males and Females')
plt.xlabel('Gender')
plt.ylabel('Count')

plt.show()

In [None]:
# Count the images per label. sort_index() puts label 0 before label 1 so the
# bar order no longer depends on which class happens to be more frequent.
gender_count = filtered_Male_df['Male'].value_counts().sort_index()

# Share of each label among all filtered images, in percent.
gender_percentages = (gender_count / filtered_Male_df['Male'].count()) * 100

# Bar chart of the percentages.
sns.barplot(x=gender_percentages.index, y=gender_percentages.values)

# Caption the 0/1 ticks the same way as the count chart
# (0 -> Female, 1 -> Male) instead of showing the raw label values.
plt.xticks([0, 1], ['Female', 'Male'])

# Title and axis captions.
plt.title('Percentage of People by Gender')
plt.xlabel('Gender')
plt.ylabel('Percentage')

plt.show()

In [None]:
# Rows per label (0 is reported as Female, 1 as Male) and the dataset size.
total_images = len(filtered_Male_df)
female_count = len(filtered_Male_df[filtered_Male_df['Male'] == 0])
male_count = len(filtered_Male_df[filtered_Male_df['Male'] == 1])

# Each label's share of the filtered dataset, in percent.
female_percentage = (female_count / total_images) * 100
male_percentage = (male_count / total_images) * 100

# Summary keyed by gender name.
gender_counts = {
    'Female': {'count': female_count, 'percentage': female_percentage},
    'Male': {'count': male_count, 'percentage': male_percentage},
}

# Report the count and the percentage for each gender.
for gender, stats in gender_counts.items():
    print(f"{gender}:")
    print(f"Count: {stats['count']}")
    print(f"Percentage: {stats['percentage']:.2f}%")

# 4. Data Loading and Balancing

In [None]:
# Paths of the sample images loaded in the cells below.
# NOTE(review): both constants point at the same file (000065.jpg) — confirm
# whether IMG_EXAM_2 was meant to reference a different image.
IMG_EXAM_1 = IMG_PATH + "//" + "000065.jpg"
IMG_EXAM_2 = IMG_PATH + "//" + "000065.jpg"

In [None]:
# example of loading an image with the Keras API
from keras.preprocessing.image import load_img
# load the first sample image
img = load_img(IMG_EXAM_1)
# report details about the image
print(type(img))
print(img.format)
print(img.mode)
print(img.size)
# Render the image inline with matplotlib. PIL's Image.show() hands the
# picture to an external viewer program, which is typically unavailable in a
# hosted notebook, so nothing would be displayed.
plt.imshow(img)
plt.axis('off')
plt.show()

In [None]:
# example of loading an image with the Keras API
from keras.preprocessing.image import load_img
# load the second sample image
img = load_img(IMG_EXAM_2)
# report details about the image
print(type(img))
print(img.format)
print(img.mode)
print(img.size)
# Render the image inline with matplotlib. PIL's Image.show() hands the
# picture to an external viewer program, which is typically unavailable in a
# hosted notebook, so nothing would be displayed.
plt.imshow(img)
plt.axis('off')
plt.show()

In [None]:
# Load the sample image, convert it to an array and add a leading axis so
# the result is laid out as a batch containing one image.
image = tf.keras.utils.load_img(IMG_EXAM_1)
input_arr = np.expand_dims(tf.keras.utils.img_to_array(image), axis=0)
input_arr.shape

In [None]:
from sklearn.model_selection import train_test_split

# Work on an independent copy of the filtered frame, then hold out 20% of
# the rows for testing with a fixed random seed.
data = filtered_Male_df.copy()
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=seed,
)

In [None]:
# Cast the "Male" labels to strings in both splits before they are handed
# to flow_from_dataframe with class_mode='binary'.
for split_df in (train_data, test_data):
    split_df['Male'] = split_df['Male'].astype(str)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define image dimensions and batch size
image_size = (218, 178)
batch_size = 32

# Fraction of train_data reserved for validation.
validation_fraction = 0.2

# Augmenting generator — used for the TRAINING subset only.
datagen = ImageDataGenerator(
    rescale=1.0 / 255,  # Normalize pixel values
    rotation_range=20,  # Data augmentation options (adjust as needed)
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    validation_split=validation_fraction
)

# Validation images are only rescaled. Random rotations/shifts/flips would
# make the validation metrics noisy and not comparable between epochs.
# The same validation_split is passed so that subset='validation' selects the
# complement of the rows that `datagen` uses for subset='training'.
validation_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=validation_fraction
)

# Test images are only rescaled, so evaluation is deterministic and measures
# the model on unmodified images.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

train_generator = datagen.flow_from_dataframe(
    dataframe=train_data,
    directory=IMG_PATH,
    x_col='image_id',
    y_col='Male',
    target_size=image_size,
    batch_size=batch_size,
    class_mode='binary',
    subset='training',
    seed=seed,
    shuffle=True  # Shuffle the training data
)

validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory=IMG_PATH,
    x_col='image_id',
    y_col='Male',
    target_size=image_size,
    batch_size=batch_size,
    class_mode='binary',
    subset='validation',
    seed=seed
)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_data,
    directory=IMG_PATH,
    x_col='image_id',
    y_col='Male',
    target_size=image_size,
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False  # Keep file order so predictions line up with .classes
)

In [None]:
import matplotlib.pyplot as plt

# Pull one batch (images, labels) from the training generator.
first_batch = next(train_generator)
x_batch, y_batch = first_batch

# Show the first image of that batch without axis ticks.
image_to_display = x_batch[0]
plt.imshow(image_to_display)
plt.axis('off')
plt.show()

In [None]:
# Mapping from class name to numeric class index, as exposed by the
# training generator.
class_labels = train_generator.class_indices
print("Class labels in train generator:", class_labels)

# 5. [Disclaimer] Model example

## A. VGG19

In [None]:
# import tensorflow as tf
# from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from tensorflow.keras.applications import VGG19
# from tensorflow.keras import layers, models
# from tensorflow.keras.optimizers import Adam

# # Load the VGG19 pre-trained model (excluding top layers)
# base_model_vgg19 = VGG19(weights='imagenet', include_top=False, input_shape=(218, 178, 3))

# # Create a custom top model for binary classification
# model_vgg19 = models.Sequential()
# model_vgg19.add(base_model_vgg19)
# model_vgg19.add(layers.Flatten())
# model_vgg19.add(layers.Dense(256, activation='relu'))
# model_vgg19.add(layers.Dropout(0.5))
# model_vgg19.add(layers.Dense(1, activation='sigmoid'))  # Output layer for binary classification

# # Compile the model
# model_vgg19.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])




In [None]:
# # Train the model
# history1 = model_vgg19.fit(train_generator, epochs=10, validation_data=validation_generator)

In [None]:
# model.save(os.path.join(vgg19_dir, "vgg19_model_v1.h5"))

## B. ResNet101

In [None]:
# import tensorflow as tf
# from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from tensorflow.keras.applications import ResNet101
# from tensorflow.keras import layers, models
# from tensorflow.keras.optimizers import Adam

# # Load the ResNet-101 pre-trained model (excluding top layers)
# base_model_resnet101 = ResNet101(weights='imagenet', include_top=False, input_shape=(218, 178, 3))

# # Create a custom top model for binary classification
# model_resnet101 = models.Sequential()
# model_resnet101.add(base_model_resnet101)
# model_resnet101.add(layers.Flatten())
# model_resnet101.add(layers.Dense(256, activation='relu'))
# model_resnet101.add(layers.Dropout(0.5))
# model_resnet101.add(layers.Dense(1, activation='sigmoid'))  # Output layer for binary classification

# # Compile the model
# model_resnet101.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# # Train the model
# history2 = model_resnet101.fit(train_generator, epochs=10, validation_data=validation_generator)

In [None]:
# model.save(os.path.join(resnet101_dir, "resnet101_model_v1.h5"))

## C. VGG16

In [None]:
# import tensorflow as tf
# from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from tensorflow.keras.applications import VGG16
# from tensorflow.keras import layers, models
# from tensorflow.keras.optimizers import Adam

# # Load the VGG16 pre-trained model (excluding top layers)
# base_model_vgg16 = VGG16(weights='imagenet', include_top=False, input_shape=(218, 178, 3))

# # Create a custom top model for binary classification
# model_vgg16 = models.Sequential()
# model_vgg16.add(base_model_vgg16)
# model_vgg16.add(layers.Flatten())
# model_vgg16.add(layers.Dense(256, activation='relu'))
# model_vgg16.add(layers.Dropout(0.5))
# model_vgg16.add(layers.Dense(1, activation='sigmoid'))  # Output layer for binary classification

# # Compile the model
# model_vgg16.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# # Train the model
# history3 = model_vgg16.fit(train_generator, epochs=10, validation_data=validation_generator)

In [None]:
# model.save(os.path.join(vgg16_dir, "vgg16_model_v1.h5"))

## D. ResNet50

In [None]:
# import tensorflow as tf
# from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from tensorflow.keras.applications import ResNet50
# from tensorflow.keras import layers, models
# from tensorflow.keras.optimizers import Adam

# # Load the ResNet-50 pre-trained model (excluding top layers)
# base_model_resnet50 = ResNet50(weights='imagenet', include_top=False, input_shape=(218, 178, 3))

# # Create a custom top model for binary classification
# model_resnet50 = models.Sequential()
# model_resnet50.add(base_model_resnet50)
# model_resnet50.add(layers.Flatten())
# model_resnet50.add(layers.Dense(256, activation='relu'))
# model_resnet50.add(layers.Dropout(0.5))
# model_resnet50.add(layers.Dense(1, activation='sigmoid'))  # Output layer for binary classification

# # Compile the model
# model_resnet50.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:

# # Train the model
# history4 = model_resnet50.fit(train_generator, epochs=10, validation_data=validation_generator)

In [None]:
# model.save(os.path.join(resnet50_dir, "resnet50_model_v1.h5"))

## E. GoogleNet

In [None]:
# import tensorflow as tf
# import tensorflow_hub as hub
# from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten
# from tensorflow.keras.optimizers import Adam

# # Load the InceptionV1 model from TensorFlow Hub with ImageNet weights
# hub_model_url = "https://tfhub.dev/google/imagenet/inception_v1/classification/4"
# base_model_googleNet = hub.KerasLayer(hub_model_url, trainable=False)

# # Create a custom top model for binary classification
# model_googleNet = tf.keras.Sequential([
#     base_model_googleNet,
#     Flatten(),
#     Dense(256, activation='relu'),
#     Dropout(0.5),
#     Dense(1, activation='sigmoid')  # Output layer for binary classification
# ])

# # Compile the model
# model_googleNet.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# # Train the model
# history5 = model_googleNet.fit(train_generator, epochs=10, validation_data=validation_generator)

In [None]:
# model.save(os.path.join(googlenet_dir, "googlenet_model_v1.h5"))

# 6. Plot the Training

In [None]:
# # Define a function to plot training history
# def plot_training_history(history):
#     plt.figure(figsize=(12, 4))

#     plt.subplot(1, 2, 1)
#     plt.plot(history.history['loss'], label='Training Loss')
#     plt.plot(history.history['val_loss'], label='Validation Loss')
#     plt.title('Training and Validation Loss')
#     plt.xlabel('Epoch')
#     plt.ylabel('Loss')
#     plt.legend()

#     plt.subplot(1, 2, 2)
#     plt.plot(history.history['accuracy'], label='Training Accuracy')
#     plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
#     plt.title('Training and Validation Accuracy')
#     plt.xlabel('Epoch')
#     plt.ylabel('Accuracy')
#     plt.legend()

#     plt.show()

In [None]:
# # VGG19
# plot_training_history(history1)

In [None]:
# # ResNet101
# plot_training_history(history2)

In [None]:
# # VGG16
# plot_training_history(history3)

In [None]:
# # ResNet50
# plot_training_history(history4)

In [None]:
# # GoogleNet
# plot_training_history(history5)

# 7. Save The Model

In [None]:
import os

# Drive folder that holds the saved .h5 model files read back in the
# evaluation section.
save_path = "/content/gdrive/MyDrive/IndonesiaAI/Project 1 - Face Recognition/save_model"

In [None]:
# # Save the trained VGG19 model
# model_vgg19.save(os.path.join(save_path, "vgg19_model.h5"))

# # Save the trained ResNet101 model
# model_resnet101.save(os.path.join(save_path, "resnet101_model.h5"))

# # Save the trained VGG16 model
# model_vgg16.save(os.path.join(save_path, "vgg16_model.h5"))

# # Save the trained ResNet50 model
# model_resnet50.save(os.path.join(save_path, "resnet50_model.h5"))

# # Save the trained GoogleNet/InceptionV1 model
# model_googleNet.save(os.path.join(save_path, "googlenet_model.h5"))



# 8. Model Evaluation

In [None]:
# Restore the trained models from the .h5 files stored under save_path.
def _load_saved_model(filename):
    """Load the Keras model stored as *filename* inside ``save_path``."""
    return tf.keras.models.load_model(os.path.join(save_path, filename))


resnet50 = _load_saved_model("resnet50_model.h5")
vgg19 = _load_saved_model("vgg19_model.h5")
vgg16 = _load_saved_model("vgg16_model.h5")
resnet101 = _load_saved_model("resnet101_model.h5")

In [None]:
# Evaluate the restored ResNet50 on the test generator and report accuracy.
resnet50_metrics = resnet50.evaluate(test_generator, verbose=2)
loss, acc = resnet50_metrics
print(f'Restored model, accuracy: {100 * acc:5.2f}%')

In [None]:
def evaluate_model(model, test_generator):
    """Return the accuracy reported by ``model.evaluate`` for *test_generator*.

    ``model.evaluate`` is called with ``verbose=2`` and must return exactly
    two values, ``(loss, accuracy)``; the loss is discarded.
    """
    metrics = model.evaluate(test_generator, verbose=2)
    _loss, accuracy = metrics
    return accuracy

# Score every restored model on the same test generator.
resnet50_accuracy = evaluate_model(resnet50, test_generator)
vgg19_accuracy = evaluate_model(vgg19, test_generator)
vgg16_accuracy = evaluate_model(vgg16, test_generator)
resnet101_accuracy = evaluate_model(resnet101, test_generator)
# googlenet_accuracy = evaluate_model(model_googleNet, test_generator)

# Report each accuracy as a percentage, in the order the models were scored.
# GoogLeNet is left out for now because its saved model can't be loaded.
accuracy_report = (
    ('ResNet50', resnet50_accuracy),
    ('VGG19', vgg19_accuracy),
    ('VGG16', vgg16_accuracy),
    ('ResNet101', resnet101_accuracy),
)
for model_label, model_accuracy in accuracy_report:
    print(f'{model_label} - Accuracy: {100 * model_accuracy:.2f}%')

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Define the model names and their corresponding test accuracies
model_names = ["ResNet50", "VGG19", "VGG16", "ResNet101"]
test_accuracies = [resnet50_accuracy, vgg19_accuracy, vgg16_accuracy, resnet101_accuracy]

# Create a dataframe for the data
import pandas as pd
data = pd.DataFrame({'Model': model_names, 'Test Accuracy': test_accuracies})

# Sort the data by test accuracy in ascending order. The index is reset so
# that row k of the frame corresponds to the k-th bar drawn below.
data = data.sort_values(by='Test Accuracy', ascending=True).reset_index(drop=True)

# Create a bar plot to compare test accuracies
plt.figure(figsize=(10, 6))
plot = sns.barplot(x='Model', y='Test Accuracy', data=data, palette='viridis')
plt.title('Test Accuracy Comparison of Different Models')
plt.ylim(0, 1.1)  # Set the y-axis limit to represent accuracy in [0, 1]
plt.xlabel('Model')
plt.ylabel('Test Accuracy')
plt.xticks(rotation=45)

# Annotate the bars with accuracy values. Bars are drawn in row order, so the
# bar position is used as the x coordinate; using the pre-sort index labels
# would put each percentage above the wrong bar.
for position, accuracy in enumerate(data['Test Accuracy']):
    plot.text(position, accuracy, f'{100 * accuracy:.2f}%', ha='center', va='bottom')

plt.show()


# Try the evaluation for one model (VGG19)

In [None]:
# Class balance of the test generator: how often each class index occurs.
test_classes = test_generator.classes
print(type(test_classes.count(0)))
for class_index in (0, 1):
    print(f"{class_index} : {test_classes.count(class_index)}")

In [None]:
# Class balance of the training generator. The type check now queries the
# same generator as the counts instead of the copy-pasted test_generator.
print(type(train_generator.classes.count(0)))
print(f"0 : {train_generator.classes.count(0)}")
print(f"1 : {train_generator.classes.count(1)}")

In [None]:
# Class balance of the validation generator. The type check now queries the
# same generator as the counts instead of the copy-pasted test_generator.
print(type(validation_generator.classes.count(0)))
print(f"0 : {validation_generator.classes.count(0)}")
print(f"1 : {validation_generator.classes.count(1)}")

In [None]:
# Define the number of images you want to retrieve
num_images_to_retrieve = 9

# Create a list to store the retrieved images
retrieved_images = []

# Loop to retrieve images: one batch is drawn from test_generator per
# iteration and a single image is kept from it.
for _ in range(num_images_to_retrieve):
    try:
        x_batch, _ = next(test_generator)
        retrieved_images.append(x_batch[3])  # Keep the image at index 3 (the fourth one) of each batch
    except StopIteration:
        # Stop collecting if the generator runs out of batches.
        break

In [None]:
# Make predictions for the retrieved images
predictions = vgg19.predict(np.array(retrieved_images))

# Create a 3x3 subplot grid to display the images with predictions
fig, axs = plt.subplots(3, 3, figsize=(10, 10))

# Hide the axes of every cell up front so cells without an image stay blank.
for ax in axs.flat:
    ax.axis('off')

# Fill the grid row by row. Iterating over the images that were actually
# retrieved (instead of a fixed range(9)) avoids an IndexError when fewer
# than nine images are available.
for ax, image, prediction in zip(axs.flat, retrieved_images, predictions):
    ax.imshow(image)

    # Outputs of at least 0.5 are shown as "Male", anything lower as "Female".
    predicted_class = "Male" if prediction >= 0.5 else "Female"
    ax.set_title(f"Predicted: {predicted_class}")

plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
import itertools

# Score the whole test generator with VGG19; the generator's `classes`
# attribute supplies the ground-truth label of every sample.
predictions = vgg19.predict(test_generator)
true_labels = test_generator.classes

# Rounded model outputs are used as the predicted class ids.
confusion = confusion_matrix(true_labels, np.round(predictions))

# Tick captions and their positions along both axes.
class_labels = ['Female', 'Male']
tick_marks = np.arange(len(class_labels))

# Draw the matrix as a heat map.
plt.figure(figsize=(8, 6))
plt.imshow(confusion, interpolation='nearest', cmap=plt.get_cmap('Blues'))
plt.title('Confusion Matrix : VGG19')
plt.colorbar()
plt.xticks(tick_marks, class_labels, rotation=45)
plt.yticks(tick_marks, class_labels)

# Write each count into its cell; cells above half of the largest count get
# white text, the others black.
thresh = confusion.max() / 2.0
n_rows, n_cols = confusion.shape
for i, j in itertools.product(range(n_rows), range(n_cols)):
    cell_count = confusion[i, j]
    text_color = "white" if cell_count > thresh else "black"
    plt.text(j, i, cell_count, horizontalalignment="center", color=text_color)

plt.ylabel('True')
plt.xlabel('Predicted')
plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
import itertools

# Score the whole test generator with ResNet101; the generator's `classes`
# attribute supplies the ground-truth label of every sample.
predictions = resnet101.predict(test_generator)
true_labels = test_generator.classes

# Rounded model outputs are used as the predicted class ids.
confusion = confusion_matrix(true_labels, np.round(predictions))

# Tick captions and their positions along both axes.
class_labels = ['Female', 'Male']
tick_marks = np.arange(len(class_labels))

# Draw the matrix as a heat map.
plt.figure(figsize=(8, 6))
plt.imshow(confusion, interpolation='nearest', cmap=plt.get_cmap('Blues'))
plt.title('Confusion Matrix : ResNet101')
plt.colorbar()
plt.xticks(tick_marks, class_labels, rotation=45)
plt.yticks(tick_marks, class_labels)

# Write each count into its cell; cells above half of the largest count get
# white text, the others black.
thresh = confusion.max() / 2.0
n_rows, n_cols = confusion.shape
for i, j in itertools.product(range(n_rows), range(n_cols)):
    cell_count = confusion[i, j]
    text_color = "white" if cell_count > thresh else "black"
    plt.text(j, i, cell_count, horizontalalignment="center", color=text_color)

plt.ylabel('True')
plt.xlabel('Predicted')
plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
import itertools

# Score the whole test generator with VGG16; the generator's `classes`
# attribute supplies the ground-truth label of every sample.
predictions = vgg16.predict(test_generator)
true_labels = test_generator.classes

# Rounded model outputs are used as the predicted class ids.
confusion = confusion_matrix(true_labels, np.round(predictions))

# Tick captions and their positions along both axes.
class_labels = ['Female', 'Male']
tick_marks = np.arange(len(class_labels))

# Draw the matrix as a heat map.
plt.figure(figsize=(8, 6))
plt.imshow(confusion, interpolation='nearest', cmap=plt.get_cmap('Blues'))
plt.title('Confusion Matrix : VGG16')
plt.colorbar()
plt.xticks(tick_marks, class_labels, rotation=45)
plt.yticks(tick_marks, class_labels)

# Write each count into its cell; cells above half of the largest count get
# white text, the others black.
thresh = confusion.max() / 2.0
n_rows, n_cols = confusion.shape
for i, j in itertools.product(range(n_rows), range(n_cols)):
    cell_count = confusion[i, j]
    text_color = "white" if cell_count > thresh else "black"
    plt.text(j, i, cell_count, horizontalalignment="center", color=text_color)

plt.ylabel('True')
plt.xlabel('Predicted')
plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
import itertools

# Score the whole test generator with ResNet50; the generator's `classes`
# attribute supplies the ground-truth label of every sample.
predictions = resnet50.predict(test_generator)
true_labels = test_generator.classes

# Rounded model outputs are used as the predicted class ids.
confusion = confusion_matrix(true_labels, np.round(predictions))

# Tick captions and their positions along both axes.
class_labels = ['Female', 'Male']
tick_marks = np.arange(len(class_labels))

# Draw the matrix as a heat map.
plt.figure(figsize=(8, 6))
plt.imshow(confusion, interpolation='nearest', cmap=plt.get_cmap('Blues'))
plt.title('Confusion Matrix : ResNet50')
plt.colorbar()
plt.xticks(tick_marks, class_labels, rotation=45)
plt.yticks(tick_marks, class_labels)

# Write each count into its cell; cells above half of the largest count get
# white text, the others black.
thresh = confusion.max() / 2.0
n_rows, n_cols = confusion.shape
for i, j in itertools.product(range(n_rows), range(n_cols)):
    cell_count = confusion[i, j]
    text_color = "white" if cell_count > thresh else "black"
    plt.text(j, i, cell_count, horizontalalignment="center", color=text_color)

plt.ylabel('True')
plt.xlabel('Predicted')
plt.tight_layout()
plt.show()

# Experiment

In [None]:
# Draw the next batch of images (X) and labels (y) from the test generator.
X, y = next(test_generator)

In [None]:
print(X[25].shape)
print(y[25])

In [None]:
# Image at index 4 of the batch.
# NOTE(review): the name suggests a male sample — verify against y[4].
img_m = X[4]

In [None]:
# Image at index 25 of the batch.
# NOTE(review): the name suggests a female sample — verify against y[25].
img_f = X[25]

In [None]:
plt.imshow(img_f)
plt.axis('off')  # Hide axis labels and ticks
plt.show()

In [None]:
# Add a leading batch axis to the single image and run it through VGG19.
image_f = img_f[np.newaxis, ...]
result = vgg19.predict(image_f)

In [None]:
print(result[0])

In [None]:
# The raw model output is read as P(Male) and its complement as P(Female).
# NOTE(review): assumes class index 1 means "Male" — confirm against
# train_generator.class_indices. `result` is the unreduced predict() output.
probability_male = result  # Probability of being "Male"
probability_female = 1 - result  # Probability of being "Female"

In [None]:
print("Probability of being 'Male':", probability_male)
print("Probability of being 'Female':", probability_female)

In [None]:
plt.imshow(img_m)
plt.axis('off')  # Hide axis labels and ticks
plt.show()

In [None]:
# Add a leading batch axis to the single image and run it through VGG19.
image_m = img_m[np.newaxis, ...]
result = vgg19.predict(image_m)

In [None]:
result[0]

In [None]:
# The raw model output is read as P(Male) and its complement as P(Female).
# NOTE(review): assumes class index 1 means "Male" — confirm against
# train_generator.class_indices. `result` is the unreduced predict() output.
probability_male = result  # Probability of being "Male"
probability_female = 1 - result  # Probability of being "Female"

In [None]:
print("Probability of being 'Male':", probability_male)
print("Probability of being 'Female':", probability_female)