<a href="https://colab.research.google.com/github/mohmadAyman75/Face-classification/blob/main/VGG16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Mount Google Drive at /content/drive; the dataset used by the later cells
# is read from /content/drive/MyDrive/Final_data.
from google.colab import drive
drive.mount('/content/drive')

import os
import re
import json
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau

from sklearn.model_selection import train_test_split


Mounted at /content/drive


In [3]:
# List the dataset folder on the mounted Drive to see which archives/folders exist.
!ls "/content/drive/MyDrive/Final_data"

train  train-20251212T212831Z-1-001.zip  val  val-20251212T224639Z-1-001.zip


In [4]:
import zipfile
import os

base_path = "/content/drive/MyDrive/Final_data"

# Archives stored on Drive.
zip_train = os.path.join(base_path, "train-20251212T212831Z-1-001.zip")
zip_val   = os.path.join(base_path, "val-20251212T224639Z-1-001.zip")

# Folders the archives are unpacked into.
extract_train = os.path.join(base_path, "train")
extract_val   = os.path.join(base_path, "val")

# Unpack train first, then val. An archive is skipped when its destination
# folder already exists, so re-running the cell does not extract again.
for archive_path, target_dir in ((zip_train, extract_train), (zip_val, extract_val)):
    if os.path.exists(target_dir):
        continue
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall(target_dir)

print("Unzip done")


Unzip done


In [5]:
# Show the contents of the dataset root and of each extraction folder.
for listing_dir in (
    "/content/drive/MyDrive/Final_data",
    "/content/drive/MyDrive/Final_data/train",
    "/content/drive/MyDrive/Final_data/val",
):
    print(os.listdir(listing_dir))


['train-20251212T212831Z-1-001.zip', 'val-20251212T224639Z-1-001.zip', 'train', 'val']
['train']
['val']


In [6]:
# Each archive was extracted into a folder of the same name, so the class
# folders sit one level deeper (train/train and val/val).
train_path_data = "/content/drive/MyDrive/Final_data/train/train"
val_path_data   = "/content/drive/MyDrive/Final_data/val/val"


In [7]:
# Peek at the first five entries of each split directory.
for split_dir in (train_path_data, val_path_data):
    print(os.listdir(split_dir)[:5])


['n000437', 'Mohamed_eissa', '1Mohmad-Ayman-Suleiman', 'n000479', 'n000433']
['n001125', 'n000836', 'n001197', 'n001021', 'n000774']


In [8]:
def natural_sort_key(text):
    """Return a sort key that orders embedded numbers by value.

    The string is split on runs of decimal digits, so "img2" sorts before
    "img10" instead of after it.

    Args:
        text: The string (e.g. a file or folder name) to build a key for.

    Returns:
        A list alternating between text chunks (str, possibly empty) and
        numeric chunks (int). It always starts and ends with a str.
    """
    # With a single capturing group, re.split returns text pieces at even
    # positions and the captured digit runs at odd positions. Deciding by
    # position avoids calling int() on characters such as "²", for which
    # str.isdigit() is true although neither \d nor int() accepts them.
    pieces = re.split(r'(\d+)', text)
    return [int(piece) if position % 2 else piece
            for position, piece in enumerate(pieces)]

def generate_df(dir_path, limit_per_class=None):
    """Index a folder-per-class image directory into a DataFrame.

    Every sub-directory of ``dir_path`` is treated as one class; its name is
    the label. Classes and the files inside them are visited in natural
    sort order (see ``natural_sort_key``).

    Args:
        dir_path: Directory whose sub-directories are the classes.
        limit_per_class: Maximum number of files to keep per class. ``None``
            or ``0`` keeps every file.

    Returns:
        pandas.DataFrame with the columns ``image_path`` (full path of the
        file) and ``label`` (name of the class folder), one row per file.

    Raises:
        ValueError: If ``limit_per_class`` is negative.
    """
    from itertools import islice

    image_path = []
    image_label = []

    for label in sorted(os.listdir(dir_path), key=natural_sort_key):
        label_path = os.path.join(dir_path, label)
        if not os.path.isdir(label_path):
            continue

        candidates = (
            os.path.join(label_path, name)
            for name in sorted(os.listdir(label_path), key=natural_sort_key)
        )
        # Filter out non-files BEFORE applying the limit so that nested
        # folders do not use up the per-class quota. The generator plus
        # islice stops checking entries once the limit is reached.
        regular_files = (path for path in candidates if os.path.isfile(path))
        for full_path in islice(regular_files, limit_per_class or None):
            image_path.append(full_path)
            image_label.append(label)

    return pd.DataFrame({
        "image_path": image_path,
        "label": image_label
    })


In [10]:
# Index both extracted folders (at most 50 files per class) and pool them into
# a single frame; the train/validation partition is drawn again afterwards.
train_df = generate_df(train_path_data, limit_per_class=50)
val_df = generate_df(val_path_data, limit_per_class=50)
train_df = pd.concat([train_df, val_df], ignore_index=True).reset_index(drop=True)

# Keep only the first MAX_CLASSES labels in plain string order.
MAX_CLASSES = 50
selected_labels = sorted(train_df['label'].unique())[:MAX_CLASSES]
train_df = train_df.loc[train_df['label'].isin(selected_labels)].reset_index(drop=True)

# Give every remaining label a consecutive integer id.
class_idx = {
    name: position
    for position, name in enumerate(sorted(train_df['label'].unique()))
}
train_df['label_'] = train_df['label'].map(class_idx)

NUM_CLASSES = train_df['label_'].nunique()
print("Num of classes:", NUM_CLASSES)

# Store the inverse mapping (integer id -> label name); json writes the
# integer keys as strings.
with open("class mapping.json", "w") as f:
    json.dump({position: name for name, position in class_idx.items()}, f)


Num of classes: 50


In [11]:
# Hold out 20% of the pooled frame, stratified on the integer class id and
# with a fixed seed so the partition is repeatable.
train_df_split, val_df_split = train_test_split(
    train_df, test_size=0.2, stratify=train_df['label_'], random_state=42
)

# The class ids are turned into strings in both partitions; they are used as
# y_col of flow_from_dataframe with class_mode='sparse' further down
# (presumably because Keras expects string labels there - confirm).
for split_frame in (train_df_split, val_df_split):
    split_frame['label_'] = split_frame['label_'].astype(str)

print(len(train_df_split), len(val_df_split))


2000 500


In [12]:
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 64
EPOCHS = 45

# 'label_' holds the integer class ids as strings. When the class list is not
# given, Keras orders the string classes alphanumerically ("0", "1", "10",
# "11", ..., "2", ...), so the index predicted by the network would NOT equal
# the integer id written to "class mapping.json". Listing the classes in
# numeric order pins generator index i to class id i for both generators.
CLASS_NAMES = [str(class_id) for class_id in range(NUM_CLASSES)]

# Training images: VGG16 preprocessing plus light geometric/brightness augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.15,
    brightness_range=[0.7, 1.3],
    horizontal_flip=True
)

# Validation images: VGG16 preprocessing only, no augmentation.
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings shared by both generators so the two cannot drift apart.
flow_options = dict(
    x_col='image_path',
    y_col='label_',
    classes=CLASS_NAMES,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='sparse',
)

train_generator = train_datagen.flow_from_dataframe(
    train_df_split,
    shuffle=True,
    **flow_options
)

# Validation batches keep the frame order (shuffle=False).
validation_generator = val_datagen.flow_from_dataframe(
    val_df_split,
    shuffle=False,
    **flow_options
)


Found 1997 validated image filenames belonging to 50 classes.
Found 499 validated image filenames belonging to 50 classes.




In [13]:
# ImageNet-pretrained VGG16 without its classifier head. The input shape is
# derived from IMAGE_SIZE (plus 3 colour channels) so the network always
# matches the target_size the data generators resize images to.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(*IMAGE_SIZE, 3)
)

# Freeze every pretrained layer; only the new head below is trained.
for layer in base_model.layers:
    layer.trainable = False

# New classification head: pooled features -> 1024-unit ReLU layer -> dropout
# -> softmax over NUM_CLASSES classes.
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(NUM_CLASSES, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

# The sparse loss matches the integer labels produced by class_mode='sparse'.
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint

# Multiply the learning rate by 0.2 after 3 epochs without a val_loss
# improvement, never going below 1e-7.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss', factor=0.2, patience=3, min_lr=1e-7, verbose=1
)

# Stop after 7 epochs without a val_loss improvement and restore the best weights.
early_stop = EarlyStopping(
    monitor='val_loss', patience=7, restore_best_weights=True, verbose=1
)

# Write the full model (not just the weights) whenever val_loss improves.
checkpoint = ModelCheckpoint(
    filepath='best_vgg16_model.h5',
    monitor='val_loss',
    save_weights_only=False,
    save_best_only=True,
    verbose=1,
)

training_callbacks = [reduce_lr, early_stop, checkpoint]

history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=EPOCHS,
    callbacks=training_callbacks,
)



Epoch 1/45
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20s/step - accuracy: 0.0283 - loss: 7.6921 
Epoch 1: val_loss improved from inf to 4.27455, saving model to best_vgg16_model.h5




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m850s[0m 27s/step - accuracy: 0.0285 - loss: 7.6694 - val_accuracy: 0.0621 - val_loss: 4.2746 - learning_rate: 1.0000e-04
Epoch 2/45
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.0486 - loss: 5.6859
Epoch 2: val_loss improved from 4.27455 to 3.59877, saving model to best_vgg16_model.h5




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 1s/step - accuracy: 0.0488 - loss: 5.6768 - val_accuracy: 0.1363 - val_loss: 3.5988 - learning_rate: 1.0000e-04
Epoch 3/45
[1m 7/32[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m32s[0m 1s/step - accuracy: 0.0574 - loss: 4.8410