In [1]:
import os
import re
import shutil

input_dir = r"/home/doaa/DL/Faces"
output_dir = r"/home/doaa/DL/Data"

os.makedirs(output_dir, exist_ok=True)

pattern = re.compile(r"(.+?)_\d+\.jpg")

for filename in os.listdir(input_dir):
    if filename.lower().endswith(".jpg"):

        match = pattern.match(filename)
        if not match:
            print("Skipped:", filename)
            continue

        person_name = match.group(1).strip().replace(" ", "_")

        person_folder = os.path.join(output_dir, person_name)
        os.makedirs(person_folder, exist_ok=True)

        src_path = os.path.join(input_dir, filename)
        dst_path = os.path.join(person_folder, filename)

        shutil.copy2(src_path, dst_path)

print("Done! Folders created successfully.")


Done! Folders created successfully.


In [2]:
import os
import shutil
import random

dataset_dir = r"/home/doaa/DL/Data"
output_dir = "processed_dataset"

selected_people = [p for p in os.listdir(dataset_dir) if os.path.isdir(os.path.join(dataset_dir, p))]

print("Number of classes:", len(selected_people))
print(selected_people)

splits = ["train", "val", "test"]

for split in splits:
    for person in selected_people:
        os.makedirs(os.path.join(output_dir, split, person), exist_ok=True)

for person in selected_people:
    person_path = os.path.join(dataset_dir, person)
    images = os.listdir(person_path)

    random.shuffle(images)

    n = len(images)
    train_end = int(0.7 * n)
    val_end = int(0.85 * n)

    split_images = {
        "train": images[:train_end],
        "val": images[train_end:val_end],
        "test": images[val_end:]
    }

    for split, imgs in split_images.items():
        for img in imgs:
            src = os.path.join(person_path, img)
            dst = os.path.join(output_dir, split, person, img)
            shutil.copy2(src, dst)

print("Dataset split completed successfully!")


Number of classes: 31
['Anushka_Sharma', 'Margot_Robbie', 'Elizabeth_Olsen', 'Tom_Cruise', 'Roger_Federer', 'Natalie_Portman', 'Marmik', 'Ellen_Degeneres', 'Virat_Kohli', 'Alexandra_Daddario', 'Andy_Samberg', 'Hugh_Jackman', 'Dwayne_Johnson', 'Henry_Cavill', 'Brad_Pitt', 'Priyanka_Chopra', 'Vijay_Deverakonda', 'Camila_Cabello', 'Akshay_Kumar', 'Robert_Downey_Jr', 'Zac_Efron', 'Hrithik_Roshan', 'Kashyap', 'Courtney_Cox', 'Amitabh_Bachchan', 'Claire_Holt', 'Charlize_Theron', 'Jessica_Alba', 'Billie_Eilish', 'Lisa_Kudrow', 'Alia_Bhatt']
Dataset split completed successfully!


In [3]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

img_size = 160

transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

train_data = datasets.ImageFolder("processed_dataset/train", transform=transform)
val_data   = datasets.ImageFolder("processed_dataset/val", transform=transform)
test_data  = datasets.ImageFolder("processed_dataset/test", transform=transform)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32)
test_loader = DataLoader(test_data, batch_size=32)

print("Classes:", train_data.classes)


Classes: ['Akshay_Kumar', 'Alexandra_Daddario', 'Alia_Bhatt', 'Amitabh_Bachchan', 'Andy_Samberg', 'Anushka_Sharma', 'Billie_Eilish', 'Brad_Pitt', 'Camila_Cabello', 'Charlize_Theron', 'Claire_Holt', 'Courtney_Cox', 'Dwayne_Johnson', 'Elizabeth_Olsen', 'Ellen_Degeneres', 'Henry_Cavill', 'Hrithik_Roshan', 'Hugh_Jackman', 'Jessica_Alba', 'Kashyap', 'Lisa_Kudrow', 'Margot_Robbie', 'Marmik', 'Natalie_Portman', 'Priyanka_Chopra', 'Robert_Downey_Jr', 'Roger_Federer', 'Tom_Cruise', 'Vijay_Deverakonda', 'Virat_Kohli', 'Zac_Efron']


In [4]:
from torchvision import transforms

img_size = 160  # أو 224 حسب الموديل

train_transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

test_transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])


In [5]:
train_data = datasets.ImageFolder("processed_dataset/train", transform=train_transform)
val_data   = datasets.ImageFolder("processed_dataset/val", transform=test_transform)
test_data  = datasets.ImageFolder("processed_dataset/test", transform=test_transform)


In [6]:
# ----------- DataLoaders -----------
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_data, batch_size=32)
test_loader  = DataLoader(test_data, batch_size=32)

print("Classes:", train_data.classes)
print("Number of training images:", len(train_data))
print("Number of validation images:", len(val_data))
print("Number of test images:", len(test_data))

Classes: ['Akshay_Kumar', 'Alexandra_Daddario', 'Alia_Bhatt', 'Amitabh_Bachchan', 'Andy_Samberg', 'Anushka_Sharma', 'Billie_Eilish', 'Brad_Pitt', 'Camila_Cabello', 'Charlize_Theron', 'Claire_Holt', 'Courtney_Cox', 'Dwayne_Johnson', 'Elizabeth_Olsen', 'Ellen_Degeneres', 'Henry_Cavill', 'Hrithik_Roshan', 'Hugh_Jackman', 'Jessica_Alba', 'Kashyap', 'Lisa_Kudrow', 'Margot_Robbie', 'Marmik', 'Natalie_Portman', 'Priyanka_Chopra', 'Robert_Downey_Jr', 'Roger_Federer', 'Tom_Cruise', 'Vijay_Deverakonda', 'Virat_Kohli', 'Zac_Efron']
Number of training images: 1780
Number of validation images: 384
Number of test images: 398
