In [5]:
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

In [6]:

cascade_dir = cv2.data.haarcascades

cascade_file = os.path.join(cascade_dir, 'haarcascade_frontalface_default.xml')

if os.path.isfile(cascade_file):
    print("Haar cascade file found:", cascade_file)
else:
    print("Haar cascade file not found. Downloading...")
    cv2_base_url = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/"
    cascade_url = cv2_base_url + 'haarcascade_frontalface_default.xml'
    os.system(f"wget {cascade_url} -P {cascade_dir}")
    print("Haar cascade file downloaded successfully.")
filter_path = cascade_file

Haar cascade file found: /opt/conda/lib/python3.10/site-packages/cv2/data/haarcascade_frontalface_default.xml


In [8]:
# Function to detect faces and crop them from an image
def detect_and_crop_faces(image):
    face_cascade = cv2.CascadeClassifier(filter_path)
    faces = face_cascade.detectMultiScale(image, 1.3, 5)
    cropped_faces = []
    for (x, y, w, h) in faces:
        cropped_faces.append(image[y:y+h, x:x+w])
    return cropped_faces


In [9]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

dataset_dir = "/kaggle/input/face-recognition-dataset/Original Images/Original Images"
cropped_dataset_dir = "/kaggle/working/CroppedImages"
train_dir = os.path.join(cropped_dataset_dir, "train")
test_dir = os.path.join(cropped_dataset_dir, "test")

os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

common_size = (128, 128)

for subdir in os.listdir(dataset_dir):
    subdir_path = os.path.join(dataset_dir, subdir)
    if os.path.isdir(subdir_path):
        train_subdir_path = os.path.join(train_dir, subdir)
        test_subdir_path = os.path.join(test_dir, subdir)
        os.makedirs(train_subdir_path, exist_ok=True)
        os.makedirs(test_subdir_path, exist_ok=True)
        image_files = [f for f in os.listdir(subdir_path) if f.endswith('.jpg')]
        for image_name in image_files:
            image_path = os.path.join(subdir_path, image_name)
            img = cv2.imread(image_path)
            faces = detect_and_crop_faces(img)
            for idx, face in enumerate(faces):
                if face is not None:
                    resized_face = cv2.resize(face, common_size)
                    if np.random.rand() < 0.9:
                        save_path = os.path.join(train_subdir_path, f"{image_name}_{idx}.jpg")
                    else:
                        save_path = os.path.join(test_subdir_path, f"{image_name}_{idx}.jpg")
                    cv2.imwrite(save_path, resized_face)
print('dataset created')

dataset created


In [16]:
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    raise RuntimeError("No GPU device found.")
    
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [17]:
train_dir = '/kaggle/working/CroppedImages/train'
test_dir = '/kaggle/working/CroppedImages/test'
generator = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.1
)
train_ds = generator.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode = "sparse",
    subset="training"  
)

val_ds = generator.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    batch_size=32,
    
    class_mode = "sparse",
    subset="validation"
)

classes = list(train_ds.class_indices.keys())
print("Classes in training data:", classes)

test_ds = generator.flow_from_directory(
    test_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode = "sparse",
    subset="training"
)

test_classes = list(test_ds.class_indices.keys())
print("Classes in test data:", test_classes)


Found 2047 images belonging to 31 classes.
Found 213 images belonging to 31 classes.
Classes in training data: ['Akshay Kumar', 'Alexandra Daddario', 'Alia Bhatt', 'Amitabh Bachchan', 'Andy Samberg', 'Anushka Sharma', 'Billie Eilish', 'Brad Pitt', 'Camila Cabello', 'Charlize Theron', 'Claire Holt', 'Courtney Cox', 'Dwayne Johnson', 'Elizabeth Olsen', 'Ellen Degeneres', 'Henry Cavill', 'Hrithik Roshan', 'Hugh Jackman', 'Jessica Alba', 'Kashyap', 'Lisa Kudrow', 'Margot Robbie', 'Marmik', 'Natalie Portman', 'Priyanka Chopra', 'Robert Downey Jr', 'Roger Federer', 'Tom Cruise', 'Vijay Deverakonda', 'Virat Kohli', 'Zac Efron']
Found 254 images belonging to 31 classes.
Classes in test data: ['Akshay Kumar', 'Alexandra Daddario', 'Alia Bhatt', 'Amitabh Bachchan', 'Andy Samberg', 'Anushka Sharma', 'Billie Eilish', 'Brad Pitt', 'Camila Cabello', 'Charlize Theron', 'Claire Holt', 'Courtney Cox', 'Dwayne Johnson', 'Elizabeth Olsen', 'Ellen Degeneres', 'Henry Cavill', 'Hrithik Roshan', 'Hugh Jackma

In [20]:
import tensorflow as tf

PATCH_SIZE = 16
IMAGE_SIZE = [128, 128, 3]  
num_patches = (IMAGE_SIZE[0] // PATCH_SIZE) ** 2
projection_dim = 64
num_heads = 4
transformer_units = [
    projection_dim * 2,
    projection_dim,
]
transformer_layers = 8
mlp_head_units = [2048, 1024]
def mlp(x, hidden_units, dropout_rate):
    for units in hidden_units:
        x = tf.keras.layers.Dense(units, activation=tf.nn.gelu)(x)
        x = tf.keras.layers.Dropout(dropout_rate)(x)
    return x
class Patches(tf.keras.layers.Layer):
    def __init__(self, patch_size):
        super(Patches, self).__init__()
        self.patch_size = patch_size

    def call(self, images):
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

class PatchEncoder(tf.keras.layers.Layer):
    def __init__(self, num_patches, projection_dim):
        super(PatchEncoder, self).__init__()
        self.num_patches = num_patches
        self.projection = tf.keras.layers.Dense(units=projection_dim)
        self.position_embedding = tf.keras.layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded
def create_vit_classifier(input_shape=(128, 128, 3)):
    inputs = tf.keras.layers.Input(shape=input_shape)
    patches = Patches(PATCH_SIZE)(inputs)
    encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)

    print("Patches shape:", patches.shape)
    print("Encoded patches shape:", encoded_patches.shape)

    for _ in range(transformer_layers):
        x1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        attention_output = tf.keras.layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(x1, x1)
        x2 = tf.keras.layers.Add()([attention_output, encoded_patches])
        x3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x2)
        x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=0.1)
        encoded_patches = tf.keras.layers.Add()([x3, x2])

    representation = tf.keras.layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    representation = tf.keras.layers.Flatten()(representation)
    representation = tf.keras.layers.Dropout(0.5)(representation)
    features = mlp(representation, hidden_units=mlp_head_units, dropout_rate=0.5)
    logits = tf.keras.layers.Dense(31)(features) 

    print("Logits shape:", logits.shape)

    model = tf.keras.Model(inputs=inputs, outputs=logits)
    return model

model = create_vit_classifier()
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['sparse_categorical_accuracy']
)


Patches shape: (None, None, 768)
Encoded patches shape: (None, 64, 64)
Logits shape: (None, 31)


In [21]:

batch_data, batch_labels = next(val_ds)
data_shape = batch_data.shape
labels_shape = batch_labels.shape

print("Data shape:", data_shape)
print("Labels shape:", labels_shape)


Data shape: (32, 128, 128, 3)
Labels shape: (32,)


In [22]:
BATCH_SIZE = 16

history = model.fit(train_ds, epochs=100, validation_data=val_ds, batch_size=BATCH_SIZE)

Epoch 1/100
[1m 1/64[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:20:55[0m 77s/step - loss: 6.0996 - sparse_categorical_accuracy: 0.0625

I0000 00:00:1714201441.015816     121 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 1s/step - loss: 5.3227 - sparse_categorical_accuracy: 0.0349 - val_loss: 3.4467 - val_sparse_categorical_accuracy: 0.0469
Epoch 2/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 161ms/step - loss: 3.5836 - sparse_categorical_accuracy: 0.0356 - val_loss: 3.4320 - val_sparse_categorical_accuracy: 0.0704
Epoch 3/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 165ms/step - loss: 3.5102 - sparse_categorical_accuracy: 0.0352 - val_loss: 3.3990 - val_sparse_categorical_accuracy: 0.0610
Epoch 4/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 163ms/step - loss: 3.4540 - sparse_categorical_accuracy: 0.0488 - val_loss: 3.2933 - val_sparse_categorical_accuracy: 0.0751
Epoch 5/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 172ms/step - loss: 3.3818 - sparse_categorical_accuracy: 0.0543 - val_loss: 3.2211 - val_sparse_categorical_accuracy: 0.1080
Epoc

In [26]:
train_loss, train_accuracy = model.evaluate(train_ds)
print(f"Training Accuracy: {train_accuracy*100: .2f}")

[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 198ms/step - loss: 0.1008 - sparse_categorical_accuracy: 0.9708
Training Accuracy:  97.61


In [35]:
validation_loss, validation_accuracy = model.evaluate(val_ds)
print(f"Validation Accuracy: {validation_accuracy*100: .2f}")

Validation Accuracy:  76.27
