In [1]:
from google.colab import drive
import os

# Mount the user's Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

# Root directory of the image dataset on the mounted Drive; the cleanup,
# balancing and generator cells all read from this path.
DATASET_PATH = '/content/drive/MyDrive/cvpr_final'

Mounted at /content/drive


In [2]:
from PIL import Image

# Walk the whole dataset tree and delete every file that Pillow cannot open
# and verify. NOTE: this is destructive and permanent -- any file under
# DATASET_PATH that is not a readable image (including non-image files) is
# removed from the mounted Drive.
deleted_count = 0
for subdir, _dirs, files in os.walk(DATASET_PATH):
    for file in files:
        filepath = os.path.join(subdir, file)
        try:
            # The context manager guarantees the file handle is released even
            # when verify() raises, so no descriptors leak across the walk.
            with Image.open(filepath) as img:
                # verify() checks file integrity without decoding pixel data.
                img.verify()
        except Exception:
            # Catch `Exception` rather than using a bare `except:` so that
            # KeyboardInterrupt / SystemExit propagate; previously, stopping
            # the cell mid-run would delete whichever file was being checked.
            os.remove(filepath)
            deleted_count += 1

print(f"Cleaned up {deleted_count} bad files from local storage.")

Cleaned up 2 bad files from local storage.


In [3]:
import random

# Upper bound on the number of images kept per subfolder; passed as `limit`
# to balance_dataset().
MAX_PHOTOS = 25

def balance_dataset(path, limit, seed=None):
    """Trim every immediate subfolder of ``path`` to at most ``limit`` images.

    Files are counted as images when their name ends (case-insensitively) in
    .png, .jpg, .jpeg or .jfif. Folders holding more than ``limit`` such
    files have randomly chosen surplus files deleted from disk; folders at or
    below the limit are left untouched. Progress is reported via ``print``.

    Args:
        path: Directory whose immediate subdirectories are processed.
        limit: Maximum number of image files to keep per subfolder.
        seed: Optional seed for the random selection. When given, the set of
            deleted files is reproducible for the same folder contents. When
            ``None`` (default) the module-level ``random`` state is used, as
            before.

    Returns:
        None. The function works purely by side effect (file deletion and
        printed output).
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.jfif')
    # A dedicated generator keeps a seeded run from disturbing (or being
    # disturbed by) the global random state.
    rng = random if seed is None else random.Random(seed)

    print(f"Scanning: {path}")
    subfolders = [f for f in os.listdir(path) if os.path.isdir(os.path.join(path, f))]

    for folder in subfolders:
        folder_path = os.path.join(path, folder)
        # Sort so the sampling population does not depend on the arbitrary
        # order returned by os.listdir; skip directories whose name merely
        # looks like an image, since os.remove() would fail on them.
        images = sorted(
            name for name in os.listdir(folder_path)
            if name.lower().endswith(image_exts)
            and os.path.isfile(os.path.join(folder_path, name))
        )

        current_count = len(images)
        print(f"Folder [{folder}]: {current_count} images")

        if current_count > limit:
            diff = current_count - limit
            to_delete = rng.sample(images, k=diff)

            for img_name in to_delete:
                os.remove(os.path.join(folder_path, img_name))

            print(f"Trimmed {diff} images. New count: {limit}")
        elif current_count < limit:
            print(f"Below limit. Keeping all {current_count} images.")
        else:
            print("balanced")

    print("\nDataset balancing complete")

# Cap every subfolder of the dataset at MAX_PHOTOS images; surplus files are
# deleted from disk.
balance_dataset(DATASET_PATH, MAX_PHOTOS)

Scanning: /content/drive/MyDrive/cvpr_final
Folder [22-48133-2 ]: 21 images
Below limit. Keeping all 21 images.
Folder [22-48005-2]: 23 images
Below limit. Keeping all 23 images.
Folder [22-46887-1]: 17 images
Below limit. Keeping all 17 images.
Folder [22-48725-3]: 0 images
Below limit. Keeping all 0 images.
Folder [22-49355-3]: 20 images
Below limit. Keeping all 20 images.
Folder [22-46590-1]: 20 images
Below limit. Keeping all 20 images.
Folder [22-49331-3]: 0 images
Below limit. Keeping all 0 images.
Folder [22-48682-3]: 0 images
Below limit. Keeping all 0 images.
Folder [22-48666-3]: 0 images
Below limit. Keeping all 0 images.
Folder [22-46983-1]: 18 images
Below limit. Keeping all 18 images.
Folder [22-48091-2]: 21 images
Below limit. Keeping all 21 images.
Folder [23-51308-1]: 8 images
Below limit. Keeping all 8 images.
Folder [22-48833-3]: 20 images
Below limit. Keeping all 20 images.
Folder [22-48915-3]: 0 images
Below limit. Keeping all 0 images.
Folder [21-45902-3]: 20 image

In [4]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Target (height, width) every image is resized to, and the mini-batch size.
IMG_SIZE = (150, 150)
BATCH_SIZE = 32
# Fraction of each class directory held out for validation. Both generators
# below must use the same value so the train/validation partitions match.
VALIDATION_SPLIT = 0.2

# Training pipeline: rescale pixel values to [0, 1] and apply random
# augmentation (rotation, shifts, zoom, horizontal flip).
datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=VALIDATION_SPLIT
)

# Validation pipeline: rescale only. Reusing the augmenting generator for the
# validation subset would randomly distort validation images on every epoch,
# making val_loss / val_accuracy noisy and not comparable across epochs.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT
)

train_generator = datagen.flow_from_directory(
    DATASET_PATH,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

val_generator = val_datagen.flow_from_directory(
    DATASET_PATH,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    # Keep a fixed order so predictions can be lined up with
    # val_generator.classes; aggregate metrics are unaffected.
    shuffle=False
)

Found 1344 images belonging to 97 classes.
Found 319 images belonging to 97 classes.


In [5]:
from tensorflow.keras import layers, models

# Small CNN classifier: four Conv2D + MaxPooling2D stages, then a dense head
# with dropout and a softmax over the classes found by train_generator.
model = models.Sequential([
    # Explicit Input layer instead of `input_shape=` on the first Conv2D
    # (which Keras warns about), with the spatial size derived from IMG_SIZE
    # so the model always matches the generators' target_size.
    layers.Input(shape=IMG_SIZE + (3,)),

    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),

    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    # One output unit per class directory discovered by the generator.
    layers.Dense(train_generator.num_classes, activation='softmax')
])

# Configure training: Adam optimizer, categorical cross-entropy loss
# (matching the generators' one-hot 'categorical' labels), accuracy metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 50 epochs, evaluating on the validation generator after each one;
# the returned History object is kept in `history`.
history = model.fit(train_generator, epochs=50, validation_data=val_generator)

# Write the trained model to an HDF5 file in the current working directory.
model.save('student_recognition_model.h5')
print("Training done. Model saved as 'student_recognition_model.h5'")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m601s[0m 14s/step - accuracy: 0.0143 - loss: 4.5842 - val_accuracy: 0.0125 - val_loss: 4.4873
Epoch 2/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 4s/step - accuracy: 0.0183 - loss: 4.4816 - val_accuracy: 0.0345 - val_loss: 4.3865
Epoch 3/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 4s/step - accuracy: 0.0270 - loss: 4.3693 - val_accuracy: 0.0784 - val_loss: 4.0567
Epoch 4/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 4s/step - accuracy: 0.0512 - loss: 4.1086 - val_accuracy: 0.1003 - val_loss: 3.7529
Epoch 5/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 4s/step - accuracy: 0.0774 - loss: 3.8626 - val_accuracy: 0.1442 - val_loss: 3.5937
Epoch 6/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 4s/step - accuracy: 0.1079 - loss: 3.7138 - val_accuracy: 0.2163 - val_loss: 3.4236
Epoch 7/50
[1m42/42[0m [32m━━━



Training done. Model saved as 'student_recognition_model.h5'


In [6]:
# Export the class names, one per line, ordered explicitly by class index
# rather than relying on dict iteration order, so the line order follows the
# indices in class_indices (presumably 0..num_classes-1, matching the softmax
# outputs -- confirm against the Keras version in use).
labels = [
    name
    for name, _index in sorted(
        train_generator.class_indices.items(), key=lambda item: item[1]
    )
]
# Explicit UTF-8 so the file content does not depend on the platform's
# default encoding. Names are written verbatim (including any surrounding
# whitespace in folder names), so readers should not strip the lines.
with open("labels.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(labels))