In [None]:
pip install MTCNN

Collecting MTCNN
  Downloading mtcnn-0.1.1-py3-none-any.whl.metadata (5.8 kB)
Downloading mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: MTCNN
Successfully installed MTCNN-0.1.1


In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from google.colab import drive
from keras.applications import ResNet50
from mtcnn import MTCNN
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img

In [None]:
# Attach Google Drive to the Colab filesystem so the dataset folders below are reachable.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
# Folder read by the training generators; it is expected to contain one
# sub-folder of face images per class.
extracted_path = '/content/drive/MyDrive/student-classification/Extracted_Dataset1'

# Source folder referenced by the (currently commented-out) face-extraction cell.
dataset_path = '/content/drive/MyDrive/student-classification/Combined_Dataset'

# exist_ok=True keeps the cell safe to re-run and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(extracted_path, exist_ok=True)

In [None]:
# Disabled (commented-out) face-extraction step, kept for reference.
# When enabled it walks every class folder under dataset_path, runs MTCNN on each
# image, crops the first detected face, resizes it to 160x160 and saves it under
# extracted_path/<category>/ using the original file name. Images in which no
# face is detected are skipped.
# NOTE(review): a new MTCNN() detector is constructed for every image; hoisting
# it out of the loops would presumably be much faster - confirm before re-enabling.
# for category in os.listdir(dataset_path):
#     category_path = os.path.join(dataset_path, category)
#     if os.path.isdir(category_path):
#         for image_file in os.listdir(category_path):
#             image_path = os.path.join(category_path, image_file)
#             image = Image.open(image_path)
#             pixels = np.array(image)
#             detector = MTCNN()
#             results = detector.detect_faces(pixels)
#             if len(results) == 0:
#                 face_array = None
#             else:
#               x1, y1, width, height = results[0]['box']
#               x1, y1 = abs(x1), abs(y1)
#               x2, y2 = x1 + width, y1 + height

#               face = pixels[y1:y2, x1:x2]
#               image = Image.fromarray(face)
#               image = image.resize((160, 160))
#               face_array = img_to_array(image)
#             if face_array is not None:
#                 save_path = os.path.join(extracted_path, category)
#                 if not os.path.exists(save_path):
#                     os.makedirs(save_path)
#                 face_image = Image.fromarray(np.uint8(face_array))
#                 face_image.save(os.path.join(save_path, image_file))

# print("Selesai Melakukan extraksi wajah")


In [None]:

# Random-transform settings used by augment_images() to synthesise extra images.
# Pixel values are rescaled to [0, 1] by the generator itself.
_augmentation_settings = dict(
    rescale=1.0 / 255,
    # geometric transforms
    rotation_range=40,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    # photometric transforms
    brightness_range=[0.8, 1.2],
    channel_shift_range=30.0,
)

augment_datagen = ImageDataGenerator(**_augmentation_settings)

def augment_images(input_folder, num_augmented_images):
    """Top up ``input_folder`` with augmented copies until it holds ``num_augmented_images`` files.

    Every regular file in the folder is treated as a source image. The number
    of missing images is spread evenly over the sources; when the division is
    not exact, the first few sources each get one additional augmentation.
    Augmented images are written back into ``input_folder`` as JPEG files named
    ``aug_<source-stem>_<k>.jpg`` (even share) and
    ``aug_<source-stem>_extra_<source-index>.jpg`` (remainder).

    Args:
        input_folder: Directory holding the images of one class.
        num_augmented_images: Desired total number of files in the folder,
            originals included.

    Returns:
        None. Nothing is written when the folder is empty or already holds at
        least ``num_augmented_images`` files.
    """
    image_files = [f for f in os.listdir(input_folder) if os.path.isfile(os.path.join(input_folder, f))]
    num_original_images = len(image_files)

    # Empty folder: nothing to augment (and the division below would fail).
    if num_original_images == 0:
        return

    # Clamp at zero: with a negative difference the floor-division arithmetic
    # would schedule a full extra batch for a folder that is already large enough.
    num_missing = max(0, num_augmented_images - num_original_images)
    if num_missing == 0:
        return

    num_augmentations_per_image, remaining_augments = divmod(num_missing, num_original_images)

    for source_index, image_file in enumerate(image_files):
        gets_extra = source_index < remaining_augments
        if num_augmentations_per_image == 0 and not gets_extra:
            continue  # no output for this source, so skip loading it

        source_stem = os.path.splitext(image_file)[0]
        img = load_img(os.path.join(input_folder, image_file))
        x = np.expand_dims(img_to_array(img), axis=0)
        aug_iter = augment_datagen.flow(x, batch_size=1)

        # The source stem is part of the file name so that augmentations of
        # different source images never overwrite each other.
        output_names = [f"aug_{source_stem}_{i}.jpg" for i in range(num_augmentations_per_image)]
        if gets_extra:
            output_names.append(f"aug_{source_stem}_extra_{source_index}.jpg")

        for output_name in output_names:
            batch = next(aug_iter)
            # The generator rescales to [0, 1]; convert back to 8-bit for saving.
            augmented_img = np.uint8(batch[0] * 255)
            Image.fromarray(augmented_img).save(os.path.join(input_folder, output_name))

# Target number of images (originals + augmented) per class folder.
TARGET_IMAGES_PER_CLASS = 300

for folder in os.listdir(extracted_path):
    input_folder = os.path.join(extracted_path, folder)
    # Only class directories can be augmented; a stray file at the top level
    # would otherwise make os.listdir() inside augment_images() fail.
    if not os.path.isdir(input_folder):
        continue
    augment_images(input_folder, num_augmented_images=TARGET_IMAGES_PER_CLASS)

print("Selesai Melakukan Augmentasi")

Selesai Melakukan Augmentasi


In [None]:
image_height, image_width = 160, 160
batch_size = 32
validation_fraction = 0.2  # share of each class held out for validation

# Training images: rescaled and randomly transformed on the fly.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=validation_fraction,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation images: rescaled only, so validation metrics are measured on
# untransformed inputs. The same validation_split must be used so that both
# generators select complementary subsets of the files.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=validation_fraction
)

# NOTE(review): extracted_path also holds the aug_* files written by the
# augmentation cell, so augmented variants of a training image may end up in
# the validation subset - confirm whether that leakage is acceptable.
train_generator = train_datagen.flow_from_directory(
    extracted_path,
    subset='training',
    batch_size=batch_size,
    target_size=(image_height, image_width),
    class_mode="categorical"
)

# shuffle=False keeps batch order aligned with validation_generator.classes,
# which the evaluation cell relies on when it compares predictions to labels.
validation_generator = val_datagen.flow_from_directory(
    extracted_path,
    subset='validation',
    batch_size=batch_size,
    target_size=(image_height, image_width),
    class_mode="categorical",
    shuffle=False
)

Found 18028 images belonging to 33 classes.
Found 4491 images belonging to 33 classes.


In [None]:
# Pretrained convolutional backbone, loaded without its ImageNet classifier head.
base_model = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(image_height, image_width, 3))

# Freeze the whole backbone so that only the new head is trained at first.
for layer in base_model.layers:
    layer.trainable = False

# New classification head on top of the backbone's feature maps.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
# Size the output layer from the generator rather than from os.listdir():
# a stray file in extracted_path would otherwise add a phantom class and make
# the output width disagree with the one-hot labels.
predictions = Dense(train_generator.num_classes, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Stop when validation loss stalls (restoring the best weights) and lower the
# learning rate on shorter plateaus. The list must be passed to model.fit().
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3)
]

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m219055592/219055592[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [None]:
# Train the new head on the frozen backbone. The callbacks list defined with the
# model was previously never passed in, so early stopping and learning-rate
# reduction had no effect.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20,
    callbacks=callbacks
)

Epoch 1/10


  self._warn_if_super_not_called()


[1m564/564[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2764s[0m 5s/step - accuracy: 0.2642 - loss: 2.7561 - val_accuracy: 0.7094 - val_loss: 1.0250
Epoch 2/10
[1m564/564[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2684s[0m 5s/step - accuracy: 0.5208 - loss: 1.6216 - val_accuracy: 0.8301 - val_loss: 0.6499
Epoch 3/10
[1m564/564[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2659s[0m 5s/step - accuracy: 0.5891 - loss: 1.3870 - val_accuracy: 0.8297 - val_loss: 0.5998
Epoch 4/10
[1m234/564[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m21:00[0m 4s/step - accuracy: 0.6314 - loss: 1.2386

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
import pickle

In [None]:

# Fine-tuning setup: the first 100 backbone layers stay frozen, every later
# layer becomes trainable.
# NOTE(review): this also unfreezes BatchNormalization layers past index 100;
# Keras' fine-tuning guidance usually keeps those frozen - confirm intent.
for layer_index, backbone_layer in enumerate(base_model.layers):
    backbone_layer.trainable = layer_index >= 100

# Recompile so the new trainable flags take effect, with a small learning rate.
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)


In [None]:
# Fine-tune with the partially unfrozen backbone. The callbacks list was
# previously never passed in, so early stopping and learning-rate reduction
# had no effect.
fine_tune_history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20,
    callbacks=callbacks
)

In [None]:
def extract_embeddings(model, generator):
    """Run ``model`` over every batch of ``generator`` and stack the results.

    Args:
        model: Object exposing ``predict(x, verbose=...)`` (e.g. a Keras model).
        generator: Indexable batch source supporting ``len()`` and
            ``generator[i] -> (x, y)``.

    Returns:
        Tuple ``(embeddings, labels)``: the per-batch model outputs and the
        per-batch ``y`` arrays, each stacked along the first axis in batch order.

    Raises:
        ValueError: If the generator contains no batches.
    """
    num_batches = len(generator)
    if num_batches == 0:
        # np.vstack([]) would also raise ValueError, but with an opaque message.
        raise ValueError("generator contains no batches; nothing to embed")

    embeddings = []
    labels = []
    for batch_index in range(num_batches):
        x, y = generator[batch_index]
        # verbose=0: one progress bar per batch would flood the cell output.
        embeddings.append(model.predict(x, verbose=0))
        labels.append(y)

    return np.vstack(embeddings), np.vstack(labels)

# Backbone outputs for the training batches, together with their one-hot labels.
train_embeddings, train_labels = extract_embeddings(model=base_model, generator=train_generator)

# Collapse the one-hot rows to class indices, then fit the encoder on them.
train_class_ids = np.argmax(train_labels, axis=1)
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_class_ids)


In [None]:
# Persist the embeddings and their encoded labels together as a single tuple.
embedding_payload = (train_embeddings, train_labels_encoded)
with open('embeddings.pkl', 'wb') as pkl_file:
    pickle.dump(embedding_payload, pkl_file)

In [None]:
def _predict_in_batches(model, generator):
    """Return ``(probabilities, true_class_ids)`` aligned sample-for-sample.

    Labels are collected from the same batch that is fed to the model. Pairing
    ``model.predict(generator)`` with ``generator.classes`` is only valid for an
    unshuffled generator, because ``.classes`` is in file order while a shuffled
    generator yields batches in permuted order.
    """
    probabilities, true_ids = [], []
    for batch_index in range(len(generator)):
        images, one_hot = generator[batch_index]
        probabilities.append(model.predict(images, verbose=0))
        true_ids.append(np.argmax(one_hot, axis=1))
    return np.concatenate(probabilities), np.concatenate(true_ids)


def _show_confusion_matrix(cm, title, tick_labels):
    """Print ``cm`` and draw it as an annotated heatmap titled ``title``."""
    print(title)
    print(cm)

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=tick_labels, yticklabels=tick_labels)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title(title)
    plt.show()


class_labels = list(validation_generator.class_indices.keys())
# Explicit label ids keep the matrices/reports one row per class even if a
# class happens to be absent from a subset.
label_ids = list(range(len(class_labels)))

Y_pred_train, y_true_train = _predict_in_batches(model, train_generator)
y_pred_train = np.argmax(Y_pred_train, axis=1)
Y_pred_val, y_true_val = _predict_in_batches(model, validation_generator)
y_pred_val = np.argmax(Y_pred_val, axis=1)

cm_train = confusion_matrix(y_true_train, y_pred_train, labels=label_ids)
_show_confusion_matrix(cm_train, "Confusion Matrix - Training Data", class_labels)

cm_val = confusion_matrix(y_true_val, y_pred_val, labels=label_ids)
_show_confusion_matrix(cm_val, "Confusion Matrix - Validation Data", class_labels)

print("Classification Report - Training Data")
report_train = classification_report(y_true_train, y_pred_train, labels=label_ids, target_names=class_labels)
print(report_train)

print("Classification Report - Validation Data")
report_val = classification_report(y_true_val, y_pred_val, labels=label_ids, target_names=class_labels)
print(report_val)