In [None]:
# Mount Google Drive at /content/drive so later cells can read the dataset
# and write artifacts (label encoder, model checkpoint) underneath it.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import glob
import numpy as np
import cv2

def load_train_lfw_data(data_dir, target_size=(224, 224)):
    """Load every ``*.jpg`` under ``data_dir/<person>/`` and label it by folder name.

    Args:
        data_dir: Root directory whose immediate sub-directories each hold the
            images of one person. The sub-directory name is used as the label.
        target_size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        A tuple ``(images, names)`` of NumPy arrays: the resized images exactly
        as decoded by ``cv2.imread`` (3 channels, BGR order) and, for each
        image, the name of the person it belongs to.
    """
    images = []
    names = []

    # Sorted so the load order does not depend on filesystem enumeration order.
    for person_dir in sorted(glob.glob(os.path.join(data_dir, '*'))):
        if not os.path.isdir(person_dir):
            # Stray files next to the person folders carry no label.
            continue
        person_name = os.path.basename(person_dir)

        for img_path in sorted(glob.glob(os.path.join(person_dir, '*.jpg'))):
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None for unreadable/corrupt files;
                # passing that to cv2.resize would raise, so skip the file.
                print(f"Warning: could not read image, skipping: {img_path}")
                continue
            images.append(cv2.resize(img, target_size))
            names.append(person_name)

    return np.array(images), np.array(names)

# Load all training images (X) and their person-name labels (names)
# from the dataset folder on the mounted Drive.
data_dir = '/content/drive/MyDrive/Colab Notebooks/Train_ver3'
X, names = load_train_lfw_data(data_dir)

In [None]:
import joblib
from sklearn.preprocessing import LabelEncoder

# Learn the name -> integer id mapping, then apply it to every label.
le = LabelEncoder().fit(names)
names_encoded = le.transform(names)

# Persist the fitted encoder so ids can be mapped back to names later.
joblib.dump(le, '/content/drive/MyDrive/label_encoder_ver3.pkl')

['/content/drive/MyDrive/label_encoder_ver3.pkl']

In [None]:
# Hold out 20% of the images as a test set, stratified by identity so every
# person keeps the same proportion in both subsets.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    names_encoded,
    test_size=0.2,
    stratify=names_encoded,
    random_state=42,  # fixed seed: the same split is produced on every run
)

# Report the resulting array shapes
print(X_train.shape, X_test.shape)

(772, 224, 224, 3) (194, 224, 224, 3)


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    # --- normalization (statistics come from datagen.fit below) ---
    featurewise_center=True,             # subtract the dataset mean
    featurewise_std_normalization=True,  # divide by the dataset std
    # --- geometric augmentation ---
    rotation_range=20,                   # rotate by up to 20 degrees
    width_shift_range=0.2,               # horizontal shift
    height_shift_range=0.2,              # vertical shift
    horizontal_flip=True,                # random left/right flip
    # zoom_range=0.1,                    # zoom (currently disabled)
    # --- photometric augmentation ---
    brightness_range=[0.8, 1.2],         # random brightness change
)

# Compute the featurewise statistics from the training images only.
datagen.fit(X_train)

In [None]:
# Build the augmented training set: every original image plus
# `augmented_per_image` randomly transformed variants of it.
X_augmented = []
y_augmented = []

# Number of augmented variants generated for each original image
augmented_per_image = 2

print(f"Tạo {augmented_per_image} ảnh biến thể cho mỗi ảnh gốc...")

for i in range(len(X_train)):
    if i % 100 == 0:  # progress message every 100 images
        print(f"Đã xử lý {i}/{len(X_train)} ảnh...")

    # datagen.flow() yields featurewise-standardized float images, so the
    # original goes through the same standardization; otherwise raw 0-255
    # pixels would be mixed with standardized ones in a single training set.
    # astype() makes a copy, so X_train itself is left untouched.
    original = datagen.standardize(X_train[i].astype(np.float32))
    X_augmented.append(original)
    y_augmented.append(y_train[i])

    # flow() is an endless generator; zip with range() takes exactly
    # `augmented_per_image` variants (and none at all when it is 0).
    variants = datagen.flow(np.expand_dims(X_train[i], axis=0), batch_size=1)
    for _, x_batch in zip(range(augmented_per_image), variants):
        X_augmented.append(x_batch[0])
        y_augmented.append(y_train[i])

Tạo 2 ảnh biến thể cho mỗi ảnh gốc...
Đã xử lý 0/772 ảnh...
Đã xử lý 100/772 ảnh...
Đã xử lý 200/772 ảnh...
Đã xử lý 300/772 ảnh...
Đã xử lý 400/772 ảnh...
Đã xử lý 500/772 ảnh...
Đã xử lý 600/772 ảnh...
Đã xử lý 700/772 ảnh...


In [None]:
# Turn the accumulated Python lists into NumPy arrays for the training pipeline.
X_train_model = np.asarray(X_augmented)
y_train_model = np.asarray(y_augmented)

In [None]:
# Sanity check: the number of images and the number of labels should match.
print(len(X_train_model), len(y_train_model))


2316 2316


In [None]:
!pip install tensorflow==2.15.0
!pip install tensorflow-addons

Collecting tensorflow==2.15.0
  Downloading tensorflow-2.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting ml-dtypes~=0.2.0 (from tensorflow==2.15.0)
  Downloading ml_dtypes-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting numpy<2.0.0,>=1.23.5 (from tensorflow==2.15.0)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 (from tensorflow==2.15.0)
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting wrapt<1.15,>=1.11.0 (from tensorflow==2.15.0)
  Downloading wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting tenso

Collecting tensorflow-addons
  Downloading tensorflow_addons-0.23.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting typeguard<3.0.0,>=2.7 (from tensorflow-addons)
  Downloading typeguard-2.13.3-py3-none-any.whl.metadata (3.6 kB)
Downloading tensorflow_addons-0.23.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (611 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.8/611.8 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow-addons
  Attempting uninstall: typeguard
    Found existing installation: typeguard 4.4.2
    Uninstalling typeguard-4.4.2:
      Successfully uninstalled typeguard-4.4.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
inflect 7.5.0 requires typeguard>=4.0.1, 

In [None]:
# Tạo mô hình backbone
import tensorflow as tf
from tensorflow.keras.layers import Dense, Lambda, Flatten, Input
from tensorflow.keras.models import Model
from tensorflow.keras.applications import VGG16

def _base_network(input_shape=(224, 224, 3), embedding_dim=128, trainable_layers=4):
    """Build a VGG16-based embedding network with L2-normalized outputs.

    Args:
        input_shape: Shape of one input image, ``(height, width, channels)``.
        embedding_dim: Size of the embedding produced by the final Dense layer.
        trainable_layers: How many of the last VGG16 layers are left trainable;
            every earlier layer is frozen.

    Returns:
        A Keras ``Model`` mapping an image batch to unit-length embeddings of
        size ``embedding_dim``.
    """
    base_model = VGG16(include_top=False, weights='imagenet', input_tensor=Input(shape=input_shape))

    # Freeze everything except the last `trainable_layers` layers. An explicit
    # index is used because slicing with [-0:] would select every layer.
    first_trainable = max(len(base_model.layers) - trainable_layers, 0)
    for index, layer in enumerate(base_model.layers):
        layer.trainable = index >= first_trainable

    x = Flatten()(base_model.output)
    x = Dense(embedding_dim)(x)
    # Project each embedding onto the unit sphere.
    norm2 = Lambda(lambda x: tf.math.l2_normalize(x, axis=1))(x)
    return Model(inputs=base_model.input, outputs=norm2)

# Instantiate the embedding model and print its layer-by-layer summary
model = _base_network()
model.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

In [None]:
# Show the shape of the augmented training array
print(X_train_model.shape)

(2316, 224, 224, 3)


In [None]:
# tensorflow_addons provides the triplet loss; it is imported here because no
# earlier cell brings `tfa` into scope, so a fresh Run All would otherwise
# fail with NameError on this cell.
import tensorflow_addons as tfa

model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss=tfa.losses.TripletSemiHardLoss()
)

In [None]:
batch_size = 80
# The shuffle buffer covers the whole dataset so every sample can land in any
# position; a buffer smaller than the dataset only shuffles within a sliding
# window. repeat() makes the stream endless, so fit() needs steps_per_epoch.
gen_train = (
    tf.data.Dataset.from_tensor_slices((X_train_model, y_train_model))
    .shuffle(len(X_train_model))
    .batch(batch_size)
    .repeat()
)

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Save the full model (not just weights) to Drive each time the training
# loss reaches a new minimum.
checkpoint_callback = ModelCheckpoint(
    filepath='/content/drive/MyDrive/facenet_ver4.h5',
    monitor='loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

In [None]:
# Train the embedding model; the checkpoint callback is invoked during
# training and the returned History object is kept in `history`.
history = model.fit(
    gen_train,
    steps_per_epoch=30,  # gen_train is built with .repeat(), so the epoch length is given explicitly
    epochs=50,
    callbacks=[checkpoint_callback]
)

Epoch 1/50
Epoch 1: loss improved from 0.99260 to 0.96317, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 2/50
Epoch 2: loss improved from 0.96317 to 0.92327, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 3/50
Epoch 3: loss improved from 0.92327 to 0.86645, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 4/50
Epoch 4: loss improved from 0.86645 to 0.81878, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 5/50
Epoch 5: loss improved from 0.81878 to 0.75218, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 6/50
Epoch 6: loss improved from 0.75218 to 0.70006, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 7/50
Epoch 7: loss improved from 0.70006 to 0.63403, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 8/50
Epoch 8: loss improved from 0.63403 to 0.57430, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 9/50
Epoch 9: loss improved from 0.57430 to 0.51500, saving model to /content/driv

In [None]:
import tensorflow_addons as tfa
from tensorflow.keras.models import load_model

# Reload the checkpointed model from Drive. The triplet loss comes from
# tensorflow_addons, so it is supplied to the loader via custom_objects.
model = load_model(
    filepath='/content/drive/MyDrive/facenet_ver4.h5',
    custom_objects=dict(TripletSemiHardLoss=tfa.losses.TripletSemiHardLoss),
)

In [None]:
import tensorflow as tf
# Compile the model with a newly constructed Adam optimizer (learning rate
# 1e-4) and the semi-hard triplet loss from tensorflow_addons.
# NOTE(review): compiling again replaces whatever optimizer the loaded model
# carried; confirm that discarding it is intended when resuming training.
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss=tfa.losses.TripletSemiHardLoss()
)

In [None]:
batch_size = 80
# The shuffle buffer covers the whole dataset so every sample can land in any
# position; a buffer smaller than the dataset only shuffles within a sliding
# window. repeat() makes the stream endless, so fit() needs steps_per_epoch.
gen_train = (
    tf.data.Dataset.from_tensor_slices((X_train_model, y_train_model))
    .shuffle(len(X_train_model))
    .batch(batch_size)
    .repeat()
)

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Save the full model (not just weights) to Drive each time the training
# loss reaches a new minimum.
# NOTE(review): this is a new callback instance writing to the same file as
# the earlier run. The logged output shows its first epoch reported as
# "improved from inf", i.e. the existing checkpoint is overwritten regardless
# of the loss reached previously; confirm that this is acceptable.
checkpoint_callback = ModelCheckpoint(
    filepath='/content/drive/MyDrive/facenet_ver4.h5',
    monitor='loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

In [None]:
# Continue training the reloaded model; `history` is rebound to the History
# object of this run.
history = model.fit(
    gen_train,
    steps_per_epoch=30,  # gen_train is built with .repeat(), so the epoch length is given explicitly
    epochs=50,
    callbacks=[checkpoint_callback]
)

Epoch 1/50
Epoch 1: loss improved from inf to 0.58743, saving model to /content/drive/MyDrive/facenet_ver4.h5


  saving_api.save_model(


Epoch 2/50
Epoch 2: loss improved from 0.58743 to 0.47707, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 3/50
Epoch 3: loss improved from 0.47707 to 0.41344, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 4/50
Epoch 4: loss improved from 0.41344 to 0.35739, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 5/50
Epoch 5: loss improved from 0.35739 to 0.30258, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 6/50
Epoch 6: loss improved from 0.30258 to 0.26192, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 7/50
Epoch 7: loss improved from 0.26192 to 0.25331, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 8/50
Epoch 8: loss improved from 0.25331 to 0.21793, saving model to /content/drive/MyDrive/facenet_ver4.h5
Epoch 9/50