**Bước 1 : Chuẩn bị môi trường trên Google Colab**

In [None]:
from google.colab import drive
drive.mount('/content/drive')


**Câu hỏi 2: Xây dựng và triển khai mạng VGG16**

Thiết lập các tham số như đề bài và load dữ liệu

In [7]:
import os
import numpy as np
import cv2
import pickle
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
import matplotlib.pyplot as plt

# Đường dẫn tới dữ liệu
BASE_PATH = "/content/drive/MyDrive/Internet of Things – Chuyên ngành hẹp - Khoa học Máy Tính/IOT_01/Bài làm Module 1/dataset"
IMAGES_PATH = os.path.sep.join([BASE_PATH, "images"])
ANNOTS_PATH = os.path.sep.join([BASE_PATH, "annotations"])
MODEL_PATH = "/content/drive/MyDrive/Internet of Things – Chuyên ngành hẹp - Khoa học Máy Tính/IOT_01/Bài làm Module 1/Output/21004173.h5"
LB_PATH = "/content/drive/MyDrive/Internet of Things – Chuyên ngành hẹp - Khoa học Máy Tính/IOT_01/Bài làm Module 1/face_detector/lb.pickle"
PLOTS_PATH = "/content/drive/MyDrive/Internet of Things – Chuyên ngành hẹp - Khoa học Máy Tính/IOT_01/Bài làm Module 1/Output/plots"

# Tham số huấn luyện
learning_rate = 1e-3
NUM_EPOCHS = 50
BATCH_SIZE = 16


Load và xử lý dữ liệu

In [55]:
data, labels, bboxes, imagePaths = [], [], [], []

for csvPath in os.listdir(ANNOTS_PATH):
    rows = open(os.path.join(ANNOTS_PATH, csvPath)).read().strip().split("\n")
    rows = rows[1:]
    for row in rows:
        filename, label, startX, startY, endX, endY = row.split(",")
        imagePath = os.path.join(IMAGES_PATH, label, filename)
        imageDir = os.path.join(IMAGES_PATH, label)
        image = cv2.imread(imagePath)
        (h, w) = image.shape[:2]
        startX, startY, endX, endY = float(startX)/w, float(startY)/h, float(endX)/w, float(endY)/h
        image = load_img(imagePath, target_size=(224, 224))
        image = img_to_array(image) / 255.0
        data.append(image)
        labels.append(label)
        bboxes.append((startX, startY, endX, endY))
        imagePaths.append(imagePath)

data = np.array(data, dtype="float32")
labels = np.array(labels)
bboxes = np.array(bboxes, dtype="float32")
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

if len(lb.classes_) == 2:
    labels = to_categorical(labels)

trainImages, testImages, trainLabels, testLabels, trainBBoxes, testBBoxes = train_test_split(
    data, labels, bboxes, test_size=0.1, random_state=42)


In [57]:
print("dữ liệu : ", data)
print("label : ", labels)
print("bbox : ", bboxes)
print("imagePaths : ", imagePaths)

dữ liệu :  [[[[0.6039216  0.6745098  0.7607843 ]
   [0.6039216  0.6745098  0.7607843 ]
   [0.6039216  0.6745098  0.7607843 ]
   ...
   [0.64705884 0.70980394 0.77254903]
   [0.6431373  0.7058824  0.76862746]
   [0.6431373  0.7058824  0.76862746]]

  [[0.6039216  0.6745098  0.7607843 ]
   [0.6039216  0.6745098  0.7607843 ]
   [0.6039216  0.6745098  0.7607843 ]
   ...
   [0.64705884 0.70980394 0.77254903]
   [0.6431373  0.7058824  0.76862746]
   [0.6431373  0.7058824  0.76862746]]

  [[0.60784316 0.6784314  0.7647059 ]
   [0.60784316 0.6784314  0.7647059 ]
   [0.60784316 0.6784314  0.7647059 ]
   ...
   [0.64705884 0.70980394 0.77254903]
   [0.6431373  0.7058824  0.76862746]
   [0.6431373  0.7058824  0.76862746]]

  ...

  [[0.01960784 0.09019608 0.13725491]
   [0.03137255 0.10196079 0.14901961]
   [0.01960784 0.09411765 0.15294118]
   ...
   [0.84705883 0.8509804  0.85882354]
   [0.8509804  0.85490197 0.8627451 ]
   [0.8509804  0.85490197 0.8627451 ]]

  [[0.00784314 0.09411765 0.149019

Xây dựng mô hình

In [58]:
vgg = VGG16(weights="imagenet", include_top=False, input_tensor=Input(shape=(224, 224, 3)))
vgg.trainable = False

flatten = vgg.output
flatten = Flatten()(flatten)

# Mạng dự đoán bounding box
bboxHead = Dense(128, activation="relu")(flatten)
bboxHead = Dense(64, activation="relu")(bboxHead)
bboxHead = Dense(32, activation="relu")(bboxHead)
bboxHead = Dense(4, activation="sigmoid", name="bounding_box")(bboxHead)

# Mạng dự đoán nhãn
softmaxHead = Dense(512, activation="relu")(flatten)
softmaxHead = Dropout(0.5)(softmaxHead)
softmaxHead = Dense(512, activation="relu")(softmaxHead)
softmaxHead = Dropout(0.5)(softmaxHead)
softmaxHead = Dense(len(lb.classes_), activation="softmax", name="class_label")(softmaxHead)

model = Model(inputs=vgg.input, outputs=(bboxHead, softmaxHead))
losses = {"class_label": "categorical_crossentropy", "bounding_box": "mean_squared_error"}
lossWeights = {"class_label": 1.0, "bounding_box": 1.0}
opt = Adam(learning_rate=learning_rate)
model.compile(loss=losses, optimizer=opt, loss_weights=lossWeights)
model.summary()


Huấn luyện mô hình

In [None]:
trainTargets = {"class_label": trainLabels, "bounding_box": trainBBoxes}
testTargets = {"class_label": testLabels, "bounding_box": testBBoxes}

H = model.fit(trainImages, trainTargets, validation_data=(testImages, testTargets),
              batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, verbose=1)

model.save(MODEL_PATH)
with open(LB_PATH, "wb") as f:
    f.write(pickle.dumps(lb))


Hiển thị Loss và Accuracy

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix

# In ra các key có trong history để kiểm tra
print("Available keys in history:", H.history.keys())

# Tạo thư mục để lưu plots nếu chưa có
if not os.path.exists(PLOTS_PATH):
    os.makedirs(PLOTS_PATH)

# 1. Vẽ Loss plot
plt.figure(figsize=(10, 6))
plt.plot(H.history['loss'], label='Training Loss')
plt.plot(H.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.savefig(os.path.join(PLOTS_PATH, 'loss_plot.png'))
plt.close()

# 2. Vẽ Accuracy plot cho class label
plt.figure(figsize=(10, 6))
plt.plot(H.history['class_label_loss'], label='Training Class Loss')
plt.plot(H.history['val_class_label_loss'], label='Validation Class Loss')
plt.title('Class Label Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.savefig(os.path.join(PLOTS_PATH, 'class_loss_plot.png'))
plt.close()

# 3. Vẽ Loss plot cho bounding box
plt.figure(figsize=(10, 6))
plt.plot(H.history['bounding_box_loss'], label='Training BBox Loss')
plt.plot(H.history['val_bounding_box_loss'], label='Validation BBox Loss')
plt.title('Bounding Box Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.savefig(os.path.join(PLOTS_PATH, 'bbox_loss_plot.png'))
plt.close()

# 4. Vẽ Confusion Matrix
# Lấy predictions cho test set
predictions = model.predict(testImages)[1]  # [1] để lấy softmax predictions
y_pred = np.argmax(predictions, axis=1)
y_true = np.argmax(testLabels, axis=1)

# Tạo confusion matrix
cm = confusion_matrix(y_true, y_pred)

# Vẽ confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=lb.classes_,
            yticklabels=lb.classes_)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.savefig(os.path.join(PLOTS_PATH, 'confusion_matrix.png'))
plt.close()

# In thêm các metrics
from sklearn.metrics import classification_report
print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=lb.classes_))

Câu hỏi 3: Kiểm tra mô hình


In [None]:
import cv2
import numpy as np
import os
import pickle
from tensorflow.keras.models import load_model

# Load model và label binarizer
model = load_model(MODEL_PATH)
lb = pickle.loads(open(LB_PATH, "rb").read())

# Load face detector
prototxt_path = "/content/drive/MyDrive/Internet of Things – Chuyên ngành hẹp - Khoa học Máy Tính/IOT_01/Bài làm Module 1/face_detector/deploy.prototxt.txt"
weights_path = "/content/drive/MyDrive/Internet of Things – Chuyên ngành hẹp - Khoa học Máy Tính/IOT_01/Bài làm Module 1/face_detector/res10_300x300_ssd_iter_140000.caffemodel"
face_detector = cv2.dnn.readNet(prototxt_path, weights_path)

def detect_and_predict(image, face_detector, model, lb):
    # Lấy kích thước ảnh
    (h, w) = image.shape[:2]

    # Tạo blob từ ảnh
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (104.0, 177.0, 123.0))

    # Phát hiện khuôn mặt
    face_detector.setInput(blob)
    detections = face_detector.forward()

    faces = []
    locs = []
    preds = []

    # Lặp qua các khuôn mặt được phát hiện
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]

        if confidence > 0.5:  # Ngưỡng tin cậy
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # Đảm bảo bounding box nằm trong kích thước ảnh
            startX = max(0, startX)
            startY = max(0, startY)
            endX = min(w, endX)
            endY = min(h, endY)

            # Cắt khuôn mặt và xử lý
            face = image[startY:endY, startX:endX]
            face = cv2.resize(face, (224, 224))
            face = face / 255.0
            faces.append(face)
            locs.append((startX, startY, endX, endY))

    # Nếu phát hiện được khuôn mặt
    if len(faces) > 0:
        faces = np.array(faces)
        # Dự đoán với model VGG16
        (bboxPreds, labelPreds) = model.predict(faces)

        # Lấy nhãn và độ tin cậy
        for labelPred in labelPreds:
            i = np.argmax(labelPred)
            prob = labelPred[i]
            label = lb.classes_[i]
            preds.append((label, prob))

    return locs, preds

# Đường dẫn ảnh test
imagePath = "/content/drive/MyDrive/Internet of Things – Chuyên ngành hẹp - Khoa học Máy Tính/IOT_01/Bài làm Module 1/predict/testimg.jpg"

# Đọc và xử lý ảnh
image = cv2.imread(imagePath)
orig = image.copy()

# Phát hiện khuôn mặt và dự đoán
boxes, predictions = detect_and_predict(orig, face_detector, model, lb)

# Vẽ kết quả
for ((startX, startY, endX, endY), (label, prob)) in zip(boxes, predictions):
    # Vẽ bounding box
    cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)

    # Chuẩn bị text
    text = f"{label}: {prob * 100:.2f}%"

    # Vẽ background cho text
    text_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)[0]
    cv2.rectangle(orig,
                 (startX, startY - text_size[1] - 10),
                 (startX + text_size[0], startY),
                 (0, 255, 0),
                 -1)

    # Vẽ text
    cv2.putText(orig, text, (startX, startY - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

# Lưu kết quả
output_dir = '/content/drive/MyDrive/Internet of Things – Chuyên ngành hẹp - Khoa học Máy Tính/IOT_01/Bài làm Module 1/Output'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

output_path = os.path.join(output_dir, 'img_result.png')
cv2.imwrite(output_path, orig)
print(f"Result image saved at: {output_path}")