In [9]:
!pip install mtcnn

Collecting mtcnn
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting lz4>=4.3.3 (from mtcnn)
  Downloading lz4-4.4.3-cp312-cp312-win_amd64.whl.metadata (3.9 kB)
Downloading mtcnn-1.0.0-py3-none-any.whl (1.9 MB)
   ---------------------------------------- 0.0/1.9 MB ? eta -:--:--
   --------------------------- ------------ 1.3/1.9 MB 11.2 MB/s eta 0:00:01
   --------------------------- ------------ 1.3/1.9 MB 11.2 MB/s eta 0:00:01
   -------------------------------------- - 1.8/1.9 MB 3.0 MB/s eta 0:00:01
   ---------------------------------------- 1.9/1.9 MB 3.0 MB/s eta 0:00:00
Downloading lz4-4.4.3-cp312-cp312-win_amd64.whl (99 kB)
Installing collected packages: lz4, mtcnn
  Attempting uninstall: lz4
    Found existing installation: lz4 4.3.2
    Uninstalling lz4-4.3.2:
      Successfully uninstalled lz4-4.3.2
Successfully installed lz4-4.4.3 mtcnn-1.0.0



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import os
import json
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from mtcnn import MTCNN
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

# Định nghĩa đường dẫn
DATASET_PATH = "D:/Study/Programming/DPL/dfdc_train_part_00/dfdc_train_part_0"  # Cập nhật đường dẫn thực tế
SAVE_PATH = "D:/Study/Programming/DPL/dfdc_train_part_00/image after detect"
METADATA_PATH = os.path.join(DATASET_PATH, "metadata.json")

# Đọc metadata
with open(METADATA_PATH, 'r') as f:
    metadata = json.load(f)

# Chuẩn bị danh sách video và nhãn
video_labels = []
for video, details in metadata.items():
    video_path = os.path.join(DATASET_PATH, video)
    label = 1 if details["label"] == "FAKE" else 0
    video_labels.append((video_path, label))

df = pd.DataFrame(video_labels, columns=["video_path", "label"])

In [2]:
# Hàm trích xuất 32 frames cách đều nhau từ video
def extract_frames(video_path, num_frames=32):
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_ids = np.linspace(0, total_frames - 1, num_frames, dtype=int)
    frames = []
    for frame_id in frame_ids:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_id)
        ret, frame = cap.read()
        if ret:
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    cap.release()
    return frames

# MTCNN face detection
detector = MTCNN()

def detect_face(image, margin=0.3):
    faces = detector.detect_faces(image)
    if faces:
        x, y, w, h = faces[0]['box']
        x -= int(w * margin)
        y -= int(h * margin)
        w = int(w * (1 + 2 * margin))
        h = int(h * (1 + 2 * margin))
        return image[max(0, y):y + h, max(0, x):x + w]
    return None

# Chuẩn bị dữ liệu hình ảnh
image_data = []
labels = []

In [None]:
for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing videos"):
    video_name = os.path.splitext(os.path.basename(row["video_path"]))[0]
    frames = extract_frames(row["video_path"])
    for frame_number, frame in enumerate(frames):
        face = detect_face(frame)
        if face is not None:
            face = cv2.resize(face, (224, 224))
            image_data.append(face)
            labels.append(row["label"])
            
            # Lưu ảnh sau khi detect
            save_dir = os.path.join(SAVE_PATH, video_name, str(frame_number))
            os.makedirs(save_dir, exist_ok=True)
            save_path = os.path.join(save_dir, "face.jpg")
            cv2.imwrite(save_path, cv2.cvtColor(face, cv2.COLOR_RGB2BGR))

In [67]:
import numpy as np
import json
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Định nghĩa đường dẫn dữ liệu
data_dir = "D:\\Study\\Programming\\DPL\\dfdc_train_part_00\\image after detect\\"
metadata_path = "C:\\Users\\vpmx5\\Downloads\\Presentation DPL PRJ 1\\new_metadata.json"

# Đọc metadata để lấy nhãn
with open(metadata_path, "r") as f:
    metadata = json.load(f)

image_paths = []
labels = []

# Duyệt qua từng video folder
video_folders = os.listdir(data_dir)
for video_name in video_folders:
    video_path = os.path.join(data_dir, video_name)

    # Bỏ qua nếu không phải thư mục
    if not os.path.isdir(video_path):
        continue

    print ( metadata.get(video_name + ".mp4", {}) )
    # Xác định nhãn (fake = 1, real = 0)
    label = 1 if metadata.get(video_name + ".mp4", {}).get("label") == "REAL" else 0

    # Duyệt qua từng frame trong thư mục
    for frame_file in os.listdir(video_path):
        frame_path = os.path.join(video_path, frame_file, "face.jpg")  # Đường dẫn đúng

        if os.path.exists(frame_path):  # Kiểm tra file tồn tại trước khi thêm vào danh sách
            image_paths.append(frame_path)
            labels.append(label)

# Chia train/test
train_paths, test_paths, train_labels, test_labels = train_test_split(
    image_paths, labels, test_size=0.3, random_state=42
)

# Hàm load ảnh từ đường dẫn
def load_and_preprocess(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [224, 224])
    image = image / 255.0  # Chuẩn hóa ảnh về [0,1]
    return image, label

# Tạo tf.data.Dataset với lazy loading
train_dataset = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
train_dataset = train_dataset.map(load_and_preprocess).batch(16).shuffle(1000).prefetch(tf.data.AUTOTUNE)

test_dataset = tf.data.Dataset.from_tensor_slices((test_paths, test_labels))
test_dataset = test_dataset.map(load_and_preprocess).batch(16).prefetch(tf.data.AUTOTUNE)


{'label': 'REAL', 'split': 'train'}
{'label': 'FAKE', 'split': 'train', 'original': 'aayrffkzxn.mp4'}
{'label': 'FAKE', 'split': 'train', 'original': 'fzvpbrzssi.mp4'}
{'label': 'FAKE', 'split': 'train', 'original': 'qyqufaskjs.mp4'}
{'label': 'FAKE', 'split': 'train', 'original': 'wynotylpnm.mp4'}
{'label': 'FAKE', 'split': 'train', 'original': 'fsaronfupy.mp4'}
{'label': 'FAKE', 'split': 'train', 'original': 'jawgcggquk.mp4'}
{'label': 'FAKE', 'split': 'train', 'original': 'ybnucgidtu.mp4'}
{'label': 'FAKE', 'split': 'train', 'original': 'eppyqpgewp.mp4'}
{'label': 'FAKE', 'split': 'train', 'original': 'qarqtkvgby.mp4'}
{'label': 'FAKE', 'split': 'train', 'original': 'fdpisghkmd.mp4'}
{'label': 'FAKE', 'split': 'train', 'original': 'exseruhiuk.mp4'}
{'label': 'FAKE', 'split': 'train', 'original': 'vpmyeepbep.mp4'}
{'label': 'FAKE', 'split': 'train', 'original': 'ljuuovfkgi.mp4'}
{'label': 'FAKE', 'split': 'train', 'original': 'nweufafotd.mp4'}
{'label': 'FAKE', 'split': 'train', 'ori

In [69]:
set(train_labels)

{0, 1}

In [87]:
from tensorflow.keras.metrics import AUC


# Xây dựng mô hình dựa trên EfficientNetB0
base_model = EfficientNetB0(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(128, activation="relu")(x)
out = Dense(1, activation="sigmoid")(x)
model = Model(inputs=base_model.input, outputs=out)

# Compile mô hình
model.compile(optimizer=Adam(learning_rate=0.0001), loss="binary_crossentropy", metrics=[AUC(curve="PR")])


In [88]:
model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=1,
    verbose=1  # 0: Không hiển thị, 1: Mặc định, 2: Rút gọn
)


[1m1812/1812[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3107s[0m 2s/step - auc_1: 0.4933 - loss: 0.1670 - val_auc_1: 0.0673 - val_loss: 3.5952


<keras.src.callbacks.history.History at 0x1963eda1220>

In [97]:
MODEL_SAVE_PATH = "C:\\Users\\vpmx5\\Downloads\\Presentation DPL PRJ 1\\dfdc_model.h5"
model.save(MODEL_SAVE_PATH)
print(f"Model saved at {MODEL_SAVE_PATH}")



Model saved at C:\Users\vpmx5\Downloads\Presentation DPL PRJ 1\dfdc_model.h5


In [93]:
# Dự đoán trên tập test
y_true = []
y_pred = []
y_scores = []

for images, labels in test_dataset:
    predictions = model.predict(images)
    predicted_labels = (predictions >= 0.5).astype(int).flatten()
    
    y_true.extend(labels.numpy())
    y_pred.extend(predicted_labels)
    y_scores.extend(predictions.flatten())

# Đánh giá mô hình
from sklearn.metrics import classification_report, accuracy_score



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 302ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 297ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 288ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 307ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 302ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 305ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 305ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 304ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 288ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 295ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 301ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 308ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [94]:
print("Accuracy:", accuracy_score(y_true, y_pred))

Accuracy: 0.9336874295831321
