In [10]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
data_dir = '/content/drive/MyDrive/Deepfake detec/Test'


In [12]:
!pip install mtcnn
!pip install tensorflow




In [13]:
!pip install lz4



In [14]:
import os
import cv2
import numpy as np
from PIL import Image
from mtcnn.mtcnn import MTCNN
from tqdm import tqdm

detector = MTCNN()

def extract_face(img_path, margin=0.2, size=(224, 224)):
    img = cv2.imread(img_path)
    if img is None:
        return None

    results = detector.detect_faces(img)
    if len(results) == 0:
        return None
    x, y, w, h = results[0]['box']
    x, y = max(0, x), max(0, y)
    margin_x = int(w * margin)
    margin_y = int(h * margin)
    x1 = max(x - margin_x, 0)
    y1 = max(y - margin_y, 0)
    x2 = min(x + w + margin_x, img.shape[1])
    y2 = min(y + h + margin_y, img.shape[0])
    face = img[y1:y2, x1:x2]
    face = cv2.resize(face, size)
    return face.astype(np.float32) / 255.0


In [16]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import cv2  # OpenCV để lưu ảnh

def save_face_image(face_img, save_path):
    face_img_uint8 = (face_img * 255).astype(np.uint8) if face_img.max() <= 1.0 else face_img
    cv2.imwrite(save_path, face_img_uint8)

def load_and_split_dataset(root_dir, test_size=0.15, val_size=0.15, max_images_per_class=1500, output_dir="/content/processed_faces"):
    images, labels = [], []

    for label, category in enumerate(['Real', 'Fake']):
        category_path = os.path.join(root_dir, category)
        output_category_path = os.path.join(output_dir, category)
        os.makedirs(output_category_path, exist_ok=True)

        img_names = os.listdir(category_path)[:max_images_per_class]

        for img_name in tqdm(img_names, desc=f"Loading {category}"):
            img_path = os.path.join(category_path, img_name)
            face = extract_face(img_path)
            if face is not None:
                images.append(face)
                labels.append(label)


    X = np.array(images)
    y = np.array(labels)

    X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=test_size, random_state=42, stratify=y)
    val_ratio = val_size / (1 - test_size)
    X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=val_ratio, random_state=42, stratify=y_temp)

    return X_train, y_train, X_val, y_val, X_test, y_test
X_train, y_train, X_val, y_val, X_test, y_test = load_and_split_dataset(data_dir)


Loading Real: 100%|██████████| 1500/1500 [04:24<00:00,  5.67it/s]
Loading Fake: 100%|██████████| 1500/1500 [05:06<00:00,  4.89it/s]


In [19]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import cv2  # OpenCV để lưu ảnh
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

def build_and_train_model(X_train, y_train, X_val, y_val):
    # Xây dựng mô hình
    base_model = EfficientNetB0(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
    base_model.trainable = False  # Freeze EfficientNetB0

    inputs = Input(shape=(224, 224, 3))
    x = base_model(inputs, training=False)  # Dự đoán không cập nhật trọng số của EfficientNetB0
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.3)(x)  # Dropout để tránh overfitting
    outputs = Dense(1, activation='sigmoid')(x)
    model = Model(inputs, outputs)

    model.compile(optimizer=Adam(learning_rate=1e-5), loss='binary_crossentropy', metrics=['accuracy'])

    model.summary()

    # Huấn luyện mô hình
    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=10,
        batch_size=32,
        callbacks=[early_stop]
    )

    # Fine-tune mô hình
    base_model.trainable = True
    for layer in base_model.layers[:-20]:  # Chỉ mở khóa 20 layer cuối cùng
        layer.trainable = False

    # Compile lại mô hình với learning rate nhỏ
    model.compile(optimizer=Adam(learning_rate=1e-5), loss='binary_crossentropy', metrics=['accuracy'])

    # Tiếp tục huấn luyện với mô hình đã fine-tune
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=20,
        batch_size=32,
        callbacks=[early_stop]
    )

    return model




# Huấn luyện mô hình
model = build_and_train_model(X_train, y_train, X_val, y_val)

Epoch 1/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 557ms/step - accuracy: 0.5379 - loss: 0.6937 - val_accuracy: 0.5329 - val_loss: 0.6911
Epoch 2/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 50ms/step - accuracy: 0.4932 - loss: 0.6980 - val_accuracy: 0.5329 - val_loss: 0.6910
Epoch 3/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 48ms/step - accuracy: 0.4988 - loss: 0.6956 - val_accuracy: 0.5329 - val_loss: 0.6910
Epoch 4/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 48ms/step - accuracy: 0.5179 - loss: 0.6913 - val_accuracy: 0.5329 - val_loss: 0.6910
Epoch 5/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 47ms/step - accuracy: 0.5398 - loss: 0.6913 - val_accuracy: 0.5329 - val_loss: 0.6910
Epoch 6/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - accuracy: 0.4876 - loss: 0.6987 - val_accuracy: 0.5329 - val_loss: 0.6910
Epoch 7/10
[1m47/47[0m [32m━━

In [20]:
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.4f}")

model.save('/content/drive/MyDrive/deepfake_model.h5')


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.5531 - loss: 0.6877




Test Accuracy: 0.5329
