In [1]:
# ===============================
# Environment Setup
# ===============================
USE_COLAB = True

if USE_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    PROJECT_ROOT = "/content/drive/MyDrive/real-and-ai-generated-synthetic-images"
else:
    PROJECT_ROOT = os.path.abspath(".")

print("PROJECT_ROOT:", PROJECT_ROOT)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
PROJECT_ROOT: /content/drive/MyDrive/real-and-ai-generated-synthetic-images


In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from IPython.display import Image
import matplotlib.pyplot as plt
import os, sys


In [3]:

if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

# Load Data

In [4]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("birdy654/cifake-real-and-ai-generated-synthetic-images")

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'cifake-real-and-ai-generated-synthetic-images' dataset.
Path to dataset files: /kaggle/input/cifake-real-and-ai-generated-synthetic-images


In [5]:
print("Path:", path)
print("Subfolders in path:", os.listdir(path))

Path: /kaggle/input/cifake-real-and-ai-generated-synthetic-images
Subfolders in path: ['test', 'train']


# Train/Val/Test

In [6]:
train_dir = os.path.join(path, "train")
test_dir = os.path.join(path, "test")

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    validation_split=0.2
)

train_gen = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='training'
)

val_gen = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='validation'
)

Found 80000 images belonging to 2 classes.
Found 20000 images belonging to 2 classes.


# Train Model

In [7]:
base = VGG16(weights='imagenet', include_top=False, input_shape=(224,224,3))
for layer in base.layers:
    layer.trainable = False

x = Flatten()(base.output)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base.input, outputs=output)

In [8]:
model.summary()

In [9]:
model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='binary_crossentropy',
              metrics=['accuracy'])

history_initial = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=3,
    callbacks=[
        EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)
    ]
)


  self._warn_if_super_not_called()


Epoch 1/3
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1519s[0m 601ms/step - accuracy: 0.8167 - loss: 0.4007 - val_accuracy: 0.8853 - val_loss: 0.2739
Epoch 2/3
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1325s[0m 530ms/step - accuracy: 0.8723 - loss: 0.2998 - val_accuracy: 0.8960 - val_loss: 0.2582
Epoch 3/3
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1310s[0m 524ms/step - accuracy: 0.8838 - loss: 0.2820 - val_accuracy: 0.8876 - val_loss: 0.2652


# Fine tune
- Unfroze 3 last layer

In [10]:
for layer in base.layers[-3:]:
    layer.trainable = True

for layer in base.layers[-3:]:
  print('Layer: {} ; Trainable: {}'.format(layer, layer.trainable))


Layer: <Conv2D name=block5_conv2, built=True> ; Trainable: True
Layer: <Conv2D name=block5_conv3, built=True> ; Trainable: True
Layer: <MaxPooling2D name=block5_pool, built=True> ; Trainable: True


In [11]:
# Compile lại với learning rate nhỏ hơn
model.compile(optimizer=Adam(learning_rate=1e-5),
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [12]:
checkpoint = ModelCheckpoint(
    f'{PROJECT_ROOT}/dl_cifake/best_vgg16_model.keras',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
    verbose=1
)

callbacks = [checkpoint, early_stop]


In [13]:
history_finetune = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=3,
    callbacks=callbacks
)

Epoch 1/3
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 428ms/step - accuracy: 0.8963 - loss: 0.2536
Epoch 1: val_accuracy improved from -inf to 0.92175, saving model to /content/drive/MyDrive/real-and-ai-generated-synthetic-images/dl_cifake/best_vgg16_model.keras
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1340s[0m 533ms/step - accuracy: 0.8963 - loss: 0.2536 - val_accuracy: 0.9218 - val_loss: 0.1985
Epoch 2/3
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 435ms/step - accuracy: 0.9192 - loss: 0.2048
Epoch 2: val_accuracy improved from 0.92175 to 0.92395, saving model to /content/drive/MyDrive/real-and-ai-generated-synthetic-images/dl_cifake/best_vgg16_model.keras
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1352s[0m 541ms/step - accuracy: 0.9192 - loss: 0.2048 - val_accuracy: 0.9240 - val_loss: 0.1906
Epoch 3/3
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 433ms/step - accuracy: 0.9293

In [14]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
import numpy as np
test_datagen = ImageDataGenerator(rescale=1./255)
test_gen = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224,224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

# --- Dự đoán trên tập test ---
y_pred = model.predict(test_gen)
y_pred_classes = (y_pred > 0.5).astype("int32")
y_true = test_gen.classes  # nhãn thật của tập test

# --- Tính các chỉ số ---
acc = accuracy_score(y_true, y_pred_classes)
prec = precision_score(y_true, y_pred_classes)
rec = recall_score(y_true, y_pred_classes)
f1 = f1_score(y_true, y_pred_classes)
cm = confusion_matrix(y_true, y_pred_classes)

# --- In kết quả ---
print(f"Test Accuracy:  {acc:.4f}")
print(f"Precision:      {prec:.4f}")
print(f"Recall:         {rec:.4f}")
print(f"F1-score:       {f1:.4f}")
print("\nConfusion Matrix:\n", cm)
print("\nClassification Report:\n", classification_report(y_true, y_pred_classes, digits=4))


Found 20000 images belonging to 2 classes.
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 178ms/step
Test Accuracy:  0.9351
Precision:      0.9502
Recall:         0.9182
F1-score:       0.9339

Confusion Matrix:
 [[9519  481]
 [ 818 9182]]

Classification Report:
               precision    recall  f1-score   support

           0     0.9209    0.9519    0.9361     10000
           1     0.9502    0.9182    0.9339     10000

    accuracy                         0.9351     20000
   macro avg     0.9355    0.9350    0.9350     20000
weighted avg     0.9355    0.9351    0.9350     20000

