# Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
import os
import shutil

def flatten_folder(source_dir, target_dir, extensions=('.jpg', '.jpeg', '.png')):
    """Copy every image found anywhere under ``source_dir`` directly into ``target_dir``.

    The directory tree below ``source_dir`` is walked recursively and each file
    whose name ends with one of ``extensions`` (compared case-insensitively) is
    copied, with metadata, into the single flat folder ``target_dir``.

    Args:
        source_dir: Root of the tree to search.
        target_dir: Destination folder; created if it does not exist.
        extensions: File-name suffixes to accept. A single string is also
            accepted. Defaults to the common JPEG/PNG suffixes.

    Returns:
        None.

    Note:
        Files are stored under their base name only, so two source files with
        the same name in different sub-folders end up as one file in
        ``target_dir`` (the later copy overwrites the earlier one). This keeps
        re-running the cell idempotent.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    # Lower-case the suffixes too: the file name is lower-cased before the
    # comparison, so upper-case suffixes such as '.JPG' could never match.
    wanted = tuple(ext.lower() for ext in extensions)

    os.makedirs(target_dir, exist_ok=True)

    for root, _dirs, files in os.walk(source_dir):
        for file in files:
            if not file.lower().endswith(wanted):
                continue
            src_path = os.path.join(root, file)
            dst_path = os.path.join(target_dir, file)
            # If target_dir lies inside source_dir the walk reaches files that
            # were just copied; copying a file onto itself raises SameFileError.
            if os.path.abspath(src_path) == os.path.abspath(dst_path):
                continue
            shutil.copy2(src_path, dst_path)

# Example usage: flatten each raw source tree into its class folder.
# NOTE(review): the source paths are absolute and machine-specific; adjust
# them to wherever the raw datasets live before running.
for source_dir, class_dir in (
    ("C:/data/wiki", "data/real"),
    ("C:/data/inpainting", "data/fake"),
):
    flatten_folder(source_dir, class_dir)

# Sanity check: number of entries now present in each class folder.
for class_dir in ("data/real", "data/fake"):
    print(len(os.listdir(class_dir)))



30000
30000


# Dataset

In [3]:
import tensorflow as tf

# --------- CONFIG ---------
image_size = (224, 224)
batch_size = 200
data_dir = "data"  # expects the class sub-folders 'data/real/' and 'data/fake/'

# --------- LOAD RAW DATASETS ---------
# The training and validation loaders differ only in `subset`; every other
# argument (including the split fraction and the seed) is shared.
_split_kwargs = dict(
    validation_split=0.2,
    seed=42,
    image_size=image_size,
    batch_size=batch_size,
    label_mode="categorical",
)
raw_train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="training", **_split_kwargs
)
raw_val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="validation", **_split_kwargs
)

# --------- CLASS NAMES ---------
# Read class_names from the raw dataset, before any .map() is applied.
class_names = raw_train_ds.class_names
num_classes = len(class_names)
print("Class names:", class_names)

# --------- NORMALIZATION & PIPELINE OPTIMIZATION ---------
normalization_layer = tf.keras.layers.Rescaling(1./255)
AUTOTUNE = tf.data.AUTOTUNE


def _rescale(images, labels):
    """Apply the 1/255 rescaling layer to the images; pass the labels through."""
    return normalization_layer(images), labels


# Training pipeline: rescale -> cache -> shuffle -> prefetch.
# The loader already emits batches, so the shuffle buffer of 1000 holds
# whole batches rather than individual images.
train_ds = raw_train_ds.map(_rescale)
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

# Validation pipeline: same steps without shuffling.
val_ds = raw_val_ds.map(_rescale)
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)



Found 60000 files belonging to 2 classes.
Using 48000 files for training.
Found 60000 files belonging to 2 classes.
Using 12000 files for validation.
Class names: ['fake', 'real']


In [None]:
# -------- MODEL DEFINITION ---------
# Two 3x3 convolutions and one 2x2 max-pool, flattened into a stack of
# dropout + dense layers that ends in a softmax over the classes.
keras_layers = tf.keras.layers

conv_stem = [
    keras_layers.Input(shape=(224, 224, 3)),
    keras_layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
    keras_layers.Conv2D(32, (3, 3), activation='relu'),
    keras_layers.MaxPooling2D(pool_size=(2, 2)),
    keras_layers.Flatten(),
]

# Every hidden dense layer is preceded by a dropout of 0.20.
dense_head = []
for units in (650, 304, 161, 80):
    dense_head.append(keras_layers.Dropout(0.20))
    dense_head.append(keras_layers.Dense(units, activation='relu'))

# Output stage: one more dropout, then one softmax unit per class.
classifier = [
    keras_layers.Dropout(0.20),
    keras_layers.Dense(num_classes, activation='softmax'),
]

model = tf.keras.Sequential(conv_stem + dense_head + classifier)

# categorical_crossentropy matches the one-hot labels produced by
# label_mode="categorical" in the dataset loader.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# --------- TRAINING ---------
# train_ds and val_ds are already batched (the batch size is set where the
# datasets are loaded), so `batch_size` is not passed to fit(): Keras
# documents that it must not be specified for dataset inputs. The training
# log confirms the earlier batch_size=128 had no effect: 240 steps per epoch
# is 48000 training images / 200 per batch.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
)

# --------- EVALUATE ON VALIDATION SET ---------
# The model is scored on val_ds, the same split used for validation during
# training, so the numbers are reported as validation metrics rather than
# test metrics (no held-out test split is built in the cells shown here).
loss, acc = model.evaluate(val_ds, verbose=1)

print("\n✅ Evaluation Results:")
print(f"Validation loss: {loss:.4f}")
print(f"Validation accuracy: {acc:.4f}")

#---Plot

def plot_history_metric(history, metric, short_label, axis_label):
    """Plot one training metric and its validation counterpart per epoch.

    Args:
        history: The object returned by ``model.fit``; its ``history`` dict
            is read for ``metric`` and ``'val_' + metric``.
        metric: Key of the training series, e.g. ``'accuracy'`` or ``'loss'``.
        short_label: Short name used in the legend ("Train <short_label>").
        axis_label: Name used for the y-axis and in the title.

    Returns:
        The matplotlib Axes the curves were drawn on.
    """
    # One explicit figure per metric so both plots get the same size, grid
    # and layout regardless of any figure that is already open.
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.plot(history.history[metric], label=f'Train {short_label}')
    ax.plot(history.history[f'val_{metric}'], label=f'Val {short_label}')
    ax.set_title(f'Training vs Validation {axis_label}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(axis_label)
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    plt.show()
    return ax


#Accuracy Plot
plot_history_metric(history, 'accuracy', short_label='Acc', axis_label='Accuracy')

# Loss Plot
plot_history_metric(history, 'loss', short_label='Loss', axis_label='Loss')




Epoch 1/10
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1184s[0m 4s/step - accuracy: 0.5072 - loss: 1.1285 - val_accuracy: 0.5815 - val_loss: 0.6705
Epoch 2/10
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m952s[0m 4s/step - accuracy: 0.6096 - loss: 0.6540 - val_accuracy: 0.7483 - val_loss: 0.5227
Epoch 3/10
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m915s[0m 4s/step - accuracy: 0.7632 - loss: 0.4885 - val_accuracy: 0.7831 - val_loss: 0.4506
Epoch 4/10
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m932s[0m 4s/step - accuracy: 0.8693 - loss: 0.3086 - val_accuracy: 0.9408 - val_loss: 0.1372
Epoch 5/10
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2815s[0m 12s/step - accuracy: 0.9724 - loss: 0.0754 - val_accuracy: 0.9759 - val_loss: 0.0715
Epoch 6/10
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m891s[0m 4s/step - accuracy: 0.9924 - loss: 0.0243 - val_accuracy: 0.9538 - val_loss: 0.1501
Epoch 7/10
[1m240/