<a href="https://colab.research.google.com/github/mpanc008/final_project_cai4203/blob/Tuntu/fin.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
import os
import shutil
import zipfile
import requests
from tqdm import tqdm

# Data-source switch for the acquisition code below:
#   True  -> download the DeepFakeFace zip archives from Hugging Face and unpack them.
#   False -> copy images from the local C:/data/<name> folders instead.
RUNNING_ON_COLAB = True

def download_and_unzip(url, extract_to):
    """Download a zip archive and extract it into ``extract_to``.

    The archive is streamed to a file in the current working directory (named
    after the last path segment of ``url``), extracted, and then deleted. The
    downloaded file is removed even when the download or extraction fails, so
    a broken partial archive is never left behind.

    Args:
        url: Direct URL of the ``.zip`` file.
        extract_to: Directory the archive contents are extracted into
            (created by ``zipfile`` if it does not exist).

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Connection problem or timeout.
        zipfile.BadZipFile: The downloaded file is not a valid zip archive.
    """
    filename = url.split("/")[-1]
    print(f"Downloading {filename}...")
    try:
        # (connect, read) timeouts: a stalled connection fails instead of
        # hanging the cell forever. The context manager releases the socket.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            # Fail fast on HTTP errors rather than saving an error page as ".zip".
            response.raise_for_status()
            # 0 means "size unknown"; tqdm then shows a counter without a total.
            total = int(response.headers.get("content-length", 0))
            with open(filename, "wb") as f:
                with tqdm.wrapattr(f, "write", total=total) as out_file:
                    for chunk in response.iter_content(chunk_size=8192):
                        out_file.write(chunk)

        print(f"Unzipping {filename}...")
        with zipfile.ZipFile(filename, 'r') as zip_ref:
            zip_ref.extractall(extract_to)
    finally:
        # Always drop the archive: on success it is no longer needed, on
        # failure it is incomplete or corrupt.
        if os.path.exists(filename):
            os.remove(filename)

def flatten_folder(source_dir, target_dir):
    """Copy every image under ``source_dir`` into the flat folder ``target_dir``.

    Only files ending in .jpg, .jpeg or .png (case-insensitive) are copied.
    Each copy is renamed to ``<source folder>_<sub>_<dirs>_<file>`` so that
    files sharing a base name -- in different subfolders, or in different
    sources flattened into the same target -- do not overwrite each other.
    The names are derived only from the paths, so calling the function again
    on the same source rewrites the same files instead of adding duplicates.

    Args:
        source_dir: Root of the directory tree to scan.
        target_dir: Destination folder; created if missing.
    """
    os.makedirs(target_dir, exist_ok=True)

    # Name of the source folder itself, used as a prefix to keep different
    # sources apart. normpath drops a trailing slash before taking the basename.
    source_tag = os.path.basename(os.path.normpath(source_dir))
    prefix = [] if source_tag in ("", os.curdir, os.pardir) else [source_tag]

    used_names = set()
    for root, dirs, files in os.walk(source_dir):
        dirs.sort()  # in-place sort makes os.walk's traversal order deterministic
        rel_root = os.path.relpath(root, source_dir)
        parts = list(prefix)
        if rel_root != os.curdir:
            parts.extend(rel_root.split(os.sep))

        for file in sorted(files):
            if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            flat_name = "_".join(parts + [file])
            # Joining with "_" can still collide (e.g. "a_b/c.jpg" vs
            # "a/b_c.jpg"); disambiguate with a numeric suffix in that case.
            if flat_name in used_names:
                stem, ext = os.path.splitext(flat_name)
                counter = 1
                while f"{stem}_{counter}{ext}" in used_names:
                    counter += 1
                flat_name = f"{stem}_{counter}{ext}"
            used_names.add(flat_name)

            src = os.path.join(root, file)
            dst = os.path.join(target_dir, flat_name)
            shutil.copy2(src, dst)

# Populate data/real and data/fake, either from the Hugging Face archives
# (Colab) or from local folders.
if RUNNING_ON_COLAB:
    os.makedirs("data/real", exist_ok=True)
    os.makedirs("data/fake", exist_ok=True)

    # "wiki" holds the real photos; the other three archives are generated faces.
    urls = {
        "wiki": "https://huggingface.co/datasets/OpenRL/DeepFakeFace/resolve/main/wiki.zip",
        "inpainting": "https://huggingface.co/datasets/OpenRL/DeepFakeFace/resolve/main/inpainting.zip",
        "insight": "https://huggingface.co/datasets/OpenRL/DeepFakeFace/resolve/main/insight.zip",
        "text2img": "https://huggingface.co/datasets/OpenRL/DeepFakeFace/resolve/main/text2img.zip"
    }

    for name, url in urls.items():
        extract_dir = f"temp/{name}"
        try:
            download_and_unzip(url, extract_dir)
            target = "data/real" if name == "wiki" else "data/fake"
            flatten_folder(extract_dir, target)
        finally:
            # temp/<name> is only a staging area. Removing it as soon as the
            # images are copied avoids keeping a second multi-GB copy of each
            # archive on the (limited) Colab disk.
            shutil.rmtree(extract_dir, ignore_errors=True)

else:
    # Your local logic here
    flatten_folder("C:/data/wiki", "data/real")
    flatten_folder("C:/data/inpainting", "data/fake")
    flatten_folder("C:/data/insight", "data/fake")
    flatten_folder("C:/data/text2img", "data/fake")

# Sanity check: how many images ended up in each class folder.
print("Real images:", len(os.listdir("data/real")))
print("Fake images:", len(os.listdir("data/fake")))




Downloading wiki.zip...


100%|██████████| 1.65G/1.65G [00:06<00:00, 265MB/s]


Unzipping wiki.zip...
Downloading inpainting.zip...


100%|██████████| 1.01G/1.01G [00:03<00:00, 322MB/s]


Unzipping inpainting.zip...
Downloading insight.zip...


100%|██████████| 1.01G/1.01G [00:03<00:00, 283MB/s]


Unzipping insight.zip...
Downloading text2img.zip...


100%|██████████| 1.06G/1.06G [00:03<00:00, 325MB/s]


Unzipping text2img.zip...
Real images: 30000
Fake images: 30000


# Dataset

In [None]:
import tensorflow as tf

# --------- CONFIG ---------
image_size = (224, 224)
batch_size = 200
data_dir = "data"  # Make sure 'data/real/' and 'data/fake/' exist
validation_split = 0.2
split_seed = 42  # both subsets must use the same seed so they do not overlap

# One batch is batch_size x 224 x 224 x 3 float32 values (~120 MB). The full
# training split (48000 images = 240 batches) is therefore ~29 GB and the
# validation split ~7 GB -- more RAM than a standard runtime offers. Only
# enable the in-memory cache on a machine that can actually hold that much.
cache_in_memory = False
# The datasets are already batched, so the shuffle buffer counts *batches*.
# Keep it small: every buffered batch costs ~120 MB.
shuffle_buffer_batches = 8


# --------- LOAD RAW DATASETS ---------
def _load_split(subset):
    """Load one side ("training" or "validation") of the train/validation split.

    Both sides share every argument except ``subset`` so the two calls cannot
    drift apart (a different seed or split would make the subsets overlap).
    """
    return tf.keras.utils.image_dataset_from_directory(
        data_dir,
        validation_split=validation_split,
        subset=subset,
        seed=split_seed,
        image_size=image_size,
        batch_size=batch_size,
        label_mode="categorical"
    )


# Load before any .map() so we can access class_names
raw_train_ds = _load_split("training")
raw_val_ds = _load_split("validation")

# --------- CLASS NAMES ---------
class_names = raw_train_ds.class_names
num_classes = len(class_names)
print("Class names:", class_names)

# --------- NORMALIZATION & PIPELINE OPTIMIZATION ---------
normalization_layer = tf.keras.layers.Rescaling(1./255)  # pixels 0..255 -> 0..1
AUTOTUNE = tf.data.AUTOTUNE


def _prepare(ds, training):
    """Normalize pixel values and add the performance stages of the pipeline.

    Args:
        ds: Batched dataset of ``(images, labels)``.
        training: When True, a small batch-level shuffle is added.

    Returns:
        The dataset with rescaling, optional caching/shuffling and prefetching.
    """
    ds = ds.map(
        lambda x, y: (normalization_layer(x), y),
        num_parallel_calls=AUTOTUNE,  # rescale batches in parallel
    )
    if cache_in_memory:
        ds = ds.cache()
    if training:
        ds = ds.shuffle(shuffle_buffer_batches)
    return ds.prefetch(buffer_size=AUTOTUNE)


train_ds = _prepare(raw_train_ds, training=True)
val_ds = _prepare(raw_val_ds, training=False)



Found 60000 files belonging to 2 classes.
Using 48000 files for training.
Found 60000 files belonging to 2 classes.
Using 12000 files for validation.
Class names: ['fake', 'real']


In [None]:
# -------- MODEL DEFINITION ---------
EPOCHS = 10  # number of passes over train_ds; adjust to the available compute


def build_model(n_classes, input_shape=(224, 224, 3)):
    """Build and compile the CNN classifier.

    Architecture: two 3x3 convolutions (32 filters each), one 2x2 max-pool,
    then four dense layers (650, 304, 161, 80 units) with 20% dropout around
    each, and a softmax output.

    Args:
        n_classes: Number of output classes (size of the softmax layer).
        input_shape: Shape of one input image as (height, width, channels).

    Returns:
        A compiled ``tf.keras.Sequential`` model (Adam optimizer,
        categorical cross-entropy loss, accuracy metric).
    """
    # NOTE(review): with a 224x224 input, Flatten yields 111*111*32 = 394,272
    # features, so the first Dense layer alone has ~256M weights. Consider
    # more conv/pool stages before flattening.
    net = tf.keras.Sequential([
        tf.keras.layers.Input(shape=input_shape),
        tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Flatten(),

        tf.keras.layers.Dropout(0.20),
        tf.keras.layers.Dense(650, activation='relu'),
        tf.keras.layers.Dropout(0.20),
        tf.keras.layers.Dense(304, activation='relu'),
        tf.keras.layers.Dropout(0.20),
        tf.keras.layers.Dense(161, activation='relu'),
        tf.keras.layers.Dropout(0.20),
        tf.keras.layers.Dense(80, activation='relu'),
        tf.keras.layers.Dropout(0.20),
        tf.keras.layers.Dense(n_classes, activation='softmax')
    ])
    net.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return net


def plot_history(train_values, val_values, ylabel, title):
    """Plot one training metric against its validation counterpart per epoch."""
    plt.figure(figsize=(6, 4))
    plt.plot(train_values, label=f'Train {ylabel}')
    plt.plot(val_values, label=f'Val {ylabel}')
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


# --------- DEVICE STRATEGY ---------
# Look for a TPU once; fall back to the default (CPU/GPU) strategy otherwise.
resolver = None
try:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    print("TPU found:", resolver.cluster_spec())
except ValueError:
    print("TPU not found")

strategy = tf.distribute.get_strategy()  # default CPU/GPU strategy
running_on_tpu = False
if resolver is not None:
    try:
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        strategy = tf.distribute.TPUStrategy(resolver)
        running_on_tpu = True
    except ValueError as err:
        print("TPU initialization failed:", err)
print("Running on TPU" if running_on_tpu else "Running on CPU/GPU")

# Build the model exactly once, inside the strategy scope, so its variables
# are created on the selected device(s).
with strategy.scope():
    model = build_model(num_classes, input_shape=(*image_size, 3))

# --------- TRAINING ---------
# fit() both trains the network and returns the History object the plots
# below read from. Without it the evaluation would score random weights.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS
)

# --------- EVALUATE ON VALIDATION SET ---------
loss, acc = model.evaluate(val_ds, verbose=1)

print("\n✅ Evaluation Results:")
# These numbers come from val_ds; there is no separate held-out test set.
print(f"Validation loss: {loss:.4f}")
print(f"Validation accuracy: {acc:.4f}")

# --------- PLOTS ---------
plot_history(
    history.history['accuracy'],
    history.history['val_accuracy'],
    ylabel='Accuracy',
    title='Training vs Validation Accuracy',
)
plot_history(
    history.history['loss'],
    history.history['val_loss'],
    ylabel='Loss',
    title='Training vs Validation Loss',
)




TPU not found
Running on CPU/GPU
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m485s[0m 8s/step - accuracy: 0.5038 - loss: 0.6980

✅ Evaluation Results:
Test loss: 0.6963
Test accuracy: 0.5090


NameError: name 'history' is not defined