# Imports

In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Dataset

In [20]:
def load_real_vs_fake_dataset(image_size=(224, 224), test_size=0.2, max_per_class=None):
    """Load real and fake face images and return a stratified train/test split.

    Real images are read from ``wiki.zip`` (label 0) and fake images from
    ``inpainting.zip`` (label 1) of the ``OpenRL/DeepFakeFace`` dataset.
    Each image is converted to RGB, resized to ``image_size`` with bilinear
    interpolation and scaled to float32 values in [0, 1].

    Memory: the result stores 4 bytes per channel value, i.e. roughly 0.6 MB
    per image at the default 224x224 size. The split is therefore decided on
    indices first and every image is decoded straight into its final
    ``x_train`` / ``x_test`` slot, so only one copy of the pixel data exists.
    With tens of thousands of images per archive even that single copy can
    exceed available RAM; use ``max_per_class`` or a smaller ``image_size``
    in that case.

    Args:
        image_size: Target ``(width, height)`` handed to ``PIL.Image.resize``.
        test_size: Forwarded unchanged to ``train_test_split``.
        max_per_class: Optional upper bound on the number of images taken
            from each archive (the first ``max_per_class`` examples are
            kept). ``None`` loads everything.

    Returns:
        ``(x_train, y_train), (x_test, y_test), num_classes`` where the ``x``
        arrays are float32 with shape ``(n, height, width, 3)``, the ``y``
        arrays are one-hot encoded with ``num_classes`` columns, and
        ``num_classes`` is 2.

    Raises:
        ValueError: If ``max_per_class`` is given but smaller than 1.
    """
    # Heavy optional dependencies stay local; numpy and to_categorical come
    # from the notebook's import cell.
    from datasets import load_dataset
    from PIL import Image
    from sklearn.model_selection import train_test_split

    if max_per_class is not None and max_per_class < 1:
        raise ValueError("max_per_class must be a positive integer or None")

    num_classes = 2
    # PIL sizes are (width, height) while image arrays are (height, width, C).
    width, height = image_size

    def open_archive(file_name):
        """Open one archive of the dataset, truncated to max_per_class if set."""
        ds = load_dataset("OpenRL/DeepFakeFace", data_files=file_name, split="train")
        if max_per_class is not None and len(ds) > max_per_class:
            ds = ds.select(range(max_per_class))
        return ds

    def decode(example):
        """Turn one dataset example into a (height, width, 3) float32 array in [0, 1]."""
        # convert("RGB") covers grayscale, palette, RGBA and CMYK inputs alike,
        # so every decoded array has exactly three channels.
        img = example['image'].convert("RGB").resize(image_size, Image.BILINEAR)
        return np.asarray(img, dtype=np.float32) / 255.0

    real_ds = open_archive("wiki.zip")
    fake_ds = open_archive("inpainting.zip")
    n_real, n_fake = len(real_ds), len(fake_ds)
    n_total = n_real + n_fake

    # Real examples occupy global indices [0, n_real), fake ones follow.
    all_labels = np.repeat([0, 1], [n_real, n_fake])

    # Split indices instead of pixel data: the stratified shuffle depends only
    # on the sample count, the labels and the seed, so no image copy is needed.
    train_idx, test_idx = train_test_split(
        np.arange(n_total), test_size=test_size, stratify=all_labels, random_state=42
    )

    # For every global index: which output array it belongs to and at which row.
    goes_to_test = np.zeros(n_total, dtype=bool)
    goes_to_test[test_idx] = True
    row_in_split = np.empty(n_total, dtype=np.intp)
    row_in_split[train_idx] = np.arange(len(train_idx))
    row_in_split[test_idx] = np.arange(len(test_idx))

    x_train = np.empty((len(train_idx), height, width, 3), dtype=np.float32)
    x_test = np.empty((len(test_idx), height, width, 3), dtype=np.float32)

    # Iterate each archive sequentially and scatter images into their slots.
    for offset, ds in ((0, real_ds), (n_real, fake_ds)):
        for idx, example in enumerate(ds, start=offset):
            destination = x_test if goes_to_test[idx] else x_train
            destination[row_in_split[idx]] = decode(example)

    # One-hot encode labels for softmax
    y_train = to_categorical(all_labels[train_idx], num_classes)
    y_test = to_categorical(all_labels[test_idx], num_classes)

    return (x_train, y_train), (x_test, y_test), num_classes



In [21]:
# Fetch the train/test arrays together with the number of classes
(x_train, y_train), (x_test, y_test), num_classes = load_real_vs_fake_dataset()

print(f"x_train shape: {x_train.shape}")
print(f"{x_train.shape[0]} training samples")
print(f"{x_test.shape[0]} test samples")

# Convolutional front end: two 3x3 conv layers, one 2x2 max-pool, then flatten
keras_layers = tf.keras.layers
layer_stack = [
    keras_layers.Input(shape=(224, 224, 3)),
    keras_layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
    keras_layers.Conv2D(32, (3, 3), activation='relu'),
    keras_layers.MaxPooling2D(pool_size=(2, 2)),
    keras_layers.Flatten(),
]

# Fully connected head: every Dense layer is preceded by 20% dropout
for hidden_units in (650, 304, 161, 80):
    layer_stack.append(keras_layers.Dropout(0.20))
    layer_stack.append(keras_layers.Dense(hidden_units, activation='relu'))

# Softmax output with one unit per class, also behind dropout
layer_stack.append(keras_layers.Dropout(0.20))
layer_stack.append(keras_layers.Dense(num_classes, activation='softmax'))

model = tf.keras.Sequential(layer_stack)

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 10 epochs in mini-batches of 32.
# NOTE(review): the test split doubles as the validation set here, so the
# reported validation metrics are not an independent hold-out estimate.
history = model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(x_test, y_test),
)



Generating train split: 30000 examples [00:42, 709.71 examples/s]


MemoryError: 