In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Step 1: Import Libraries
import tensorflow as tf
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split

# Step 2: Set Paths and Parameters
data_dir = '/content/data'  # Dataset root; load_data() reads its 'real' and 'fake' subfolders
img_size = (224, 224)  # Size every image/frame is resized to before it is fed to the model
batch_size = 8  # Batch size passed to model.fit
frames_per_video = 1  # Maximum number of frames read from the start of each .mp4

# Step 3: Load and Preprocess Data
def _to_model_input(pixels):
    """Resize *pixels* to ``img_size`` and return a 3-channel float32 array.

    *pixels* is an RGB or single-channel image array. Values are divided by
    255, so 8-bit input ends up in the 0-1 range.
    """
    resized = cv2.resize(pixels, img_size)
    # cv2.resize returns a 2-D array for single-channel input (the trailing
    # channel axis is dropped), so restore it before replicating to 3 channels.
    if resized.ndim == 2:
        resized = resized[:, :, np.newaxis]
    if resized.shape[2] == 1:
        resized = np.repeat(resized, 3, axis=2)
    # float32 halves the memory of the default float64 result.
    return resized.astype(np.float32) / 255.0


def _load_png(file_path):
    """Return a one-element list with the preprocessed image, or [] if unreadable."""
    bgr = cv2.imread(file_path)
    if bgr is None:
        return []
    # OpenCV decodes to BGR; convert so every source yields RGB.
    return [_to_model_input(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))]


def _load_video(file_path):
    """Return up to ``frames_per_video`` preprocessed frames from the start of a video."""
    frames = []
    cap = cv2.VideoCapture(file_path)
    try:
        while len(frames) < frames_per_video and cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # Frames are decoded as BGR, same as cv2.imread.
            frames.append(_to_model_input(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
    finally:
        # Release the capture handle even if a frame fails to process.
        cap.release()
    return frames


def _load_npy(file_path):
    """Return a one-element list with the preprocessed array, or [] if it is skipped.

    Only 3-D arrays with 1 or 3 channels on the last axis are accepted.
    """
    try:
        data = np.load(file_path)
        if data.ndim != 3 or data.shape[2] not in (1, 3):  # Grayscale or RGB
            print(f"Skipping .npy file {file_path}: unsupported shape {data.shape}")
            return []
        return [_to_model_input(data)]
    except Exception as e:
        # Deliberately broad: loading is best-effort, and one bad file
        # (corrupt data, unsupported dtype) must not abort the whole run.
        print(f"Skipping .npy file {file_path}: {str(e)}")
        return []


def load_data(data_dir):
    """Load every supported file under ``data_dir/real`` and ``data_dir/fake``.

    Supported files are ``.png`` images, ``.mp4`` videos (the first
    ``frames_per_video`` frames) and ``.npy`` arrays; extensions are matched
    case-insensitively and other files are ignored. Each sample is resized to
    the module-level ``img_size``, converted to 3-channel RGB and divided by
    255.

    Args:
        data_dir: Directory containing the ``real`` and ``fake`` subfolders.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays, where the label is 0 for
        samples from ``real`` and 1 for samples from ``fake``.

    Raises:
        OSError: If either subfolder cannot be listed.
    """
    loaders = {'.png': _load_png, '.mp4': _load_video, '.npy': _load_npy}
    images = []
    labels = []
    for label, folder in enumerate(['real', 'fake']):  # 0 for real, 1 for fake
        path = os.path.join(data_dir, folder)
        print(f"Loading files from {path}...")
        # os.listdir order is arbitrary; sorting keeps the sample order (and
        # therefore any seeded split of it) reproducible.
        for file in sorted(os.listdir(path)):
            loader = loaders.get(os.path.splitext(file)[1].lower())
            if loader is None:
                continue
            samples = loader(os.path.join(path, file))
            images.extend(samples)
            labels.extend([label] * len(samples))

    return np.array(images), np.array(labels)

# Load the data
print("Loading data...")
X, y = load_data(data_dir)
if not len(X):
    # Nothing could be read into an image, so there is nothing to train on.
    raise ValueError("No files found in the data folder. Please check your dataset.")

# Hold out 20% of the samples for validation; the seed fixes the shuffle
# for a given sample order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)
print(f"Training samples: {len(X_train)}, Test samples: {len(X_test)}")

# Step 4: Build the Model
# Xception backbone with ImageNet weights and no classification head.
# No input_shape is given, so any (height, width, 3) input is accepted.
base_model = Xception(weights='imagenet', include_top=False)

# Freeze the backbone so that only the new classification head is trained.
for layer in base_model.layers:
    layer.trainable = False

inputs = tf.keras.Input(shape=(None, None, 3))
# The data pipeline yields pixels in [0, 1], but the ImageNet weights of
# Xception expect inputs in [-1, 1]. Rescaling inside the model keeps the
# model's input contract ([0, 1] images) unchanged for training and inference.
x = tf.keras.layers.Rescaling(scale=2.0, offset=-1.0)(inputs)
# training=False keeps the frozen backbone's BatchNormalization layers in
# inference mode.
x = base_model(x, training=False)
x = GlobalAveragePooling2D()(x)
# Single sigmoid unit: predicted probability of label 1 ("fake").
x = Dense(1, activation='sigmoid')(x)
model = Model(inputs=inputs, outputs=x)

# Step 5: Compile the Model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 6: Train the Model
print("Training model...")
# The held-out split doubles as the validation set reported after each epoch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(X_test, y_test),
)

# Step 7: Save the Model
# The .h5 extension selects Keras' HDF5 file format.
model.save('/content/deepfake_detector.h5')
print("Model saved as /content/deepfake_detector.h5")