In [2]:
import cv2
import numpy as np
from scipy.ndimage import gaussian_filter
import matplotlib.pyplot as plt
import numpy as np
from scipy.fft import fft2, fftshift
from scipy.signal import correlate2d
import time


# Preprocessing

In [3]:
def extract_noise_residual(image, sigma=1.0):
    """
    Extract noise residuals from an image using Gaussian denoising.

    The residual is the image minus a Gaussian-blurred copy of itself,
    rescaled to the [0, 1] range so it can be displayed directly.

    Parameters:
    - image: Input image as a 2D NumPy array (grayscale, H x W) or a 3D NumPy
      array (color, H x W x C). Integer images are converted to float64 first.
    - sigma: Standard deviation for the Gaussian filter.

    Returns:
    - residual: Noise residuals with the same shape as `image`, as a floating
      point array scaled to [0, 1]. A perfectly flat residual is returned as
      all zeros.

    Raises:
    - ValueError: If `image` is neither 2D nor 3D.
    """
    pixels = np.asarray(image)
    if pixels.ndim not in (2, 3):
        raise ValueError(
            f"image must be 2D (grayscale) or 3D (color), got {pixels.ndim}D"
        )

    # Work in floating point: subtracting unsigned integer arrays wraps around
    # instead of producing negative residual values.
    if not np.issubdtype(pixels.dtype, np.floating):
        pixels = pixels.astype(np.float64)

    if pixels.ndim == 2:
        residual = pixels - gaussian_filter(pixels, sigma=sigma)
    else:
        # Blur each channel on its own so the filter never mixes channels.
        residual = np.empty_like(pixels)
        for channel in range(pixels.shape[2]):
            plane = pixels[:, :, channel]
            residual[:, :, channel] = plane - gaussian_filter(plane, sigma=sigma)

    # Normalize the residual for visualization
    residual = residual - residual.min()
    peak = residual.max()
    if peak > 0:
        # Skipped for a flat residual, where dividing would give 0/0 = NaN.
        residual = residual / peak

    return residual


In [None]:
# Load the image as 3-channel color. cv2.imread returns None instead of
# raising when the file is missing or unreadable, so check explicitly.
IMAGE_PATH = 'generated_image.png'
image_bgr = cv2.imread(IMAGE_PATH, cv2.IMREAD_COLOR)
if image_bgr is None:
    raise FileNotFoundError(f"Could not read image file: {IMAGE_PATH!r}")

# OpenCV stores channels as BGR while matplotlib expects RGB; convert first so
# the plots show true colors, then normalize the 8-bit values to [0, 1].
image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) / 255.0

# Extract noise residuals
residual = extract_noise_residual(image, sigma=1.0)

# Plot the original image and its noise residuals side by side
fig, (ax_image, ax_residual) = plt.subplots(1, 2, figsize=(10, 5))
ax_image.set_title('Original Image')
ax_image.imshow(image)

ax_residual.set_title('Noise Residuals')
ax_residual.imshow(residual)

plt.show()


# Feature Extraction

In [5]:
def compute_power_spectrum(residual):
    """
    Average the 2D power spectra of the three color channels of a residual.

    Parameters:
    - residual: Noise residual as a 3D NumPy array (H x W x 3).

    Returns:
    - power_spectrum: H x W array holding the mean over the three channels of
      |FFT|^2, with the zero-frequency component shifted to the center.
    """
    def _channel_power(index):
        # 2D FFT of one channel, re-centred so zero frequency sits in the middle.
        centred = fftshift(fft2(residual[:, :, index]))
        return np.abs(centred) ** 2

    per_channel_power = [_channel_power(index) for index in range(3)]

    # Average the power spectra across all channels
    return np.mean(per_channel_power, axis=0)

In [None]:
power_spectrum = compute_power_spectrum(residual)

# Show the spectrum on a log scale (log1p) so the large low-frequency peak
# does not hide the rest of the structure.
fig, ax = plt.subplots(figsize=(6, 6))
ax.set_title("Power Spectrum of Noise Residuals")
spectrum_artist = ax.imshow(np.log1p(power_spectrum), cmap='viridis')
fig.colorbar(spectrum_artist, ax=ax)
plt.show()

In [7]:
def compute_autocorrelation(residual):
    """
    Compute the autocorrelation of the noise residual.

    Each channel is correlated with itself using `correlate2d` in 'same' mode,
    the per-channel results are averaged, and the average is rescaled to
    [0, 1] for visualization.

    Parameters:
    - residual: Noise residual as a 3D NumPy array (H x W x C, typically C = 3)
      or a 2D NumPy array (H x W), which is treated as a single channel.

    Returns:
    - autocorrelation: H x W array with the average autocorrelation across all
      channels, scaled to [0, 1]. An all-zero residual yields all zeros.

    Raises:
    - ValueError: If `residual` is not 2D or 3D, or has no channels.
    """
    print("calculating autocorrelation...")

    planes = np.asarray(residual)
    if planes.ndim == 2:
        # Promote grayscale to a single-channel stack so one code path serves both.
        planes = planes[:, :, np.newaxis]
    if planes.ndim != 3 or planes.shape[2] == 0:
        raise ValueError(
            "residual must be a 2D array or a 3D array with at least one channel"
        )

    # Calculate the 2D autocorrelation of every channel using correlate2d.
    # NOTE: correlate2d evaluates the correlation directly, so its cost grows
    # quickly with image size; subsample large residuals before calling this.
    per_channel = [
        correlate2d(planes[:, :, channel], planes[:, :, channel], mode='same')
        for channel in range(planes.shape[2])
    ]

    # Average the autocorrelations across all channels
    average_autocorrelation = np.mean(per_channel, axis=0)

    # Normalize the autocorrelation for visualization
    average_autocorrelation = average_autocorrelation - average_autocorrelation.min()
    peak = average_autocorrelation.max()
    if peak > 0:
        # Skipped when the result is flat (e.g. all-zero input) to avoid 0/0 = NaN.
        average_autocorrelation = average_autocorrelation / peak

    return average_autocorrelation

In [None]:
# Autocorrelation of the full-resolution residual; the result is kept in
# `autocorrelation` for later cells to use.
autocorrelation = compute_autocorrelation(residual)

# Render it as a heat map with a colorbar.
fig, ax = plt.subplots(figsize=(6, 6))
ax.set_title("Autocorrelation of Noise Residuals")
autocorr_artist = ax.imshow(autocorrelation)
fig.colorbar(autocorr_artist, ax=ax)
plt.show()

# Basic Detection

In [None]:
def sparse_sample_image(image, stride=2):
    """
    Perform sparse sampling of an image by taking every nth pixel based on the stride.

    Only the first two (row and column) axes are subsampled; any trailing
    channel axis is kept whole.

    Parameters:
    - image: NumPy array of shape H x W x C (color) or H x W (grayscale).
    - stride: Step size for sampling along rows and columns (default is 2).
      Must be at least 1.

    Returns:
    - sampled_image: The sparsely sampled image. For NumPy input this is a
      strided view of `image`, not a copy.

    Raises:
    - ValueError: If `stride` is smaller than 1.
    """
    if stride < 1:
        # A zero step is invalid and a negative one would silently flip the image.
        raise ValueError(f"stride must be a positive integer, got {stride!r}")

    # Indexing only the first two axes leaves any channel axis untouched, so
    # the same expression serves both grayscale and color images.
    sampled_image = image[::stride, ::stride]
    return sampled_image


# Time the sparse pipeline (subsampling + autocorrelation). perf_counter is a
# monotonic high-resolution clock meant for measuring elapsed intervals, unlike
# time.time(), which follows the adjustable wall clock.
stride = 4
start_time_sparse = time.perf_counter()
sparse_residual_image = sparse_sample_image(residual, stride=stride)
autocorr_sparse = compute_autocorrelation(sparse_residual_image)
time_sparse = time.perf_counter() - start_time_sparse

print("time sparse: ", time_sparse)

# Compare the full-resolution autocorrelation (computed in an earlier cell)
# with the one obtained from the subsampled residual.
fig, (ax_full, ax_sparse) = plt.subplots(1, 2, figsize=(10, 5))
ax_full.set_title('Unsampled')
ax_full.imshow(autocorrelation)

ax_sparse.set_title('Sparse')
ax_sparse.imshow(autocorr_sparse)

plt.show()



In [None]:

from sklearn.model_selection import train_test_split
from preprocess import process_dataset

# Input directories handed to process_dataset, in the order it is called with.
FFHQ_DIR = 'data/ffhq/images1024x1024'
# NOTE(review): machine-specific absolute path; this cell only runs on the
# original author's machine. Presumably equivalent to 'data/diffusion' relative
# to the notebook directory -- confirm and switch to a relative/configurable path.
DIFFUSION_DIR = '/Users/lincolnmercuro/Desktop/Deepfake Research/detector/data/diffusion'

features, labels = process_dataset(FFHQ_DIR, DIFFUSION_DIR)

# Split the dataset into training and testing sets (80/20, fixed seed so the
# split is reproducible across runs)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)


In [None]:
# Train a Random Forest classifier
from sklearn.ensemble import RandomForestClassifier
# accuracy_score lives in sklearn.metrics; importing it from sklearn.base
# raises ImportError.
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns


model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
print("Classification Report:")
print(classification_report(y_test, y_pred))


# confusion_matrix puts true labels on rows and predicted labels on columns.
cm = confusion_matrix(y_test, y_pred)
# fmt='d' prints the raw integer counts; the default format switches to
# scientific notation once a cell reaches three digits.
ax = sns.heatmap(cm, annot=True, fmt='d')
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
ax.set_title('Confusion Matrix')

print("Accuracy Score:", accuracy_score(y_test, y_pred))

plt.show()