<a href="https://colab.research.google.com/github/skyler-marks/ai-image-detection/blob/main/Start.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==========================
# 1. Setup & Installations
# ==========================
!pip install timm torch torchvision pillow tqdm

import timm
import torch
import torchvision.transforms as T
from torch import nn
from PIL import Image
import numpy as np
import os
from tqdm import tqdm

In [None]:
# ==========================
# 2. Load ResNet18 (ImageNet pretrained)
# ==========================
import torchvision.models as models
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


# ResNet18 with the IMAGENET1K_V1 weights, used here purely as a feature
# extractor: the final fully-connected layer is replaced by an identity.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Identity()   # remove classifier, output 512-D features
model.eval()  # evaluation mode for inference
model.to(device)

# Transformation for CIFAKE (32x32 -> 224x224 for ResNet)
# Applied to each PIL image before it is fed to the model.
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))  # ImageNet normalization
])


In [None]:
# ==========================
# 4. Download CIFAKE Dataset (Kaggle)
# ==========================
!pip install kaggle


# Download CIFAKE dataset
!kaggle datasets download -d birdy654/cifake-real-and-ai-generated-synthetic-images
!unzip cifake-real-and-ai-generated-synthetic-images.zip -d cifake


In [None]:
# ==========================
# 3. Feature Extraction Function
# ==========================
def extract_features(image_dir, output_file, limit=1000):
    """Extract model features for the images in a directory and save them.

    Entries of ``image_dir`` are processed in sorted filename order. Each one
    is opened with PIL, converted to RGB, run through the module-level
    ``transform`` and ``model`` on ``device``, and the first (only) row of the
    model output is collected. The collected rows are written with ``np.save``.

    Args:
        image_dir: Directory to read. Every entry is opened as an image, so
            it should contain image files only.
        output_file: Destination path for the saved ``.npy`` array.
        limit: Maximum number of files to process. Defaults to 1000 (the demo
            cap); pass ``None`` to process every file.

    Returns:
        None. The features are written to ``output_file`` and a summary line
        is printed.
    """
    files = sorted(os.listdir(image_dir))
    if limit is not None:
        files = files[:limit]

    feats = []
    for fname in tqdm(files):
        path = os.path.join(image_dir, fname)
        # Close the underlying file as soon as the pixels are loaded; without
        # the context manager every iteration leaves a handle open until GC.
        with Image.open(path) as img:
            rgb = img.convert('RGB')
        x = transform(rgb).unsqueeze(0).to(device)  # add a batch dimension
        with torch.no_grad():  # inference only, no gradient bookkeeping
            f = model(x).cpu().numpy()[0]
        feats.append(f)
    feats = np.array(feats)
    np.save(output_file, feats)
    print(f"✅ Saved {feats.shape} to {output_file}")


In [None]:
# Extract features for category folders cat_01 … cat_10, writing one .npy file
# per category for the REAL images and one for the FAKE images.
# NOTE(review): assumes the training images were already sorted into cat_XX
# sub-folders; that step is not shown here — confirm.
for i in range(1, 11):
    suffix = f"{i:02d}"  # zero-padded category index, e.g. "01"
    real_dir = f"cifake/train/REAL/cat_{suffix}"
    fake_dir = f"cifake/train/FAKE/cat_{suffix}"
    # Extract and save
    extract_features(real_dir, f"real_features_{suffix}.npy")
    extract_features(fake_dir, f"fake_features_{suffix}.npy")


In [None]:
# ==========================
# 1. Install and Import Libraries
# ==========================
!pip install ripser persim matplotlib numpy

from ripser import ripser
from persim import plot_diagrams
import numpy as np
import matplotlib.pyplot as plt

# ==========================
# 2. Load Feature Vectors
# ==========================
# Load the per-category feature arrays saved by the ResNet18 extraction step.
m = 3  # number of categories to load (files 01 … m)
real_feats_list = np.array([np.load(f"real_features_{i:02d}.npy") for i in range(1, m + 1)])
fake_feats_list = np.array([np.load(f"fake_features_{i:02d}.npy") for i in range(1, m + 1)])
# real_unreduced = np.array([np.load("real_features_unreduced_" + str(i).zfill(2) +  ".npy") for i in range(1, 4)])
# fake_unreduced = np.array([np.load("fake_features_unreduced_" + str(i).zfill(2) +  ".npy") for i in range(1, 4)])

# Optional sub-sampling size for faster computation.
n = 300   # only referenced by the commented-out sampling code; unused for now
# real_sample = real_feats[np.random.choice(len(real_feats), size=n, replace=False)]
# fake_sample = fake_feats[np.random.choice(len(fake_feats), size=n, replace=False)]

# print("Real sample shape:", real_sample.shape)
# print("Fake sample shape:", fake_sample.shape)
# print("Real unreduced sample shape:", real_unreduced.shape)
# print("Fake unreduced sample shape:", fake_unreduced.shape)

# ==========================
# 3. Combine Data for Joint Analysis
# ==========================
# X = np.vstack([real_sample, fake_sample])

# Create labels (0 for real, 1 for fake)
# labels = np.array([0]*len(real_sample) + [1]*len(fake_sample))



In [None]:
# Flatten each per-image feature entry to 1-D so that every category becomes a
# list of point vectors that can be handed to ripser.
real_list = np.array([[feat.reshape(-1) for feat in category] for category in real_feats_list])
fake_list = np.array([[feat.reshape(-1) for feat in category] for category in fake_feats_list])


In [None]:
from ripser import ripser
print("Computing real sample:")
# One persistent-homology computation (up to H2) per category point cloud.
rip_real = [ripser(cloud, maxdim=2, do_cocycles=True) for cloud in real_list]
# Keep only the persistence diagrams from each ripser result dict.
dgms_real = [result['dgms'] for result in rip_real]

In [None]:
from ripser import ripser
print("Computing fake sample:")
# One persistent-homology computation (up to H2) per category point cloud.
rip_fake = [ripser(proc, do_cocycles=True, maxdim=2) for proc in fake_list]

# Store the fake diagrams under their own name. Assigning them to dgms_real
# would overwrite the real diagrams and leave dgms_fake undefined for the
# plotting cells that follow.
dgms_fake = [element['dgms'] for element in rip_fake]

In [None]:
# Dump the raw ripser result dicts for the fake samples (debug inspection).
print(rip_fake)

In [None]:
# Persistence diagrams for the first category, fake vs real, side by side.
# Without an explicit `ax`, consecutive plot_diagrams calls draw onto the same
# current axes, overlaying both diagrams so they cannot be told apart.
fig, (ax_fake, ax_real) = plt.subplots(1, 2, figsize=(10, 4))
plot_diagrams(dgms_fake[0], ax=ax_fake, title="Fake (category 01)")
plot_diagrams(dgms_real[0], ax=ax_real, title="Real (category 01)")
plt.tight_layout()
plt.show()

In [None]:

# Persistence diagrams for the second category, fake vs real, side by side.
# Without an explicit `ax`, consecutive plot_diagrams calls draw onto the same
# current axes, overlaying both diagrams so they cannot be told apart.
fig, (ax_fake, ax_real) = plt.subplots(1, 2, figsize=(10, 4))
plot_diagrams(dgms_fake[1], ax=ax_fake, title="Fake (category 02)")
plot_diagrams(dgms_real[1], ax=ax_real, title="Real (category 02)")
plt.tight_layout()
plt.show()

In [None]:

# Persistence diagrams for the third category, fake vs real, side by side.
# Without an explicit `ax`, consecutive plot_diagrams calls draw onto the same
# current axes, overlaying both diagrams so they cannot be told apart.
fig, (ax_fake, ax_real) = plt.subplots(1, 2, figsize=(10, 4))
plot_diagrams(dgms_fake[2], ax=ax_fake, title="Fake (category 03)")
plot_diagrams(dgms_real[2], ax=ax_real, title="Real (category 03)")
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def compute_betti_curve(diagrams, dim=0, resolution=200):
    """Compute the Betti curve of one homology dimension.

    Args:
        diagrams: Sequence of persistence diagrams indexed by homology
            dimension (e.g. ``ripser(...)['dgms']``). ``diagrams[dim]`` must
            be an array-like of ``(birth, death)`` rows.
        dim: Homology dimension to use (0 for connected components, 1 for
            loops, etc.).
        resolution: Number of evenly spaced scale values between 0 and the
            largest finite death time.

    Returns:
        ``(scales, betti_vals)``: the sampled scales and, for each scale, the
        number of features with ``birth <= scale < death``. Features that
        never die (infinite death) are ignored. If the diagram has no finite
        pairs, ``(array([0]), array([0]))`` is returned.
    """
    # reshape(-1, 2) keeps column indexing valid even for an empty diagram.
    pairs = np.asarray(diagrams[dim], dtype=float).reshape(-1, 2)
    # Ignore points at infinity
    pairs = pairs[np.isfinite(pairs[:, 1])]

    # np.max would raise on an empty array, so handle empty diagrams up front.
    if len(pairs) == 0:
        return np.array([0]), np.array([0])

    births, deaths = pairs[:, 0], pairs[:, 1]

    # Define filtration scales
    scales = np.linspace(0, deaths.max(), resolution)

    # Count number of features alive at each scale: one row per scale, one
    # column per feature, then sum across features.
    at_scale = scales[:, None]
    betti_vals = ((births <= at_scale) & (deaths > at_scale)).sum(axis=1)

    return scales, betti_vals

# ✅ Example: Plot Betti curves for Real and Fake latent spaces
# dgms_real / dgms_fake hold one list of diagrams per category, so select a
# single category (index 0) first; compute_betti_curve then picks the
# homology dimension from that category's diagrams.
scales_r, betti_r = compute_betti_curve(dgms_real[0], dim=0)
scales_f, betti_f = compute_betti_curve(dgms_fake[0], dim=0)

plt.figure(figsize=(7,4))
plt.plot(scales_r, betti_r, label="Real H0", lw=2)
plt.plot(scales_f, betti_f, label="Fake H0", lw=2, ls="--")
plt.xlabel("Scale (ε)")
plt.ylabel("Betti-0 (# of connected components)")
plt.title("Betti Curves (Real vs Fake)")
plt.legend()
plt.show()


In [None]:
# H1 Betti curve for the first real category. dgms_real is a per-category
# list, so index the category before compute_betti_curve selects the dimension.
scales_r1, betti_r1 = compute_betti_curve(dgms_real[0], dim=1)
plt.plot(scales_r1, betti_r1, label="Real H1 (Loops)")
plt.legend(); plt.show()


In [None]:
# ✅ Compute Betti curves for both sets
# dgms_real / dgms_fake are per-category lists; use the first category's
# diagrams so compute_betti_curve can index them by homology dimension.
scales_r, betti_r = compute_betti_curve(dgms_real[0], dim=0)
scales_f, betti_f = compute_betti_curve(dgms_fake[0], dim=0)

# ✅ Plot together
plt.figure(figsize=(8,4))
plt.plot(scales_r, betti_r, label="Real H0", lw=2, color="blue")
plt.plot(scales_f, betti_f, label="Fake H0", lw=2, ls="--", color="red")
plt.xlabel("Scale (ε)")
plt.ylabel("Betti-0 (# of components)")
plt.title("Betti Curves: Real vs Fake Latent Features")
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# dgms_real[k] is the list of diagrams for category k (one array per homology
# dimension), so the H0 array of the first category is dgms_real[0][0].
print("Real H0 pairs:", dgms_real[0][0].shape)
print("Fake H0 pairs:", dgms_fake[0][0].shape)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def compute_betti_curve(diagrams, dim=0, resolution=200):
    """Return the Betti curve of one homology dimension.

    Args:
        diagrams: Persistence diagrams indexed by homology dimension, i.e.
            ``result['dgms']`` from ripser; ``diagrams[dim]`` is an array of
            ``(birth, death)`` rows.
        dim: Homology dimension, 0 (components) or 1 (loops).
        resolution: Number of scale steps between 0 and the largest finite
            death time.

    Returns:
        ``(scales, betti_vals)`` where ``betti_vals[i]`` counts the features
        with ``birth <= scales[i] < death``. Pairs with infinite death are
        dropped; with no finite pairs left the result is
        ``(array([0]), array([0]))``.
    """
    pairs = diagrams[dim]
    finite_pairs = pairs[np.isfinite(pairs[:, 1])]  # remove infinite deaths

    # Nothing finite to measure: return a single zero sample.
    if finite_pairs.shape[0] == 0:
        return np.array([0]), np.array([0])

    births = finite_pairs[:, 0]
    deaths = finite_pairs[:, 1]
    scales = np.linspace(0, np.max(deaths), resolution)

    # Broadcast scales (rows) against features (columns) and count, per scale,
    # how many features are alive.
    column = scales[:, None]
    alive_mask = (births <= column) & (deaths > column)

    return scales, alive_mask.sum(axis=1)

# ✅ H1 (loop) Betti curves for the first three categories, Real and Fake.
# Each call returns a (scales, betti) pair; the numeric suffix _1/_2/_3 is the
# category position.
(sc_r1_1, br1_1), (sc_r1_2, br1_2), (sc_r1_3, br1_3) = (
    compute_betti_curve(dgms_real[k], dim=1) for k in range(3)
)
(sc_f1_1, bf1_1), (sc_f1_2, bf1_2), (sc_f1_3, bf1_3) = (
    compute_betti_curve(dgms_fake[k], dim=1) for k in range(3)
)
# sc_r2, br2 = compute_betti_curve(dgms_real, dim=2)
# sc_f2, bf2 = compute_betti_curve(dgms_fake, dim=2)

# ✅ Plot H0 (Connected Components)
# plt.figure(figsize=(8,4))
# plt.plot(sc_r0, br0, lw=2, label="Real H0", color="blue")
# plt.plot(sc_f0, bf0, lw=2, ls="--", label="Fake H0", color="red")
# plt.xlabel("Scale (ε)")
# plt.ylabel("Betti-0 (# components)")
# plt.title("Betti Curves (H0): Real vs Fake")
# plt.legend()
# plt.grid(True)
# plt.show()

# ✅ Plot H1 (Loops)
# One figure per category, each comparing the Real and Fake H1 Betti curves.
h1_curves = [
    (sc_r1_1, br1_1, sc_f1_1, bf1_1),
    (sc_r1_2, br1_2, sc_f1_2, bf1_2),
    (sc_r1_3, br1_3, sc_f1_3, bf1_3),
]
for real_scales, real_betti, fake_scales, fake_betti in h1_curves:
    plt.figure(figsize=(8,4))
    plt.plot(real_scales, real_betti, lw=2, label="Real H1", color="green")
    plt.plot(fake_scales, fake_betti, lw=2, ls="--", label="Fake H1", color="orange")
    plt.xlabel("Scale (ε)")
    plt.ylabel("Betti-1 (# loops)")
    plt.title("Betti Curves (H1): Real vs Fake")
    plt.legend()
    plt.grid(True)
    plt.show()

# ✅ Plot H2 (Voids)
# plt.figure(figsize=(8,4))
# plt.plot(sc_r2, br2, lw=2, label="Real H2", color="green")
# plt.plot(sc_f2, bf2, lw=2, ls="--", label="Fake H2", color="orange")
# plt.xlabel("Scale (ε)")
# plt.ylabel("Betti-2 (# voids)")
# plt.title("Betti Curves (H2): Real vs Fake")
# plt.legend()
# plt.grid(True)
# plt.show()
# 
# 