In [1]:
# Step 1: ViT — load the test data it will be evaluated on

# Reload my test dataset 
import pandas as pd
import torch
from torchvision import transforms
from torch.utils.data import DataLoader
from csv_dataset import CSVDataset  # Ensure this class is implemented correctly

# Single source of truth for the sampled test split, so the DataFrame and the
# Dataset below can never be pointed at different files by accident.
TEST_CSV = "sampled_test.csv"

# Define transformations (same as used during testing)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet normalization
])

# Reload sampled test dataset (tabular view; model outputs are attached to it later)
test_df = pd.read_csv(TEST_CSV)  # Ensure file exists

# Create dataset & dataloader (image view of the same CSV)
test_data = CSVDataset(TEST_CSV, transform=transform)

# Later cells write per-sample model outputs into test_df by position, so the
# Dataset must expose exactly one sample per CSV row. Fail loudly if it does not.
if len(test_data) != len(test_df):
    raise ValueError(
        f"CSVDataset yields {len(test_data)} samples but {TEST_CSV} has "
        f"{len(test_df)} rows; predictions could not be aligned with test_df."
    )

# shuffle=False keeps batch order identical to the CSV row order.
# Pinned memory only helps host->GPU copies, so enable it only when CUDA is present
# (avoids a pointless warning on CPU-only machines).
test_dataloader = DataLoader(
    test_data,
    batch_size=32,
    shuffle=False,
    num_workers=4,
    pin_memory=torch.cuda.is_available(),
)


In [2]:
# Sanity check: pull a single batch from test_dataloader and report what came back.
data_iter = iter(test_dataloader)
first_batch = next(data_iter, None)  # None means the loader had nothing to yield

if first_batch is None:
    print("Error: test_dataloader is empty! Check your dataset loading.")
else:
    images, labels = first_batch
    print("Batch shape:", images.shape, labels.shape)
    # Only the first ten labels are shown to keep the output short.
    print("Sample labels:", labels[:10])


Batch shape: torch.Size([32, 3, 224, 224]) torch.Size([32])
Sample labels: tensor([1, 0, 1, 0, 1, 1, 0, 1, 0, 1])


In [3]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from tqdm import tqdm  # Import tqdm for progress bar

from pathlib import Path  # local import: only needed for the guarded save below

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load trained model
# NOTE(review): absolute, machine-specific path — move to a configurable base
# directory before sharing this notebook.
model_path = "/Users/mandylee/Documents/AI/GenAIProject/MarchRuns/Metaclassifier/model2_complete.pth"
# SECURITY: weights_only=False unpickles a full Python object and can execute
# arbitrary code from the file. Only load checkpoints you created or trust.
pretrained_vit = torch.load(model_path, map_location=device, weights_only=False)
pretrained_vit.to(device)
pretrained_vit.eval()

# Get ViT predictions: one (batch, n_classes) probability array per batch
vit_batch_probs = []

with torch.no_grad():
    for images, _ in tqdm(test_dataloader, desc="ViT Predictions", unit="batch"):
        images = images.to(device)
        outputs = pretrained_vit(images)
        probs = torch.nn.functional.softmax(outputs, dim=1).cpu().numpy()  # Convert to probabilities
        vit_batch_probs.append(probs)

if not vit_batch_probs:
    raise ValueError("test_dataloader produced no batches; cannot compute ViT probabilities.")

# Stack the per-batch arrays once instead of growing a Python list row by row
vit_probs = np.concatenate(vit_batch_probs, axis=0)

# Predictions are attached to test_df by position, so the counts must agree.
if len(vit_probs) != len(test_df):
    raise ValueError(
        f"Got {len(vit_probs)} ViT predictions for {len(test_df)} rows in test_df; "
        "the dataloader and the DataFrame are out of sync."
    )

# Add probabilities as separate columns in DataFrame
# NOTE(review): assumes output index 0 = Real and index 1 = Fake — confirm
# against the label mapping used when the model was trained.
test_df["ViT_Prob_Real"] = vit_probs[:, 0]  # Probability of Real
test_df["ViT_Prob_Fake"] = vit_probs[:, 1]  # Probability of Fake

# Save to CSV. A later cell reads this file, so it has to exist after a fresh
# "Restart & Run All". It is only written when absent so that a version already
# enriched with other models' columns is never clobbered; delete the file to
# force regeneration.
vit_csv_path = Path("updatedprob_test25mar.csv")
if vit_csv_path.exists():
    print(f"{vit_csv_path} already exists; leaving it untouched.")
else:
    test_df.to_csv(vit_csv_path, index=False)
    print(f"Saved ViT probabilities to {vit_csv_path}")


ViT Predictions: 100%|██████████| 63/63 [02:18<00:00,  2.19s/batch]


In [4]:
# Inspect the stacked ViT softmax outputs; column 0 and column 1 are the values
# stored in test_df["ViT_Prob_Real"] and test_df["ViT_Prob_Fake"] respectively.
print(vit_probs)

[[5.2080886e-04 9.9947923e-01]
 [9.8388231e-01 1.6117666e-02]
 [3.1884709e-01 6.8115294e-01]
 ...
 [9.8710644e-01 1.2893634e-02]
 [1.3507353e-02 9.8649269e-01]
 [1.8466439e-02 9.8153359e-01]]


In [9]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load MobileNetV2 model
# NOTE(review): absolute, machine-specific path — move to a configurable base
# directory before sharing this notebook.
mobilenet_model = tf.keras.models.load_model(
    "/Users/mandylee/Documents/AI/GenAIProject/MarchRuns/Metaclassifier/mobilenet_final_model-finetuned2.keras"
)

# File that accumulates every model's probabilities, and the columns this cell owns.
PROBS_CSV = "updatedprob_test25mar.csv"
MOBILENET_COLUMNS = ["MobileNet_Prob_Real", "MobileNet_Prob_Fake"]

# Load test dataset in the shape flow_from_dataframe expects: a "filename"
# column plus string class labels (required for class_mode='binary').
# NOTE: this rebinds `test_df`; any columns added to it by earlier cells are
# not carried over (they are read back from PROBS_CSV below instead).
test_df = (
    pd.read_csv("sampled_test.csv")[["image_path", "label"]]
    .rename(columns={"image_path": "filename"})
    .assign(label=lambda frame: frame["label"].astype(str))
)

# Define ImageDataGenerator
test_datagen = ImageDataGenerator(rescale=1./255)

# Create test generator (shuffle=False to keep order intact)
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    x_col="filename",
    y_col="label",
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

# Get MobileNetV2 probabilities
# NOTE(review): the single output is treated as P(Fake) — confirm against the
# class mapping used when the model was trained.
mobilenet_probs = mobilenet_model.predict(test_generator)  # Probability of Fake
mobilenet_probs = np.hstack([1 - mobilenet_probs, mobilenet_probs])  # Convert to [Real prob, Fake prob]

# Convert probabilities to DataFrame
mobilenet_df = pd.DataFrame(mobilenet_probs, columns=MOBILENET_COLUMNS)

# Load existing ViT results CSV
vit_df = pd.read_csv(PROBS_CSV)

# This cell reads and rewrites the same file. Drop MobileNet columns left by a
# previous run so re-executing the cell replaces them instead of appending
# duplicate columns.
vit_df = vit_df.drop(columns=MOBILENET_COLUMNS, errors="ignore")

# Ensure row order matches (important when merging). Stop instead of printing
# so "Run All" cannot continue with a CSV that is missing the MobileNet columns.
if len(vit_df) != len(mobilenet_df):
    raise ValueError("Error: Mismatch in dataset sizes. Ensure both test datasets match.")

# Equal length alone does not prove equal order; when the stored CSV still has
# the image paths, verify them row by row against the frame fed to the generator.
if "image_path" in vit_df.columns and len(test_df) == len(vit_df):
    same_order = (vit_df["image_path"].to_numpy() == test_df["filename"].to_numpy()).all()
    if not same_order:
        raise ValueError(
            f"Row order of '{PROBS_CSV}' does not match sampled_test.csv; "
            "MobileNetV2 probabilities would be attached to the wrong images."
        )

combined_df = pd.concat([vit_df, mobilenet_df], axis=1)  # Merge as new columns
combined_df.to_csv(PROBS_CSV, index=False)  # Save back
print(f"Updated '{PROBS_CSV}' with MobileNetV2 probabilities.")

Found 2000 validated image filenames belonging to 2 classes.


  self._warn_if_super_not_called()
E0000 00:00:1742898259.148433 1122281 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 120ms/step
Updated 'updatedprob_test25mar.csv' with MobileNetV2 probabilities.
