In [1]:
try:
    # Comment out if not using colab
    from google.colab import drive
    drive.mount('/content/drive')

    # Specific for luca's computer
    %cd "/content/drive/Othercomputers/Min MacBook Pro/INFO381-GitHub"
    using_colab = True
except:
    print("Not using Google Colab")
    using_colab = False

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/Othercomputers/Min MacBook Pro/INFO381-GitHub


**To run the code with CLIP, either have git installed locally or run the notebook on Google Colab**

In [2]:
!pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-v73_x3bm
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-v73_x3bm
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [1]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torchvision.models as models
import torch.nn as nn
from sklearn.metrics import accuracy_score, classification_report
import clip


# Local imports
import sys
import os

# Make the project's helper modules importable. The relative location of the
# helper_functions folder depends on whether the Colab setup cell changed the
# working directory.
sys.path.append(
    os.path.abspath("helper_functions" if using_colab else "../helper_functions")
)
from utils import get_dataloaders
from model_definitions import CLIPClassifier

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

**Load both train and test dataloaders for the CNN model**

In [4]:
# Define resize transform for the CNN: every image is resized to 512x512 and
# converted to a tensor (presumably the same preprocessing used when the
# checkpoint was trained - confirm against the training notebook).
resize_transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor()
])

cnn_train_loader, cnn_test_loader = get_dataloaders(
    zip_path="fake_vs_real.zip",
    batch_size=32,
    split='both',
    transform=resize_transform
)

# Rebuild the architecture without downloading ImageNet weights; `weights=None`
# is the current spelling of the deprecated `pretrained=False`.
cnn_model = models.resnet18(weights=None)
# Replace the final fully connected layer with a 2-class head so the layer
# shapes match the checkpoint loaded below.
num_ftrs = cnn_model.fc.in_features
cnn_model.fc = nn.Linear(num_ftrs, 2)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine,
# matching how the CLIP classifier checkpoint is loaded.
cnn_model.load_state_dict(torch.load("models/resnet18_cnn.pth", map_location=device))

cnn_model.to(device)
cnn_model.eval()

Running in Google Colab




ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

**Load the CLIP backbone and our classifier model**

In [5]:
# Load the CLIP ViT-B/32 backbone together with the preprocessing transform
# that clip.load returns alongside it.
clip_model, preprocess = clip.load("ViT-B/32", device=device)

# Wrap the backbone in our classifier and restore the saved weights.
clip_classifier = CLIPClassifier(clip_model, embed_dim=512, num_classes=2)
clip_classifier = clip_classifier.to(device)
clip_classifier.load_state_dict(
    torch.load("models/clip_classifier_10epochs.pth", map_location=device)
)

# The CLIP preprocessing transform is applied to the images of both splits.
clip_train_loader, clip_test_loader = get_dataloaders(
    zip_path="fake_vs_real.zip",
    batch_size=32,
    split='both',
    transform=preprocess,
)

clip_classifier.eval()

Running in Google Colab


CLIPClassifier(
  (clip_model): CLIP(
    (visual): VisionTransformer(
      (conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
      (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (transformer): Transformer(
        (resblocks): Sequential(
          (0): ResidualAttentionBlock(
            (attn): MultiheadAttention(
              (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
            )
            (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (mlp): Sequential(
              (c_fc): Linear(in_features=768, out_features=3072, bias=True)
              (gelu): QuickGELU()
              (c_proj): Linear(in_features=3072, out_features=768, bias=True)
            )
            (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
          (1): ResidualAttentionBlock(
            (attn): MultiheadAttention(
              (out_proj): NonDynamicallyQu

**Define the evaluation function (works for both models)**

In [11]:
def evaluate(model, dataloader, device=None):
    """Run `model` over `dataloader` and score its predictions.

    Args:
        model: A `torch.nn.Module` whose forward pass returns per-class scores
            with the class dimension at index 1 (assumed to be shaped
            `(batch, num_classes)` - confirm for any new model).
        dataloader: Iterable yielding `(inputs, labels)` tensor batches.
        device: Device the inputs are moved to before the forward pass.
            Defaults to the device of the model's first parameter (CPU if the
            model has no parameters), so the function does not depend on a
            notebook-level `device` variable.

    Returns:
        A tuple `(accuracy, all_preds, all_labels)`: the `accuracy_score` of
        the predictions, followed by the flat lists of predicted and true
        labels in the order the dataloader produced them.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    all_preds, all_labels = [], []

    # Score in eval mode regardless of how the caller left the model, and put
    # the previous mode back afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, labels in dataloader:
                outputs = model(inputs.to(device))
                # Predicted class = index of the highest score along dim 1.
                preds = outputs.argmax(dim=1)
                all_preds.extend(preds.cpu().numpy())
                # Labels are only collected, never fed to the model, so they
                # do not need a round trip through the device.
                all_labels.extend(labels.cpu().numpy())
    finally:
        model.train(was_training)

    accuracy = accuracy_score(all_labels, all_preds)

    return accuracy, all_preds, all_labels

**Evaluate accuracy on train and test sets on CNN**

In [9]:
# Score the ResNet18 on each split. The predictions and labels returned next
# to the accuracy are kept so later cells can reuse them.
(accuracy_train_cnn,
 preds_train_cnn,
 labels_train_cnn) = evaluate(cnn_model, cnn_train_loader)
(accuracy_test_cnn,
 preds_test_cnn,
 labels_test_cnn) = evaluate(cnn_model, cnn_test_loader)

# Report both accuracies rounded to two decimals.
print(f"ResNet18 Train Accuracy: {accuracy_train_cnn:.2f}")
print(f"ResNet18 Test Accuracy: {accuracy_test_cnn:.2f}")




ResNet18 Train Accuracy: 0.94
ResNet18 Test Accuracy: 0.89


**Evaluate accuracy on train and test sets on CLIP**

In [13]:
# Score the CLIP classifier on each split. The predictions and labels returned
# next to the accuracy are kept so later cells can reuse them.
(accuracy_train_clip,
 preds_train_clip,
 labels_train_clip) = evaluate(clip_classifier, clip_train_loader)
(accuracy_test_clip,
 preds_test_clip,
 labels_test_clip) = evaluate(clip_classifier, clip_test_loader)

# Report both accuracies rounded to two decimals.
print(f"CLIP ViT Train Accuracy: {accuracy_train_clip:.2f}")
print(f"CLIP ViT Test Accuracy: {accuracy_test_clip:.2f}")




CLIP ViT Train Accuracy: 0.97
CLIP ViT Test Accuracy: 0.95


**Evaluate metrics on train and test sets on CNN**

In [14]:
# Per-class precision, recall and F1 of the ResNet18 on the train split ...
print(
    "Classification Report (Train):",
    classification_report(labels_train_cnn, preds_train_cnn),
    sep="\n",
)

# ... and on the test split.
print(
    "Classification Report (Test):",
    classification_report(labels_test_cnn, preds_test_cnn),
    sep="\n",
)

Classification Report (Train):
              precision    recall  f1-score   support

           0       0.96      0.92      0.94      3024
           1       0.92      0.96      0.94      3023

    accuracy                           0.94      6047
   macro avg       0.94      0.94      0.94      6047
weighted avg       0.94      0.94      0.94      6047

Classification Report (Test):
              precision    recall  f1-score   support

           0       0.91      0.87      0.89       756
           1       0.88      0.92      0.90       757

    accuracy                           0.89      1513
   macro avg       0.89      0.89      0.89      1513
weighted avg       0.89      0.89      0.89      1513



**Evaluate metrics on train and test sets on CLIP**

In [15]:
# Per-class precision, recall and F1 of the CLIP classifier on the train split ...
print(
    "CLIP ViT Classification Report (Train):",
    classification_report(labels_train_clip, preds_train_clip),
    sep="\n",
)

# ... and on the test split.
print(
    "CLIP ViT Classification Report (Test):",
    classification_report(labels_test_clip, preds_test_clip),
    sep="\n",
)

CLIP ViT Classification Report (Train):
              precision    recall  f1-score   support

           0       0.96      0.97      0.97      3024
           1       0.97      0.96      0.96      3023

    accuracy                           0.97      6047
   macro avg       0.97      0.97      0.97      6047
weighted avg       0.97      0.97      0.97      6047

CLIP ViT Classification Report (Test):
              precision    recall  f1-score   support

           0       0.94      0.95      0.95       756
           1       0.95      0.94      0.95       757

    accuracy                           0.95      1513
   macro avg       0.95      0.95      0.95      1513
weighted avg       0.95      0.95      0.95      1513

