## Models  
https://huggingface.co/Organika/sdxl-detector  
https://huggingface.co/NYUAD-ComNets/NYUAD_AI-generated_images_detector  
https://huggingface.co/RishiDarkDevil/ai-image-det-resnet152

## Imports

In [1]:
from transformers import AutoModelForImageClassification, AutoProcessor
import torch
from torchvision import datasets, transforms
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(torch.__version__)
print(torch.version.cuda)
print(torch.cuda.is_available())

2.7.0.dev20250302+cu128
12.8
True


## Load Models

In [3]:
model1 = AutoModelForImageClassification.from_pretrained("Organika/sdxl-detector")
model2 = AutoModelForImageClassification.from_pretrained("NYUAD-ComNets/NYUAD_AI-generated_images_detector")
model3 = AutoModelForImageClassification.from_pretrained("RishiDarkDevil/ai-image-det-resnet152")

processor1 = AutoProcessor.from_pretrained("Organika/sdxl-detector")
processor2 = AutoProcessor.from_pretrained("NYUAD-ComNets/NYUAD_AI-generated_images_detector")
# processor3 = AutoProcessor.from_pretrained("RishiDarkDevil/ai-image-det-resnet152") # Processor does not exist, will transform manually

Some weights of ResNetForImageClassification were not initialized from the model checkpoint at RishiDarkDevil/ai-image-det-resnet152 and are newly initialized: ['classifier.1.bias', 'classifier.1.weight', 'embedder.embedder.convolution.weight', 'embedder.embedder.normalization.bias', 'embedder.embedder.normalization.num_batches_tracked', 'embedder.embedder.normalization.running_mean', 'embedder.embedder.normalization.running_var', 'embedder.embedder.normalization.weight', 'encoder.stages.0.layers.0.layer.0.convolution.weight', 'encoder.stages.0.layers.0.layer.0.normalization.bias', 'encoder.stages.0.layers.0.layer.0.normalization.num_batches_tracked', 'encoder.stages.0.layers.0.layer.0.normalization.running_mean', 'encoder.stages.0.layers.0.layer.0.normalization.running_var', 'encoder.stages.0.layers.0.layer.0.normalization.weight', 'encoder.stages.0.layers.0.layer.1.convolution.weight', 'encoder.stages.0.layers.0.layer.1.normalization.bias', 'encoder.stages.0.layers.0.layer.1.normaliz

In [4]:
model3.eval()

ResNetForImageClassification(
  (resnet): ResNetModel(
    (embedder): ResNetEmbeddings(
      (embedder): ResNetConvLayer(
        (convolution): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation): ReLU()
      )
      (pooler): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (encoder): ResNetEncoder(
      (stages): ModuleList(
        (0): ResNetStage(
          (layers): Sequential(
            (0): ResNetBottleNeckLayer(
              (shortcut): ResNetShortCut(
                (convolution): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (normalization): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (layer): Sequential(
                (0): ResNetConvLayer(
                  (convolution): Conv2d(64

## Load Datasets

In [5]:
dir = "archive/datasets"

def load_dataset(dir, batch_size, transform=None):
    dataset = datasets.ImageFolder(dir, transform=transform) # test folder has 0 and 1 subfolders
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    return loader

# transforming to tensors for M1 and M2
transform = transforms.Compose([
    transforms.ToTensor()
])

# load art and faces datasets turned into tensors
artTest = load_dataset(dir + "/art_512x512/test", 32, transform)
facesTest = load_dataset(dir + "/faces_512x512/test", 32, transform)

# transforming to tensors with additional normalization for M3
transformM3 = transforms.Compose([
    transforms.Resize((224, 224)),   
    transforms.ToTensor(),            
    transforms.Normalize(                
        mean=[0.485, 0.456, 0.406],         
        std=[0.229, 0.224, 0.225]
    ),
])

# load art and faces datasets turned into tensors for M3
artTestM3 = load_dataset(dir + "/art_512x512/test", 32, transformM3)
facesTestM3 = load_dataset(dir + "/faces_512x512/test", 32, transformM3)

# for images, labels in artTest:
#     print(images.min(), images.max())  # Check pixel value range
#     print(images.shape)  # Check the image shape

## Test and Evaluate Models

In [6]:
def evaluate_model(model, processor, dataloader):
    y_true = []
    y_pred = []

    with torch.no_grad():
        for images, labels in dataloader:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            #print(device)
            model.to(device)
            images = images.to(device)

            if processor:  
                # For models 1 and 2, use the processor
                inputs = processor(images, return_tensors="pt").to(device)
                outputs = model(**inputs)
            else:  
                # For model 3, images are already transformed (done in load_dataset)
                outputs = model(images)

            predictions = torch.argmax(outputs.logits, dim=1).cpu().numpy()
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predictions)
            # print("True labels:", labels.cpu().numpy())
            # print("Predictions:", predictions)

    return y_true, y_pred

In [7]:
def evaluation_metrics(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    roc_auc = roc_auc_score(y_true, y_pred)
    
    return accuracy, precision, recall, f1, roc_auc

In [8]:
def test_and_results(model, processor, dataloader):
    labels, preds = evaluate_model(model, processor, dataloader)
    accuracy, precision, recall, f1, auc = evaluation_metrics(labels, preds)

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1: {f1}")
    print(f"AUC: {auc}")

In [9]:
print("Model 1, Art")
test_and_results(model1, processor1, artTest)
print("Model 1, Faces")
test_and_results(model1, processor1, facesTest)

Model 1, Art


It looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.


KeyboardInterrupt: 

In [10]:
print("Model 2, Art")
test_and_results(model2, processor2, artTest)
print("Model 2, Faces")
test_and_results(model2, processor2, facesTest)

Model 2, Art
Accuracy: 0.5
Precision: 0.5
Recall: 1.0
F1: 0.6666666666666666
AUC: 0.5
Model 2, Faces


KeyboardInterrupt: 

In [None]:
print("Model 3, Art")
test_and_results(model3, None, artTestM3)
print("Model 3, Faces")
test_and_results(model3, None, facesTestM3)

Model 3, Art
True labels: [0 1 1 1 1 0 0 1 1 0 0 1 0 0 1 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0]
Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
True labels: [0 1 1 0 1 0 0 1 0 0 1 1 0 1 0 0 1 0 1 0 0 1 1 1 0 0 1 0 1 0 1 0]
Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
True labels: [0 0 1 1 1 0 1 0 1 0 1 1 0 0 0 1 0 1 0 0 1 1 1 1 0 0 0 1 1 0 0 0]
Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
True labels: [1 0 0 1 1 1 0 1 1 1 0 1 0 1 0 0 1 1 1 0 1 0 0 1 0 0 1 1 0 0 0 0]
Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
True labels: [1 1 0 0 1 1 1 1 1 0 1 1 1 0 0 1 1 1 1 0 1 0 0 1 1 1 0 1 0 0 1 1]
Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
True labels: [1 0 1 1 0 0 0 0 0 1 1 1 0 0 0 0 1 1 0 1 1 0 0 0 1 1 1 1 0 1 1 0]
Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
True labels: [1 1 0 1 0 0 0 1 0 1 1 0 0

KeyboardInterrupt: 