### Imports

In [2]:
# https://huggingface.co/Organika/sdxl-detector#validation-metrics
# The code below fine-tunes the model on one dataset at a time (faces, art, or mixed) and evaluates it on that dataset's validation set
# https://huggingface.co/blog/fine-tune-vit

import inspect
import os

import evaluate
import numpy as np
import torch
from datasets import load_dataset
from transformers import AutoImageProcessor, SwinForImageClassification, TrainingArguments, Trainer

# Work from the project root so the relative dataset and output paths used below resolve.
# The notebook is expected to sit one level below the root; only step up when the
# `archive` data folder is not already visible. This keeps the cell idempotent on
# re-run and avoids a machine-specific absolute path.
print(os.getcwd())
if not os.path.isdir("archive"):
    os.chdir("..")
print(os.getcwd())

# CUDA check: use the GPU when one is available, otherwise fall back to the CPU.
# To force CPU (e.g. for debugging), replace the line below with: device = torch.device("cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

e:\Projects & Temp\GitHub\thesis\RQ2
E:\Projects & Temp\GitHub\thesis
cuda


### Load Datasets and Processor

In [3]:
# Image processor published with the pre-trained detector checkpoint.
# (Only the processor is created here; the classification model is instantiated in a later cell.)
processor = AutoImageProcessor.from_pretrained("Organika/sdxl-detector")


def _load_imagefolder(data_dir):
    """Load the dataset found under `data_dir` using the "imagefolder" builder."""
    return load_dataset("imagefolder", data_dir=data_dir)


# Dataset locations, relative to the current working directory.
art_dataset_path = 'archive/datasets/art_512x512'
faces_dataset_path = 'archive/datasets/faces_512x512'
mixed_dataset_path = 'archive/datasets/mixed_512x512'

art_ds = _load_imagefolder(art_dataset_path)
faces_ds = _load_imagefolder(faces_dataset_path)
mixed_ds = _load_imagefolder(mixed_dataset_path)

# Show the split layout of one dataset as a sanity check.
print(faces_ds)

Generating train split: 12800 examples [00:01, 8735.35 examples/s]
Generating validation split: 1600 examples [00:00, 8686.79 examples/s]
Generating test split: 1600 examples [00:00, 8690.72 examples/s]


DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 12800
    })
    validation: Dataset({
        features: ['image', 'label'],
        num_rows: 1600
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 1600
    })
})


### Setting Up

In [4]:
# Transform images to model input
def transform(image_batch):
    """Convert a batch of dataset rows into model inputs.

    Args:
        image_batch: Mapping with an 'image' list and a parallel 'label' list.
            Images are assumed to be PIL images (anything exposing
            `.convert("RGB")`) -- TODO confirm against the dataset loader.

    Returns:
        The processor output (containing 'pixel_values') with an added
        'labels' tensor built from the batch labels.

    Tensors are deliberately left on the CPU. Device placement is the
    Trainer's job, and CUDA tensors produced inside a dataset transform
    cannot be pinned or handed across DataLoader worker processes.
    """
    # Normalise the colour mode so greyscale / RGBA / palette images all reach
    # the processor as 3-channel RGB.
    rgb_images = [image.convert("RGB") for image in image_batch['image']]
    inputs = processor(images=rgb_images, return_tensors="pt")
    inputs['labels'] = torch.tensor(image_batch['label'])  # Ensure labels are tensors
    return inputs

# Wrap each dataset with `transform` via `with_transform`.
art_ds_transformed, faces_ds_transformed, mixed_ds_transformed = (
    dataset.with_transform(transform) for dataset in (art_ds, faces_ds, mixed_ds)
)

In [5]:
# Turn dicts into tensors
def collate_fn(batch):
    """Merge per-example dicts into a single batch dict.

    Args:
        batch: Non-empty list of dicts, each with a 'pixel_values' tensor and
            a 'labels' entry (Python int or 0-d tensor).

    Returns:
        Dict with 'pixel_values' stacked along a new leading batch dimension
        and 'labels' as a 1-D int64 tensor.

    No explicit device transfer happens here: the Trainer moves each batch to
    its own device, and keeping the collator device-agnostic lets it work with
    pinned memory and DataLoader workers.
    """
    return {
        'pixel_values': torch.stack([example['pixel_values'] for example in batch]),
        # int(...) accepts both plain ints and 0-d tensors, avoiding the
        # tensor-from-list-of-tensors construction.
        'labels': torch.tensor([int(example['labels']) for example in batch], dtype=torch.long),
    }

In [6]:
# Metric objects used by compute_metrics.
acc_metric, f1_metric = evaluate.load('accuracy'), evaluate.load('f1')

def compute_metrics(p):
    """Score a set of predictions with the accuracy and F1 metrics.

    Args:
        p: Object exposing `predictions` (per-class scores, argmax taken over
            axis 1) and `label_ids` (reference labels).

    Returns:
        Dict with the keys "Accuracy" and "F1".
    """
    predicted_classes = np.argmax(p.predictions, axis=1)
    accuracy_result = acc_metric.compute(predictions=predicted_classes, references=p.label_ids)
    f1_result = f1_metric.compute(predictions=predicted_classes, references=p.label_ids)
    return {"Accuracy": accuracy_result["accuracy"], "F1": f1_result["f1"]}

### Parameters

In [7]:
# Single switch for the experiment. The dataset and its output directory are both
# derived from this key, so they cannot drift apart the way independently
# commented/uncommented lines can.
dataset_name = "faces"  # one of: "faces", "art", "mixed"

_experiments = {
    "faces": (faces_ds_transformed, "./sdxl-fine-tune"),
    "art": (art_ds_transformed, "./sdxl-fine-tune-art"),
    "mixed": (mixed_ds_transformed, "./sdxl-fine-tune-mixed"),
}
dataset_to_use, output_dir_name = _experiments[dataset_name]

# Candidate learning rates; `chosen_lr` is an index into this list (1 selects 5e-5).
lr_values = [1e-5, 5e-5, 1e-4, 5e-4, 1e-3]
chosen_lr = 1

### Load Model

In [8]:
# Class names come from the label feature of the training split.
labels = dataset_to_use['train'].features['label'].names
print(labels[0:2])

# Label maps in both directions; ids are stored as strings.
id_to_name = {str(idx): name for idx, name in enumerate(labels)}
name_to_id = {name: idx for idx, name in id_to_name.items()}

# Start from the pre-trained detector weights, then place the model on the selected device.
model = SwinForImageClassification.from_pretrained(
    "Organika/sdxl-detector",
    label2id=name_to_id,
    id2label=id_to_name,
    num_labels=len(labels),
)
model = model.to(device)

['0', '1']


In [9]:
# Training configuration. Only the learning rate is varied between runs; the other
# values listed here stay fixed, and anything not listed keeps the library default.
training_kwargs = dict(
    # Where checkpoints and logs go, and how many checkpoints to keep
    output_dir=output_dir_name,
    save_total_limit=2,
    push_to_hub=False,
    report_to="tensorboard",
    # Optimisation
    num_train_epochs=1,
    per_device_train_batch_size=8,
    learning_rate=lr_values[chosen_lr],
    lr_scheduler_type="constant",
    fp16=False,  # author's note: True leads to runtime errors on CUDA
    # Evaluate, save and log on a step schedule
    evaluation_strategy="steps",
    eval_steps=100,
    save_steps=100,
    logging_steps=10,
    load_best_model_at_end=True,
    # Data handling
    remove_unused_columns=False,
    dataloader_pin_memory=False,  # author's note: otherwise it doesn't work with CUDA
)
training_args = TrainingArguments(**training_kwargs)




In [10]:
# Newer transformers releases accept the processor via `processing_class` and warn
# when it is passed as `tokenizer`; older releases only know `tokenizer`. Pick the
# keyword that the installed Trainer actually declares.
_trainer_params = inspect.signature(Trainer.__init__).parameters
_processor_kwarg = "processing_class" if "processing_class" in _trainer_params else "tokenizer"

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=dataset_to_use["train"],
    eval_dataset=dataset_to_use["validation"],
    **{_processor_kwarg: processor},
)

  trainer = Trainer(


### Training and Evaluation

In [11]:
# Fine-tune, then persist the model together with the training metrics and trainer state.
train_results = trainer.train()
trainer.save_model()
for record_metrics in (trainer.log_metrics, trainer.save_metrics):
    record_metrics("train", train_results.metrics)
trainer.save_state()

# Evaluate on the validation split and record those metrics as well.
metrics = trainer.evaluate(eval_dataset=dataset_to_use['validation'])
for record_metrics in (trainer.log_metrics, trainer.save_metrics):
    record_metrics("eval", metrics)


  1%|          | 10/1600 [00:08<19:29,  1.36it/s]

{'loss': 1.3376, 'grad_norm': 10.94875431060791, 'learning_rate': 5e-05, 'epoch': 0.01}


  1%|▏         | 20/1600 [00:16<18:59,  1.39it/s]

{'loss': 0.6451, 'grad_norm': 8.44340705871582, 'learning_rate': 5e-05, 'epoch': 0.01}


  2%|▏         | 30/1600 [00:23<18:54,  1.38it/s]

{'loss': 0.6228, 'grad_norm': 13.224095344543457, 'learning_rate': 5e-05, 'epoch': 0.02}


  2%|▎         | 40/1600 [00:30<19:12,  1.35it/s]

{'loss': 0.5509, 'grad_norm': 6.007308483123779, 'learning_rate': 5e-05, 'epoch': 0.03}


  3%|▎         | 50/1600 [00:37<18:43,  1.38it/s]

{'loss': 1.0246, 'grad_norm': 23.82744789123535, 'learning_rate': 5e-05, 'epoch': 0.03}


  4%|▍         | 60/1600 [00:44<18:12,  1.41it/s]

{'loss': 0.6033, 'grad_norm': 10.467378616333008, 'learning_rate': 5e-05, 'epoch': 0.04}


  4%|▍         | 70/1600 [00:52<18:52,  1.35it/s]

{'loss': 0.6095, 'grad_norm': 5.320763111114502, 'learning_rate': 5e-05, 'epoch': 0.04}


  5%|▌         | 80/1600 [00:59<19:07,  1.33it/s]

{'loss': 0.4841, 'grad_norm': 10.995637893676758, 'learning_rate': 5e-05, 'epoch': 0.05}


  6%|▌         | 90/1600 [01:07<19:29,  1.29it/s]

{'loss': 0.6718, 'grad_norm': 11.432700157165527, 'learning_rate': 5e-05, 'epoch': 0.06}


  6%|▋         | 100/1600 [01:15<19:07,  1.31it/s]

{'loss': 0.5229, 'grad_norm': 10.034919738769531, 'learning_rate': 5e-05, 'epoch': 0.06}


                                                  
Non-default generation parameters: {'max_length': 128}


{'eval_loss': 0.4900659918785095, 'eval_Accuracy': 0.75625, 'eval_F1': 0.7036474164133738, 'eval_runtime': 66.1163, 'eval_samples_per_second': 24.2, 'eval_steps_per_second': 3.025, 'epoch': 0.06}


  7%|▋         | 110/1600 [02:36<41:19,  1.66s/it]  

{'loss': 0.4447, 'grad_norm': 9.875475883483887, 'learning_rate': 5e-05, 'epoch': 0.07}


  8%|▊         | 120/1600 [02:44<19:33,  1.26it/s]

{'loss': 0.7381, 'grad_norm': 12.143526077270508, 'learning_rate': 5e-05, 'epoch': 0.07}


  8%|▊         | 130/1600 [02:52<18:49,  1.30it/s]

{'loss': 0.5711, 'grad_norm': 8.8442964553833, 'learning_rate': 5e-05, 'epoch': 0.08}


  9%|▉         | 140/1600 [03:00<18:44,  1.30it/s]

{'loss': 0.3795, 'grad_norm': 7.078762054443359, 'learning_rate': 5e-05, 'epoch': 0.09}


  9%|▉         | 150/1600 [03:07<18:18,  1.32it/s]

{'loss': 0.4487, 'grad_norm': 4.199699401855469, 'learning_rate': 5e-05, 'epoch': 0.09}


 10%|█         | 160/1600 [03:15<19:11,  1.25it/s]

{'loss': 0.2988, 'grad_norm': 18.264429092407227, 'learning_rate': 5e-05, 'epoch': 0.1}


 11%|█         | 170/1600 [03:23<18:19,  1.30it/s]

{'loss': 0.2744, 'grad_norm': 15.226031303405762, 'learning_rate': 5e-05, 'epoch': 0.11}


 11%|█▏        | 180/1600 [03:31<18:23,  1.29it/s]

{'loss': 0.15, 'grad_norm': 20.187875747680664, 'learning_rate': 5e-05, 'epoch': 0.11}


 12%|█▏        | 190/1600 [03:39<20:52,  1.13it/s]

{'loss': 0.5209, 'grad_norm': 12.297005653381348, 'learning_rate': 5e-05, 'epoch': 0.12}


 12%|█▎        | 200/1600 [03:48<18:44,  1.24it/s]

{'loss': 0.9971, 'grad_norm': 22.87401580810547, 'learning_rate': 5e-05, 'epoch': 0.12}




RuntimeError: CUDA error: invalid argument
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
