### Installation

In [None]:
%%capture
import os, re
!pip install unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2

### Model Installation

In [None]:
from unsloth import FastVisionModel
import torch
# Load the pre-quantized 4-bit Qwen2.5-VL-7B-Instruct checkpoint through Unsloth.
# The call returns two objects; the second one is named `tokenizer` here and is
# later accessed as `tokenizer.tokenizer`, so it wraps an inner text tokenizer.
model, tokenizer = FastVisionModel.from_pretrained(
    "unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit",
    load_in_4bit = True,  # keep the weights in 4-bit form when loading
    use_gradient_checkpointing = "unsloth",  # presumably selects Unsloth's own checkpointing mode -- confirm in the Unsloth docs
)

In [None]:
# Wrap the loaded model with LoRA adapters. All four finetune_* switches are on,
# i.e. vision layers, language layers, attention modules and MLP modules.
model = FastVisionModel.get_peft_model(
    model,
    finetune_vision_layers     = True,
    finetune_language_layers   = True,
    finetune_attention_modules = True,
    finetune_mlp_modules       = True,
    r = 16,  # LoRA rank
    lora_alpha = 16,  # set equal to r here
    lora_dropout = 0,
    bias = "none",
    random_state = 3407,  # fixed seed; the same value is passed as `seed` to the trainer config
    use_rslora = False,
    loftq_config = None,

)

### Data Prep

In [None]:
!!for i in {0..4}; do wget -q https://huggingface.co/datasets/saberzl/SID_Set/resolve/main/data/train-$(printf "%05d" $i)-of-00249.parquet; done
from datasets import load_dataset


# Names of the five shards fetched by the download command:
# train-00000-of-00249.parquet ... train-00004-of-00249.parquet
file_names = ["train-%05d-of-00249.parquet" % shard_idx for shard_idx in range(5)]

# Read all listed shards with the generic parquet loader.
dataset = load_dataset("parquet", data_files=file_names)

print(dataset)

Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['img_id', 'image', 'mask', 'width', 'height', 'label'],
        num_rows: 844
    })
})


In [None]:
# Take the "train" split of the loaded DatasetDict as the working dataset.
train_dataset = dataset["train"]

In [None]:
# Each predicate below needs a single scalar column. Passing `input_columns`
# hands only that column to the predicate, so the `image` and `mask` columns
# are not materialized for every row just to compare an integer.

# remove images with label 2
dataset_no_label_2 = train_dataset.filter(
    lambda label: label != 2, input_columns=["label"], num_proc=12
)

# choose only images with width 1024
dataset_1024_only = dataset_no_label_2.filter(
    lambda width: width == 1024, input_columns=["width"], num_proc=12
)

# split the remaining rows by class: label 0 and label 1
dataset_0_1024 = dataset_1024_only.filter(
    lambda label: label == 0, input_columns=["label"], num_proc=12
)
dataset_1_1024 = dataset_1024_only.filter(
    lambda label: label == 1, input_columns=["label"], num_proc=12
)

In [None]:
# undersample label 1 to match label 0 count
# The target size is derived from the data instead of a fixed row count, so the
# cell keeps working whatever number of parquet shards was downloaded (a fixed
# range larger than the dataset makes `select` fail).
balanced_count = min(len(dataset_0_1024), len(dataset_1_1024))

# Shuffle with a fixed seed first so the kept rows are a reproducible random subset.
dataset_label_1_undersampled = dataset_1_1024.shuffle(seed=42).select(
    range(balanced_count)
)

In [None]:
from datasets import concatenate_datasets

# Stack the label-0 rows and the undersampled label-1 rows into one dataset,
# then shuffle with a fixed seed so the two classes are mixed.
dataset_dengelenmis_final = concatenate_datasets(
    [dataset_0_1024, dataset_label_1_undersampled]
).shuffle(seed=42)

In [None]:
import os
import io
import random
import pandas as pd
from datasets import ClassLabel, load_dataset, Features, Value
from torchvision import transforms
import torchvision.transforms.functional as TF
from PIL import Image


# Folder that receives the processed JPEG files and the metadata CSV.
SAVE_DIR = "/content/drive/MyDrive/Cv/SID_Set"
os.makedirs(SAVE_DIR, exist_ok=True)

# Worker count for the map() calls; falls back to 2 when the CPU count is unknown.
num_cores = os.cpu_count() or 2

print(f"Starting, the path is: {SAVE_DIR}")

# Cast the label column to a two-class ClassLabel; both splits below stratify on it.
label_features = dataset_dengelenmis_final.features.copy()
label_features["label"] = ClassLabel(num_classes=2)
dataset_dengelenmis_final = dataset_dengelenmis_final.cast(label_features)

# 70% train / 15% validation / 15% test:
# first hold out 30%, then cut that hold-out in half.
first_cut = dataset_dengelenmis_final.train_test_split(
    test_size=0.3, seed=42, stratify_by_column="label"
)
second_cut = first_cut["test"].train_test_split(
    test_size=0.5, seed=42, stratify_by_column="label"
)

train_split = first_cut["train"]
val_split, test_split = second_cut["train"], second_cut["test"]

print(f"Splits ready, Train: {len(train_split)}, Val: {len(val_split)}, Test: {len(test_split)}")


TARGET_SIZE = (512, 512)

def compress_image(image):
    """Round-trip `image` through an in-memory JPEG at a random quality.

    The image is converted to RGB when it is in any other mode, encoded with a
    quality drawn from 50..95 (both inclusive) and reopened from the buffer.

    Returns the PIL image opened on that in-memory buffer.
    """
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')

    jpeg_quality = random.randint(50, 95)
    jpeg_bytes = io.BytesIO()
    rgb_image.save(jpeg_bytes, format="JPEG", quality=jpeg_quality, optimize=True)

    # rewind so the decoder starts reading at the first byte
    jpeg_bytes.seek(0)
    return Image.open(jpeg_bytes)

# Colour/flip augmentations shared by every training image.
train_transforms_base = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.02),
])

# apply augmentation only to train data
def train_augment(image):
    """Augment one training image.

    Steps, in order: convert to RGB if needed, apply `train_transforms_base`,
    take a random resized crop of TARGET_SIZE, then pass the crop through
    `compress_image`. Returns the image produced by `compress_image`.
    """
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
    jittered = train_transforms_base(rgb_image)

    random_crop = transforms.RandomResizedCrop(
        size=TARGET_SIZE,
        scale=(0.4, 1.0),
        ratio=(0.8, 1.25),
        interpolation=transforms.InterpolationMode.BILINEAR
    )
    return compress_image(random_crop(jittered))

def val_test_process(image):
    """Return `image` unchanged when it is already RGB, otherwise its RGB conversion.

    No resizing or augmentation is applied to validation/test images.
    """
    return image if image.mode == 'RGB' else image.convert('RGB')

# images are saved to disk to not wait for augmentation next time
def process_and_save(batch, split_name="train"):
    """Process every image of a batch, write it into SAVE_DIR and return its metadata.

    Images of the "train" split go through `train_augment` and get an
    `_aug.jpg` suffix; every other split goes through `val_test_process` and
    gets `_std.jpg`. The file stem is the row's `img_id` (with "/" replaced by
    "_"), or `<split_name>_<index in batch>` when the batch has no `img_id`.
    A row whose file cannot be written is reported with a print and left out.

    Returns a dict of three equally long lists: "file_name", "label", "split".
    """
    use_augmentation = split_name == "train"
    has_ids = "img_id" in batch
    saved = {"file_name": [], "label": [], "split": []}

    for row, img in enumerate(batch["image"]):
        lbl = batch["label"][row]

        raw_id = batch["img_id"][row] if has_ids else f"{split_name}_{row}"
        safe_id = str(raw_id).replace("/", "_")

        if use_augmentation:
            out_img, out_name = train_augment(img), f"{safe_id}_aug.jpg"
        else:
            out_img, out_name = val_test_process(img), f"{safe_id}_std.jpg"  # std = standard

        save_path = os.path.join(SAVE_DIR, out_name)
        try:
            out_img.save(save_path, "JPEG")
        except Exception as e:
            # best effort: report the failure and skip this row's metadata
            print(f"Hata: {e}")
            continue

        saved["file_name"].append(out_name)
        saved["label"].append(lbl)
        saved["split"].append(split_name)

    return saved

# (split name, dataset) pairs, processed in this order.
datasets_to_process = [
    ("train", train_split),
    ("validation", val_split),
    ("test", test_split)
]

# One metadata frame per split; concatenated into a single CSV afterwards.
all_metadata_dfs = []

for name, dset in datasets_to_process:
    print(f"starting to save images to disk")

    map_options = dict(
        batched=True,
        batch_size=50,
        num_proc=num_cores,
        fn_kwargs={"split_name": name},
        # drop every input column so only the returned metadata columns remain
        remove_columns=dset.column_names,
        desc=f"Saving {name}",
    )
    processed = dset.map(process_and_save, **map_options)
    all_metadata_dfs.append(processed.to_pandas())

print("\nMetadata step..")
metadata_path = os.path.join(SAVE_DIR, "metadata.csv")
full_metadata = pd.concat(all_metadata_dfs, ignore_index=True)
full_metadata.to_csv(metadata_path, index=False)
print(f"saved to {metadata_path}")
print(f"all data {len(full_metadata)}")



In [None]:
import numpy as np
from datasets import ClassLabel, load_dataset, Features, Value

SAVE_DIR = "/content/drive/MyDrive/Cv/SID_Set"
print("\n reading images from disk")

# Read the saved folder with the imagefolder loader and make `label` a
# two-class ClassLabel; all rows are taken from the loader's "train" split.
dataset_final = load_dataset("imagefolder", data_dir=SAVE_DIR)
dataset_final = dataset_final.cast_column("label", ClassLabel(num_classes=2))
full_data = dataset_final["train"]

print("starting splitting images to sets(test, train..)")

# Row positions of each split, looked up in the `split` column.
split_column = np.array(full_data["split"])
train_indices = np.flatnonzero(split_column == "train")
val_indices = np.flatnonzero(split_column == "validation")
test_indices = np.flatnonzero(split_column == "test")

# Only the image and its label are kept for training/evaluation.
cols_to_keep = ["image", "label"]
remove_cols = [column for column in full_data.column_names if column not in cols_to_keep]

train_dataset = full_data.select(train_indices).remove_columns(remove_cols)
validation_dataset = full_data.select(val_indices).remove_columns(remove_cols)
test_dataset = full_data.select(test_indices).remove_columns(remove_cols)

print(f"Train Dataset: {len(train_dataset)}")

In [None]:
import matplotlib.pyplot as plt
import random

def show_random_batch(dataset, num_images=30, cols=6):
    """Plot a random sample of images from `dataset` in a grid.

    Args:
        dataset: indexable collection whose items expose 'image' and 'label'.
        num_images: upper bound on how many images to draw; it is capped at
            `len(dataset)` so small datasets do not make `random.sample` raise.
        cols: number of grid columns.

    Returns:
        None. The figure is shown with `plt.show()`; nothing is plotted when
        the dataset is empty.
    """
    num_images = min(num_images, len(dataset))
    if num_images <= 0:
        print("No images to show.")
        return

    indices = random.sample(range(len(dataset)), num_images)

    # ceil(num_images / cols) without floats
    rows = (num_images + cols - 1) // cols

    plt.figure(figsize=(20, 3 * rows))

    for i, idx in enumerate(indices):
        item = dataset[idx]
        image = item['image']
        label = item['label']

        plt.subplot(rows, cols, i + 1)
        plt.imshow(image)
        plt.title(f"Label: {label}\nIdx: {idx}")
        plt.axis('off')

    plt.tight_layout()
    plt.show()

show_random_batch(train_dataset, num_images=30)

In [None]:
# convert dataset to proper format for LLM
instruction = """Real/Fake?"""
def convert_to_conversation(sample):
    """Turn one {"image", "label"} sample into a two-turn chat example.

    The user turn carries the instruction text followed by the image; the
    assistant turn carries the answer, "Real" for label 0 and "Fake" for any
    other label. Returns {"messages": [user_turn, assistant_turn]}.
    """
    answer = "Fake" if sample["label"] != 0 else "Real"

    user_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": instruction},
            {"type": "image", "image": sample["image"]},
        ],
    }
    assistant_turn = {
        "role": "assistant",
        "content": [{"type": "text", "text": answer}],
    }
    return {"messages": [user_turn, assistant_turn]}
# Materialize each split as a plain Python list of chat examples.
converted_dataset = list(map(convert_to_conversation, train_dataset))
converted_dataset_valid = list(map(convert_to_conversation, validation_dataset))
converted_dataset_test = list(map(convert_to_conversation, test_dataset))

### Train

In [None]:
from transformers import EvalPrediction
from sklearn.metrics import accuracy_score
import numpy as np
# The debugger is imported under its own name: aliasing it to `pd` rebinds the
# alias this notebook uses for pandas in other cells.
import pdb

# Token ids the metric looks for: end of message, "Fake" text token, "Real" text token.
# NOTE(review): FAKE_ID / REAL_ID are hard-coded; confirm they equal the first id of
# tokenizer.tokenizer.encode("Fake") / encode("Real"), as computed in the inference cells.
END_OF_MESSAGE_ID = 151645 # <|im_end|>
FAKE_ID = 52317
REAL_ID = 12768
# Only samples whose reference decision token is one of these two ids are scored.
CRITICAL_IDS = {FAKE_ID, REAL_ID}


def preprocess_logits_for_metrics(logits, labels):
    """Reduce a batch of logits to token ids by taking the argmax over the last axis.

    Called once per evaluation batch. When `logits` is a tuple only its first
    element is used. `labels` is accepted but not read.
    """
    scores = logits[0] if isinstance(logits, tuple) else logits

    # (batch, seq_len, vocab) -> (batch, seq_len)
    return scores.argmax(dim=-1)

def compute_classification_metrics(p: EvalPrediction):
    """
    Accuracy of the Real/Fake decision token over an evaluation run.

    The predictions arrive already argmax'ed (thanks to
    preprocess_logits_for_metrics), so both `p.predictions` and `p.label_ids`
    are treated as arrays of token ids.

    For every sequence, positions whose label is -100 are dropped; the decision
    token is the one right before the last END_OF_MESSAGE_ID among the remaining
    positions. A sequence is scored only when its reference decision token is in
    CRITICAL_IDS; a missing predicted decision token is counted as -999 (wrong).

    Returns a dict with "accuracy" and "eval_RealFake_count" (number of scored
    sequences); both are 0 when nothing could be scored.
    """
    predicted_ids = torch.from_numpy(p.predictions)
    reference_ids = torch.from_numpy(p.label_ids)

    # True where the label is a real token (not the -100 ignore value)
    keep = reference_ids != -100

    def find_decision_token(token_seq, seq_mask):
        """Return the token right before the last end-of-message token, or None."""
        kept = token_seq[seq_mask]
        end_positions = (kept == END_OF_MESSAGE_ID).nonzero(as_tuple=True)[0]

        if end_positions.size(0) == 0:
            return None

        decision_pos = end_positions[-1] - 1
        if decision_pos < 0:
            # the end token is the very first kept token: nothing precedes it
            return None
        return kept[decision_pos]

    final_labels_list = []
    final_preds_list = []

    for row, row_preds in enumerate(predicted_ids):
        row_keep = keep[row]

        label_token = find_decision_token(reference_ids[row], row_keep)
        if label_token is None or label_token.item() not in CRITICAL_IDS:
            continue

        pred_token = find_decision_token(row_preds, row_keep)
        final_labels_list.append(label_token.item())
        final_preds_list.append(-999 if pred_token is None else pred_token.item())

    if not final_labels_list:
        return {"accuracy": 0.0, "eval_RealFake_count": 0}

    return {
        "accuracy": float(accuracy_score(final_labels_list, final_preds_list)),
        "eval_RealFake_count": int(len(final_labels_list))
    }

In [None]:
from unsloth.trainer import UnslothVisionDataCollator
from trl import SFTTrainer, SFTConfig
from transformers import EarlyStoppingCallback

FastVisionModel.for_training(model)

# Stop when the tracked metric has not improved for 3 consecutive evaluations.
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=3,
    early_stopping_threshold=0.0,
)

# !!!! This model is first trained without focal loss for one epoch like the model of Method 1 but then trained with focal loss for 2 epoch

# NOTE(review): `loss_type` / `loss_params` are passed straight to SFTTrainer.
# Confirm that the installed (Unsloth-patched) trainer really applies a focal
# loss and does not silently ignore these two keywords.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    data_collator = UnslothVisionDataCollator(model, tokenizer),
    train_dataset = converted_dataset,
    eval_dataset = converted_dataset_valid,
    callbacks = [early_stopping],
    compute_metrics = compute_classification_metrics,
    preprocess_logits_for_metrics = preprocess_logits_for_metrics,
    loss_type='focal', # there was no this line in the previous training step
    loss_params={'alpha': 0.75, 'gamma': 2.0}, # this line also didn't exist in the previous training step
    args = SFTConfig(
        per_device_train_batch_size = 8,
        per_device_eval_batch_size = 10,
        gradient_accumulation_steps = 5,
        warmup_ratio = 0.1,
        num_train_epochs = 2, # it was 1 in the previous training step
        # learning_rate = 1e-4, # previous learning rate
        learning_rate = 5e-5,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        # lr_scheduler_type = "linear", # previous scheduler
        lr_scheduler_type = "cosine",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",
        eval_strategy = "steps",
        eval_steps = 10,
        # Checkpoint on the same schedule as evaluation. With the default save
        # interval no checkpoint is written at the evaluation steps of this
        # short run, so `load_best_model_at_end` could not restore the
        # best-scoring step.
        save_strategy = "steps",
        save_steps = 10,
        save_total_limit = 2,  # bounds disk usage in output_dir
        metric_for_best_model = "accuracy",
        load_best_model_at_end = True,
        remove_unused_columns = False,
        dataset_text_field = "",
        dataset_kwargs = {"skip_prepare_dataset": True},
        max_length = 2048,
    ),
)

Unsloth: Model does not have a default image size - using 512


In [None]:
# Run the fine-tuning loop; the value returned by train() is kept in `trainer_stats`.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,587 | Num Epochs = 2 | Total steps = 80
O^O/ \_/ \    Batch size per device = 8 | Gradient accumulation steps = 5
\        /    Data Parallel GPUs = 1 | Total batch size (8 x 5 x 1) = 40
 "-____-"     Trainable parameters = 51,521,536 of 8,343,688,192 (0.62% trained)


Step,Training Loss,Validation Loss,Realfake Count,Accuracy
10,0.0173,0.007247,340,0.95
20,0.006,0.002997,340,0.985294
30,0.0048,0.002826,340,0.982353
40,0.0045,0.001567,340,0.988235
50,0.0037,0.000985,340,0.991176
60,0.0015,0.000752,340,0.997059
70,0.0005,0.000886,340,0.994118
80,0.003,0.000904,340,0.994118


In [None]:
# Write the model and the tokenizer to the local folder "fake_image_detector_99_3".
# NOTE(review): `model` was wrapped by get_peft_model, so this presumably stores
# only the adapter weights -- confirm before relying on it as a full checkpoint.
model.save_pretrained("fake_image_detector_99_3")
tokenizer.save_pretrained("fake_image_detector_99_3")

[]

### Inference Tests

In [None]:
import torch
import torch.nn.functional as F
import pdb
# Evaluate the fine-tuned model on the held-out test conversations, one image
# at a time, printing a running tally and the final accuracy.
FastVisionModel.for_inference(model)

correct = 0
total = 0

actual_tokenizer = tokenizer.tokenizer

# Get token IDs for "Fake" and "Real"
fake_token_id = actual_tokenizer.encode("Fake", add_special_tokens=False)[0]
real_token_id = actual_tokenizer.encode("Real", add_special_tokens=False)[0]

print(f"Fake token ID: {fake_token_id}, Real token ID: {real_token_id}")  # Debug

# The prompt is identical for every sample, so it is built once.
instruction = """Real/Fake?"""
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

num_test_samples = len(converted_dataset_test)

for num in range(num_test_samples):
    sample_messages = converted_dataset_test[num]['messages']
    # user turn: content[1] is the image; assistant turn: content[0] is the answer text
    image = sample_messages[0]['content'][1]['image']
    act_label = sample_messages[1]['content'][0]['text']

    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to("cuda")

    # Generate to get prediction with scores
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=128,
                                do_sample=False,
                                output_scores=True, return_dict_in_generate=True)

    # keep only the newly generated tokens (drop the prompt part)
    generated_ids = outputs.sequences[0][inputs['input_ids'].shape[1]:]
    generated_text = actual_tokenizer.decode(generated_ids, skip_special_tokens=True)
    prediction = generated_text.strip().split()[0].lower() if generated_text.strip() else ""
    print(generated_text)

    # Get probabilities for the first token (Fake or Real)
    first_token_logits = outputs.scores[0][0]  # [vocab_size]
    probabilities = F.softmax(first_token_logits, dim=-1)

    fake_prob = probabilities[fake_token_id].item() * 100
    real_prob = probabilities[real_token_id].item() * 100

    actual = act_label.lower().strip()

    # this part has threshold to prevent fake bias:
    # a "fake" answer below 80% first-token probability is turned into "real"
    pred_prob = fake_prob if prediction == "fake" else real_prob
    if prediction == "fake" and pred_prob < 80:
        prediction = "real"
        pred_prob = real_prob

    is_correct = prediction == actual
    if is_correct:
        correct += 1
    total += 1

    status = "✅ CORRECT" if is_correct else "❌ INCORRECT"
    # progress is reported against the test set that is being iterated
    print(f" {num+1}/{num_test_samples}: {status}")
    print(f"  {act_label} | {prediction} ({pred_prob:.2f}%)")
    print(f"CORRECT/TOTAL PREDICTION {correct}/{total} ({100*correct/total:.2f}%)")
    print("-" * 50)

print("\n" + "=" * 50)
if total > 0:
    print(f"FINAL ACCURACY: {correct}/{total} = {100*correct/total:.2f}%")
else:
    # avoids a ZeroDivisionError when the test set is empty
    print("FINAL ACCURACY: n/a (no test samples)")
print("=" * 50)

### Other Datasets Testing (ImageNet and Dalle3)

In [None]:
import os
import torch
import torch.nn.functional as F
from PIL import Image
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd

# Evaluate the model on a folder of external images and print accuracy, a
# confusion matrix and a classification report.
dataset_path = "/content/drive/MyDrive/Cv/dalle3_random_100"
valid_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".webp"}

FastVisionModel.for_inference(model)

actual_tokenizer = tokenizer.tokenizer
fake_token_id = actual_tokenizer.encode("Fake", add_special_tokens=False)[0]
real_token_id = actual_tokenizer.encode("Real", add_special_tokens=False)[0]

def get_label_from_filename(filename):
    """Return the ground-truth label for `filename`; this version always returns "fake"."""
    return "fake"

# Collect image paths; sorted so the processing order is the same on every run.
image_files = []
if os.path.exists(dataset_path):
    for filename in sorted(os.listdir(dataset_path)):
        ext = os.path.splitext(filename)[1].lower()
        if ext in valid_extensions:
            image_files.append(os.path.join(dataset_path, filename))
else:
    print(f"No folder found!")


if len(image_files) > 0:
    print(f"Total images: {len(image_files)}. Starting..\n")

    y_true = []
    y_pred = []

    # The prompt is identical for every image, so it is built once.
    instruction = "Real/Fake?"
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": instruction}
        ]}
    ]
    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

    for img_path in image_files:
        filename = os.path.basename(img_path)
        true_label = get_label_from_filename(filename)

        # skip files for which no label can be determined
        if true_label is None:
            continue

        try:
            image = Image.open(img_path).convert("RGB")

            inputs = tokenizer(
                image,
                input_text,
                add_special_tokens=False,
                return_tensors="pt",
            ).to("cuda")

            with torch.no_grad():
                outputs = model.generate(**inputs, max_new_tokens=128,
                                        use_cache=True, do_sample=False,
                                        output_scores=True, return_dict_in_generate=True)

            # keep only the newly generated tokens (drop the prompt part)
            generated_ids = outputs.sequences[0][inputs['input_ids'].shape[1]:]
            generated_text = actual_tokenizer.decode(generated_ids, skip_special_tokens=True)
            raw_pred = generated_text.strip().split()[0].lower() if generated_text.strip() else "unknown"

            # probabilities of the first generated token
            first_token_logits = outputs.scores[0][0]
            probabilities = F.softmax(first_token_logits, dim=-1)

            fake_prob = probabilities[fake_token_id].item() * 100
            real_prob = probabilities[real_token_id].item() * 100

            if "fake" in raw_pred:
                prediction = "fake"
                current_prob = fake_prob
            elif "real" in raw_pred:
                prediction = "real"
                current_prob = real_prob
            else:
                prediction = "unknown"
                current_prob = 0.0

            # apply threshold to prevent fake bias:
            # a "fake" answer below 80% first-token probability is turned into "real"
            threshold_msg = ""
            if prediction == "fake" and fake_prob < 80.0:
                prediction = "real"
                current_prob = real_prob
                threshold_msg = " [fixed with threshold]"

            y_true.append(true_label)
            y_pred.append(prediction)

            match_icon = "✅" if true_label == prediction else "❌"

            print(f"{match_icon} {filename} | True: {true_label} -> Pred: {prediction.upper()} ({current_prob:.2f}%){threshold_msg}")

        except Exception as e:
            # best effort: report the failing image and continue with the next one
            print(f"Exception!!! {e}")

    print("\n" + "="*40)
    print("RESULTS")
    print("="*40)

    if len(y_true) > 0:
        acc = accuracy_score(y_true, y_pred)
        print(f"\nAccuracy: {acc:.4f} ({acc*100:.2f}%)")

        labels = ["fake", "real"]

        try:
            cm = confusion_matrix(y_true, y_pred, labels=labels)
            cm_df = pd.DataFrame(cm, index=[f"Act {l}" for l in labels],
                                 columns=[f"Pred {l}" for l in labels])
            print("\nConfusion Matrix:")
            print(cm_df)

            print("\nClassification Report:")
            # `labels` must be passed together with `target_names`: otherwise the
            # report raises whenever the data does not contain exactly these two
            # classes (e.g. every prediction is "fake", or an "unknown" appears).
            print(classification_report(y_true, y_pred, labels=labels,
                                        target_names=labels, zero_division=0))
        except Exception as e:
            print(f"Exception ", e)

    else:
        print("No data!")

else:
    print("No data in folder! Check path")

In [None]:
# this code is same as the above, the difference is this code have compression of images like on social media

import os
import torch
import torch.nn.functional as F
from PIL import Image
import io
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd

# Same evaluation as the uncompressed run, except that every image is first
# re-encoded as a quality-65 JPEG in memory to mimic social-media compression.
dataset_path = "/content/drive/MyDrive/Cv/dalle3_random_100"
valid_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".webp"}

FastVisionModel.for_inference(model)

actual_tokenizer = tokenizer.tokenizer
fake_token_id = actual_tokenizer.encode("Fake", add_special_tokens=False)[0]
real_token_id = actual_tokenizer.encode("Real", add_special_tokens=False)[0]

def get_label_from_filename(filename):
    """Return the ground-truth label for `filename`; this version always returns "fake"."""
    return "fake"

# Collect image paths; sorted so the processing order is the same on every run.
image_files = []
if os.path.exists(dataset_path):
    for filename in sorted(os.listdir(dataset_path)):
        ext = os.path.splitext(filename)[1].lower()
        if ext in valid_extensions:
            image_files.append(os.path.join(dataset_path, filename))
else:
    print(f"No folder found!")

# --- MAIN LOGIC ---
if len(image_files) > 0:
    print(f"Total images: {len(image_files)}. Starting..\n")

    y_true = []
    y_pred = []

    # The prompt is identical for every image, so it is built once.
    instruction = "Real/Fake?"
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": instruction}
        ]}
    ]
    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

    for img_path in image_files:
        filename = os.path.basename(img_path)
        true_label = get_label_from_filename(filename)

        # skip files for which no label can be determined
        if true_label is None:
            continue

        try:
            image = Image.open(img_path).convert("RGB")

            # this step to simulate compression on social media
            buffer = io.BytesIO()
            image.save(buffer, format="JPEG", quality=65, optimize=True)
            buffer.seek(0)  # rewind so the decoder reads from the first byte
            image = Image.open(buffer)

            inputs = tokenizer(
                image,
                input_text,
                add_special_tokens=False,
                return_tensors="pt",
            ).to("cuda")

            with torch.no_grad():
                outputs = model.generate(**inputs, max_new_tokens=128,
                                        use_cache=True, do_sample=False,
                                        output_scores=True, return_dict_in_generate=True)

            # keep only the newly generated tokens (drop the prompt part)
            generated_ids = outputs.sequences[0][inputs['input_ids'].shape[1]:]
            generated_text = actual_tokenizer.decode(generated_ids, skip_special_tokens=True)
            raw_pred = generated_text.strip().split()[0].lower() if generated_text.strip() else "unknown"

            # probabilities of the first generated token
            first_token_logits = outputs.scores[0][0]
            probabilities = F.softmax(first_token_logits, dim=-1)

            fake_prob = probabilities[fake_token_id].item() * 100
            real_prob = probabilities[real_token_id].item() * 100

            if "fake" in raw_pred:
                prediction = "fake"
                current_prob = fake_prob
            elif "real" in raw_pred:
                prediction = "real"
                current_prob = real_prob
            else:
                prediction = "unknown"
                current_prob = 0.0

            # threshold part to prevent fake bias:
            # a "fake" answer below 80% first-token probability is turned into "real"
            threshold_msg = ""
            if prediction == "fake" and fake_prob < 80.0:
                prediction = "real"
                current_prob = real_prob
                threshold_msg = " [fixed with threshold]"

            y_true.append(true_label)
            y_pred.append(prediction)

            match_icon = "✅" if true_label == prediction else "❌"

            print(f"{match_icon} {filename} | (JPEG-65) | True: {true_label} -> Pred: {prediction.upper()} ({current_prob:.2f}%){threshold_msg}")

        except Exception as e:
            # best effort: report the failing image and continue with the next one
            print(f"Exception!!! {e}")

    print("\n" + "="*40)
    print("RESULTS (With Compression)")
    print("="*40)

    if len(y_true) > 0:
        acc = accuracy_score(y_true, y_pred)
        print(f"\nAccuracy: {acc:.4f} ({acc*100:.2f}%)")

        labels = ["fake", "real"]
        try:
            cm = confusion_matrix(y_true, y_pred, labels=labels)
            cm_df = pd.DataFrame(cm, index=[f"Act {l}" for l in labels],
                                 columns=[f"Pred {l}" for l in labels])
            print("\nConfusion Matrix:")
            print(cm_df)

            print("\nClassification Report:")
            # `labels` must be passed together with `target_names`: otherwise the
            # report raises whenever the data does not contain exactly these two
            # classes (e.g. every prediction is "fake", or an "unknown" appears).
            print(classification_report(y_true, y_pred, labels=labels,
                                        target_names=labels, zero_division=0))
        except Exception as e:
            print(f"Exception ", e)

    else:
        print("No data!")

else:
    print("No data in folder! Check path")

### Save model

In [None]:
# Write the model and the tokenizer to the local folder "fake_image_detector_99_3"
# (the same folder name is used by the save right after training, so this overwrites it).
model.save_pretrained("fake_image_detector_99_3")
tokenizer.save_pretrained("fake_image_detector_99_3")