In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Install ViT

In [1]:
!pip install transformers torch torchvision



# Import

In [8]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torch.optim.lr_scheduler import ReduceLROnPlateau
from transformers import ViTForImageClassification

### Device Check

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


# Load Model

In [12]:
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels = 2,
    ignore_mismatched_sizes=True
)
model.to(device)
print("viT Model Loaded Successfully")

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


viT Model Loaded Successfully


### Unlock Some Layer of Model

In [13]:
for param in model.parameters():
    param.requires_grad = False
for param in model.vit.encoder.layer[-4:].parameters():
    param.requires_grad = True

# Load Information and Create DataLodaer

In [14]:
# Path of Dataset
BASE_PATH = "/kaggle/input/ai-vs-human-generated-dataset"
TRAIN_CSV = os.path.join(BASE_PATH, "train.csv")
TEST_CSV = os.path.join(BASE_PATH, "test.csv")

In [15]:
# Load CSV
df_train = pd.read_csv(os.path.join(BASE_PATH, "train.csv"))
df_test = pd.read_csv(os.path.join(BASE_PATH, "test.csv"))

print("train.csv:")
print(df_train.head())

print("\n test.csv:")
print(df_test.head())

train.csv:
   Unnamed: 0                                        file_name  label
0           0  train_data/a6dcb93f596a43249135678dfcfc17ea.jpg      1
1           1  train_data/041be3153810433ab146bc97d5af505c.jpg      0
2           2  train_data/615df26ce9494e5db2f70e57ce7a3a4f.jpg      1
3           3  train_data/8542fe161d9147be8e835e50c0de39cd.jpg      0
4           4  train_data/5d81fa12bc3b4cea8c94a6700a477cf2.jpg      1

 test.csv:
                                                  id
0  test_data_v2/1a2d9fd3e21b4266aea1f66b30aed157.jpg
1  test_data_v2/ab5df8f441fe4fbf9dc9c6baae699dc7.jpg
2  test_data_v2/eb364dd2dfe34feda0e52466b7ce7956.jpg
3  test_data_v2/f76c2580e9644d85a741a42c6f6b39c0.jpg
4  test_data_v2/a16495c578b7494683805484ca27cf9f.jpg


In [16]:
df_train["file_path"] = df_train["file_name"].apply(lambda x: os.path.join(BASE_PATH, x))
df_test["file_path"] = df_test["id"].apply(lambda x: os.path.join(BASE_PATH, x))

In [17]:
print(df_train["file_name"].head(10))

0    train_data/a6dcb93f596a43249135678dfcfc17ea.jpg
1    train_data/041be3153810433ab146bc97d5af505c.jpg
2    train_data/615df26ce9494e5db2f70e57ce7a3a4f.jpg
3    train_data/8542fe161d9147be8e835e50c0de39cd.jpg
4    train_data/5d81fa12bc3b4cea8c94a6700a477cf2.jpg
5    train_data/25ea852f30594bc5915eb929682af429.jpg
6    train_data/e67085fb6d814cbabe08f978c738f3f7.jpg
7    train_data/041c36d9269146cdb88e7526e3b91651.jpg
8    train_data/4aea3b876247467c8d3713d4920148ab.jpg
9    train_data/09708379751e44d0bc908d8652d0db3e.jpg
Name: file_name, dtype: object


In [18]:
existing_files = df_train["file_path"].apply(os.path.exists).sum()
print(f"Find {existing_files} From {len(df_train)} in train_data")

Find 79950 From 79950 in train_data


### Data Augmentation

In [19]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

### Create Dataset

In [28]:
class ImageDataset(Dataset):
    def __init__(self, df, transform, test_mode=False):
        self.df = df
        self.transform = transform
        self.test_mode = test_mode

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = self.df.iloc[idx]["file_path"]
        img = Image.open(img_path).convert("RGB")
        img = self.transform(img)

        if self.test_mode:
            return img
        else:
            label = int(self.df.iloc[idx]["label"])
            return img, label

### Split Train/Validation (80/20)

In [29]:
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=42, stratify=df_train["label"])

### Create Dataloader

In [30]:
BATCH_SIZE = 16
train_loader = DataLoader(ImageDataset(train_df, transform), batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(ImageDataset(val_df, transform), batch_size=BATCH_SIZE, shuffle=False)

print(f"Train Data: {len(train_loader.dataset)} images")
print(f"Validation Data: {len(val_loader.dataset)} images")

Train Data: 63960 images
Validation Data: 15990 images


In [14]:
# Use AdamW Optimizer and CrossEntropy Loss
optimizer = optim.AdamW(model.parameters(), lr=3e-5, weight_decay=1e-4)

# Use Label Smoothing reduce Overfitting
class LabelSmoothingCrossEntropy(nn.Module):
    def __init__(self, smoothing=0.1):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, preds, target):
        log_preds = torch.nn.functional.log_softmax(preds, dim=-1)
        n_classes = preds.size(-1)
        one_hot = torch.zeros_like(preds).scatter(1, target.unsqueeze(1), 1)
        smoothed_labels = (1 - self.smoothing) * one_hot + self.smoothing / n_classes
        return torch.mean(torch.sum(-smoothed_labels * log_preds, dim=-1))

loss_fn = nn.CrossEntropyLoss()

# Learning Rate Scheduler
lr_scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2, min_lr=1e-6, verbose=True)

# Early Stopping
early_stopping_patience = 3
best_val_acc = 0
early_stop_counter = 0

# Train Function
def train_epoch(model, train_loader):
    model.train()
    total_loss, total_correct = 0, 0
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(imgs).logits
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        total_correct += (outputs.argmax(1) == labels).sum().item()
    return total_loss / len(train_loader), total_correct / len(train_loader.dataset)

# Validation Function
def evaluate(model, val_loader):
    model.eval()
    total_correct = 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs).logits
            total_correct += (outputs.argmax(1) == labels).sum().item()
    return total_correct / len(val_loader.dataset)

# Model Train
EPOCHS = 50
for epoch in range(EPOCHS):
    train_loss, train_acc = train_epoch(model, train_loader)
    val_acc = evaluate(model, val_loader)

    print(f"Epoch {epoch+1}/{EPOCHS} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

    lr_scheduler.step(val_acc)

if val_acc > best_val_acc:
    best_val_acc = val_acc
    early_stop_counter = 0
    torch.save(model.state_dict(), "best_model.pth") 
else:
    early_stop_counter += 1
    if early_stop_counter >= early_stopping_patience:
        print(f" Early Stopping Triggered at Epoch {epoch+1}")
        break


Epoch 1/50 | Train Loss: 0.0634 | Train Acc: 0.9768 | Val Acc: 0.9906
Epoch 2/50 | Train Loss: 0.0307 | Train Acc: 0.9893 | Val Acc: 0.9894
Epoch 3/50 | Train Loss: 0.0228 | Train Acc: 0.9921 | Val Acc: 0.9891
Epoch 4/50 | Train Loss: 0.0201 | Train Acc: 0.9932 | Val Acc: 0.9923
Epoch 5/50 | Train Loss: 0.0175 | Train Acc: 0.9942 | Val Acc: 0.9871
Epoch 6/50 | Train Loss: 0.0165 | Train Acc: 0.9945 | Val Acc: 0.9931
Epoch 7/50 | Train Loss: 0.0146 | Train Acc: 0.9949 | Val Acc: 0.9904
Epoch 8/50 | Train Loss: 0.0137 | Train Acc: 0.9954 | Val Acc: 0.9898
Epoch 9/50 | Train Loss: 0.0142 | Train Acc: 0.9952 | Val Acc: 0.9941
Epoch 10/50 | Train Loss: 0.0133 | Train Acc: 0.9955 | Val Acc: 0.9921
Epoch 11/50 | Train Loss: 0.0123 | Train Acc: 0.9960 | Val Acc: 0.9789
Epoch 12/50 | Train Loss: 0.0134 | Train Acc: 0.9954 | Val Acc: 0.9904
Early Stopping Triggered at Epoch 12


In [35]:
MODEL_PATH = "best_model.pth"
torch.save(model.state_dict(), MODEL_PATH)
print(f"Model saved successfully at {MODEL_PATH}")

Model saved successfully at best_model.pth


### Load Best Model

In [36]:
MODEL_PATH = "best_model.pth"

if os.path.exists(MODEL_PATH):
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    model.to(device)
    print("Best Model Loaded Successfully!")
else:
    print(f"Model file NOT found: {MODEL_PATH}. You may need to retrain and save the model.")


  model.load_state_dict(torch.load(MODEL_PATH, map_location=device))


Best Model Loaded Successfully!


In [None]:
from sklearn.metrics import accuracy_score, f1_score

# Evaluate
def evaluate(model, val_loader):
    model.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs).logits
            preds = outputs.argmax(1).cpu().numpy()
            labels = labels.cpu().numpy()
            
            all_preds.extend(preds)
            all_labels.extend(labels)
    
    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average="weighted")
    
    print(f"Validation Accuracy: {acc:.4f}")
    print(f"Validation F1 Score: {f1:.4f}")
    
    return acc, f1

In [37]:
val_acc, val_f1 = evaluate(model, val_loader)

✅ Validation Accuracy: 0.9901
✅ Validation F1 Score: 0.9901


In [38]:
LOG_FILE = "training_log.txt"
log_text = f"Final Model Evaluation | Val Acc: {val_acc:.4f} | Val F1: {val_f1:.4f}\n"

with open(LOG_FILE, "w") as f:
    f.write(log_text)

print(f"Training log saved at {LOG_FILE}")

✅ Training log saved at training_log.txt


In [40]:
# Edit Dataset test set no label
class ImageDataset(Dataset):
    def __init__(self, df, transform, test_mode=False):
        self.df = df
        self.transform = transform
        self.test_mode = test_mode 

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = self.df.iloc[idx]["file_path"]
        img = Image.open(img_path).convert("RGB")
        img = self.transform(img)

        if self.test_mode:
            return img
        else:
            label = int(self.df.iloc[idx]["label"])
            return img, label


In [27]:
def predict_test(model, test_df, transform, batch_size=8):
    model.eval()
    test_dataset = ImageDataset(test_df, transform, test_mode=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    predictions = []
    ids = test_df["id"].tolist()

    with torch.no_grad():
        for imgs in test_loader:
            imgs = imgs.to(device)
            outputs = model(imgs).logits
            preds = outputs.argmax(1).cpu().numpy()
            predictions.extend(preds)

    return ids, predictions

test_ids, test_preds = predict_test(model, df_test, transform)

In [42]:
submission_df = pd.DataFrame({"id": test_ids, "label": test_preds})
submission_df.to_csv("submission2.csv", index=False)

print(f"Submission file 'submission2.csv' created successfully!")

✅ Submission file 'submission2.csv' created successfully!


### Test

In [46]:
from PIL import Image
import torch
from scipy import stats
import numpy as np

In [49]:
def perform_ttest(ai_scores, human_scores):

    # Check if there is enough data for T-Test
    if len(ai_scores) < 2 or len(human_scores) < 2:
        print("Information not enough for T-Test (need at least 2 samples per group).")
        return

    # Calculate T-Test
    t_stat, p_value = stats.ttest_ind(ai_scores, human_scores, equal_var=False)

    # Show results
    print("\nT-Test Result:")
    print(f"T-Statistic: {t_stat:.4f}")
    print(f"P-Value: {p_value:.4f}")

    # Interpretation
    if p_value < 0.05:
        print("There is a statistically significant difference between AI-generated images and human-created images.")
    else:
        print("No statistically significant difference was found between AI-generated images and human-created images.")


In [51]:
def predict_image(model, image_path, transform):

    model.eval()  # Set the model to evaluation mode
    
    # Load and transform the image
    img = Image.open(image_path).convert("RGB")
    img = transform(img)
    img = img.unsqueeze(0).to(device)  # Add batch dimension and move to device

    with torch.no_grad():
        output = model(img).logits
        pred = output.argmax(1).item()  # 0 = Human-Created, 1 = AI-Generated

    # Interpret the result
    result = "AI-Generated" if pred == 1 else "Human-Created"
    return result


In [54]:
ai_scores = []
human_scores = []

UPLOAD_FOLDER = "/kaggle/input/datadata"

if os.path.isdir(UPLOAD_FOLDER):
    image_files = [f for f in os.listdir(UPLOAD_FOLDER) if f.endswith((".png", ".jpg", ".jpeg"))]

    if not image_files:
        print("No upload image")
    else:
        print(f"Found {len(image_files)} uploaded images")

        for img_file in image_files:
            img_path = os.path.join(UPLOAD_FOLDER, img_file)
            result = predict_image(model, img_path, transform)

            print(f"Image: {img_file} → Prediction: {result}")

            if result == "AI-Generated":
                ai_scores.append(1)
            else:
                human_scores.append(0)

        perform_ttest(ai_scores, human_scores)


Found 2 uploaded images
Image: download (2).jpg → Prediction: Human-Created
Image: download.jpg → Prediction: Human-Created
Information not enough for T-Test (need at least 2 samples per group).
