In [9]:
!pip install kaggle timm


Collecting timm
  Downloading timm-1.0.15-py3-none-any.whl.metadata (52 kB)
Collecting pyyaml (from timm)
  Using cached PyYAML-6.0.2-cp310-cp310-win_amd64.whl.metadata (2.1 kB)
Collecting huggingface_hub (from timm)
  Downloading huggingface_hub-0.30.1-py3-none-any.whl.metadata (13 kB)
Collecting safetensors (from timm)
  Downloading safetensors-0.5.3-cp38-abi3-win_amd64.whl.metadata (3.9 kB)
Collecting filelock (from huggingface_hub->timm)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting fsspec>=2023.5.0 (from huggingface_hub->timm)
  Downloading fsspec-2025.3.2-py3-none-any.whl.metadata (11 kB)
Collecting networkx (from torch->timm)
  Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting jinja2 (from torch->timm)
  Using cached jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting sympy==1.13.1 (from torch->timm)
  Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy==1.13.1->torch->

In [1]:
import numpy as np
import pandas as pd

In [2]:
import os
import zipfile
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from timm.models.resnet import resnet50
import torch.nn as nn
import torch.optim as optim

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import os
import zipfile

# Step 1: Set Kaggle API token location
os.environ['KAGGLE_CONFIG_DIR'] = os.getcwd()  # Points to current directory

# Step 2: Create output directories
os.makedirs("train_data", exist_ok=True)
os.makedirs("test_data", exist_ok=True)
print("directories has made")

# Step 3: Download datasets using Kaggle CLI
# os.system("kaggle datasets download -d shreyansjain04/ai-vs-real-image-dataset -p train_data")
os.system("kaggle datasets download -d shreyansjain04/ai-vs-real-image-test-dataset -p test_data")
print("zip download process started")

# # Step 4: Unzip both datasets
# with zipfile.ZipFile("train_data/ai-vs-real-image-dataset.zip", 'r') as zip_ref:
#     zip_ref.extractall("train_data")




directories has made
zip download process started


In [4]:
with zipfile.ZipFile("test_data/ai-vs-real-image-test-dataset.zip", 'r') as zip_ref:
    zip_ref.extractall("test_data")

print("✅ test datasets downloaded and extracted.")

✅ test datasets downloaded and extracted.


In [25]:
import os
from collections import defaultdict

def count_file_types(folder):
    file_types = defaultdict(int)
    for file in os.listdir(folder):
        file_path = os.path.join(folder, file)
        if os.path.isfile(file_path):
            ext = os.path.splitext(file)[-1].lower().lstrip('.')
            file_types[ext] += 1
    return file_types

def list_folders(directory, indent=0):
    for item in os.listdir(directory):
        if item =='env':
            continue
        path = os.path.join(directory, item)
        if os.path.isdir(path):
            print("  " * indent + f"📂 {item}")
            file_counts = count_file_types(path)
            if file_counts:
                file_counts_str = ", ".join(f"{count} {ext}" for ext, count in file_counts.items())
                print("  " * (indent + 1) + f"📄 {file_counts_str}")
            list_folders(path, indent + 1)

if __name__ == "__main__":
    current_directory = os.getcwd()
    print(f"Root: {current_directory}")
    list_folders(current_directory)

Root: d:\OneDrive - iitgn.ac.in\Desktop\HackRush
📂 model
  📄 1 bin
📂 test_data
  📄 1 zip
  📂 teamspace
    📂 studios
      📂 this_studio
        📂 final_test_renamed
          📄 5000 jpg
📂 train
  📂 ai
    📂 AiArtData
      📄 385 jpg, 115 png, 36 jpeg, 3 webp
  📂 real
    📂 RealArt
      📄 378 jpg, 35 png, 1 , 21 jpeg, 1 gif


## Data Processing

In [26]:
!pip install opencv-python -q

In [27]:
!pip install scikit-learn -q

In [36]:
import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import accuracy_score

# ------------------------------
# 1. CONFIGURATION
# ------------------------------
IMAGE_SIZE = 224
BATCH_SIZE = 32
NUM_EPOCHS = 5
LR = 1e-4
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ------------------------------
# 2. UTILS: FFT Feature Extraction
# ------------------------------
def extract_fft_features(image):
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    f = np.fft.fft2(gray)
    fshift = np.fft.fftshift(f)
    magnitude_spectrum = 20 * np.log(np.abs(fshift) + 1e-8)
    resized = cv2.resize(magnitude_spectrum, (32, 32)) / 255.0
    return resized.astype(np.float32)

# ------------------------------
# 3. CUSTOM DATASET
# ------------------------------
class DeepFakeDataset(Dataset):
    def __init__(self, img_paths, labels=None, transform=None):
        self.img_paths = img_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        path = self.img_paths[idx]
        image = Image.open(path).convert("RGB")
        np_img = np.array(image)
        fft_feat = extract_fft_features(np_img)
        fft_feat = torch.tensor(fft_feat).unsqueeze(0)  # (1, 32, 32)

        if self.transform:
            image = self.transform(image)

        if self.labels is not None:
            label = self.labels[idx]
            return image, fft_feat, label
        else:
            filename = os.path.basename(path)
            return image, fft_feat, filename

# ------------------------------
# 4. TRANSFORMS
# ------------------------------
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# ------------------------------
# 5. DATA LOADING
# ------------------------------
def get_valid_images(folder):
    valid_exts = [".jpg", ".jpeg", ".png"]
    return [os.path.join(folder, f) for f in os.listdir(folder)
            if os.path.splitext(f.lower())[1] in valid_exts]

ai_images = get_valid_images("train/ai/AiArtData")
real_images = get_valid_images("train/real/RealArt")

all_images = ai_images + real_images
all_labels = [1]*len(ai_images) + [0]*len(real_images)

dataset = DeepFakeDataset(all_images, all_labels, transform)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

train_ds, val_ds = random_split(dataset, [train_size, val_size])
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)

# ------------------------------
# 6. MODEL: ResNet50 + FFT Fusion
# ------------------------------
class ResNetWithFreq(nn.Module):
    def __init__(self):
        super(ResNetWithFreq, self).__init__()
        self.cnn = models.resnet50(pretrained=True)
        self.cnn.fc = nn.Identity()  # Output: 2048 features

        self.freq_branch = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 16x16
            nn.Conv2d(8, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1))  # Global pooling
        )

        self.classifier = nn.Sequential(
            nn.Linear(2048 + 16, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 2)
        )

    def forward(self, x_img, x_fft):
        img_feat = self.cnn(x_img)
        fft_feat = self.freq_branch(x_fft)
        fft_feat = fft_feat.view(fft_feat.size(0), -1)
        x = torch.cat((img_feat, fft_feat), dim=1)
        return self.classifier(x)

# ------------------------------
# 7. TRAINING LOOP
# ------------------------------
model = ResNetWithFreq().to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0
    for imgs, freqs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}"):
        imgs, freqs, labels = imgs.to(DEVICE), freqs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(imgs, freqs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for imgs, freqs, labels in val_loader:
            imgs, freqs = imgs.to(DEVICE), freqs.to(DEVICE)
            outputs = model(imgs, freqs)
            p = torch.argmax(outputs, dim=1).cpu().numpy()
            preds.extend(p)
            targets.extend(labels.numpy())
    acc = accuracy_score(targets, preds)
    print(f"Epoch [{epoch+1}] - Loss: {running_loss:.4f} - Val Accuracy: {acc:.4f}")

torch.save(model.state_dict(), "best_model.pth")

# ------------------------------
# 8. TEST SET INFERENCE + SUBMISSION
# ------------------------------
test_dir = "test_data/teamspace/studios/this_studio/final_test_renamed"
test_imgs = get_valid_images(test_dir)
test_ds = DeepFakeDataset(test_imgs, transform=transform)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE)

model.eval()
submission = []

with torch.no_grad():
    for imgs, freqs, filenames in tqdm(test_loader, desc="Predicting"):
        imgs, freqs = imgs.to(DEVICE), freqs.to(DEVICE)
        outputs = model(imgs, freqs)
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        submission.extend(list(zip(filenames, preds)))

submission_df = pd.DataFrame(submission, columns=["filename", "class"])
submission_df.to_csv("submission.csv", index=False)
print("✅ Saved: submission.csv")
print(submission_df.head())


Epoch 1/5: 100%|██████████| 25/25 [03:40<00:00,  8.84s/it]


Epoch [1] - Loss: 13.0393 - Val Accuracy: 0.7835


Epoch 2/5: 100%|██████████| 25/25 [03:10<00:00,  7.63s/it]


Epoch [2] - Loss: 4.1984 - Val Accuracy: 0.8351


Epoch 3/5: 100%|██████████| 25/25 [03:15<00:00,  7.82s/it]


Epoch [3] - Loss: 1.8119 - Val Accuracy: 0.8351


Epoch 4/5: 100%|██████████| 25/25 [03:12<00:00,  7.70s/it]


Epoch [4] - Loss: 1.4338 - Val Accuracy: 0.8351


Epoch 5/5: 100%|██████████| 25/25 [03:22<00:00,  8.10s/it]


Epoch [5] - Loss: 1.0163 - Val Accuracy: 0.8093


Predicting: 100%|██████████| 157/157 [25:54<00:00,  9.90s/it]  


✅ Saved: submission.csv
   filename  class
0     1.jpg      1
1    10.jpg      0
2   100.jpg      1
3  1000.jpg      0
4  1001.jpg      1


## Apply Resnet 50

In [37]:
import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import accuracy_score

# ------------------------------
# 1. CONFIGURATION
# ------------------------------
IMAGE_SIZE = 224
BATCH_SIZE = 32
NUM_EPOCHS = 5
LR = 1e-4
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [38]:
# ------------------------------
# 2. CUSTOM DATASET
# ------------------------------
class DeepFakeDataset(Dataset):
    def __init__(self, img_paths, labels=None, transform=None):
        self.img_paths = img_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        path = self.img_paths[idx]
        image = Image.open(path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        if self.labels is not None:
            label = self.labels[idx]
            return image, label
        else:
            filename = os.path.basename(path)
            return image, filename

# ------------------------------
# 3. TRANSFORMS
# ------------------------------
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# ------------------------------
# 4. DATA LOADING
# ------------------------------
def get_valid_images(folder):
    valid_exts = [".jpg", ".jpeg", ".png"]
    return [os.path.join(folder, f) for f in os.listdir(folder)
            if os.path.splitext(f.lower())[1] in valid_exts]

ai_images = get_valid_images("train/ai/AiArtData")
real_images = get_valid_images("train/real/RealArt")

all_images = ai_images + real_images
all_labels = [1]*len(ai_images) + [0]*len(real_images)

dataset = DeepFakeDataset(all_images, all_labels, transform)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

train_ds, val_ds = random_split(dataset, [train_size, val_size])
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)

# ------------------------------
# 5. MODEL: ResNet50
# ------------------------------
class SimpleResNet(nn.Module):
    def __init__(self):
        super(SimpleResNet, self).__init__()
        self.backbone = models.resnet50(pretrained=True)
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, 2)

    def forward(self, x):
        return self.backbone(x)

In [39]:
# ------------------------------
# 6. TRAINING LOOP
# ------------------------------
model = SimpleResNet().to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0
    for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}"):
        imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs = imgs.to(DEVICE)
            outputs = model(imgs)
            p = torch.argmax(outputs, dim=1).cpu().numpy()
            preds.extend(p)
            targets.extend(labels.numpy())
    acc = accuracy_score(targets, preds)
    print(f"Epoch [{epoch+1}] - Loss: {running_loss:.4f} - Val Accuracy: {acc:.4f}")

torch.save(model.state_dict(), "resnet50_model.pth")

Epoch 1/5: 100%|██████████| 25/25 [00:38<00:00,  1.52s/it]


Epoch [1] - Loss: 11.7944 - Val Accuracy: 0.8093


Epoch 2/5: 100%|██████████| 25/25 [00:36<00:00,  1.46s/it]


Epoch [2] - Loss: 2.9919 - Val Accuracy: 0.8093


Epoch 3/5: 100%|██████████| 25/25 [00:38<00:00,  1.54s/it]


Epoch [3] - Loss: 0.6660 - Val Accuracy: 0.8144


Epoch 4/5: 100%|██████████| 25/25 [00:37<00:00,  1.49s/it]


Epoch [4] - Loss: 0.6606 - Val Accuracy: 0.8454


Epoch 5/5: 100%|██████████| 25/25 [00:33<00:00,  1.35s/it]


Epoch [5] - Loss: 0.9335 - Val Accuracy: 0.8144


In [40]:
# ------------------------------
# 7. TEST SET INFERENCE + SUBMISSION
# ------------------------------
test_dir = "test_data/teamspace/studios/this_studio/final_test_renamed"
test_imgs = get_valid_images(test_dir)
test_ds = DeepFakeDataset(test_imgs, transform=transform)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE)

model.eval()
submission = []

with torch.no_grad():
    for imgs, filenames in tqdm(test_loader, desc="Predicting"):
        imgs = imgs.to(DEVICE)
        outputs = model(imgs)
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        submission.extend(list(zip(filenames, preds)))

submission_df = pd.DataFrame(submission, columns=["filename", "class"])
submission_df.to_csv("submission.csv", index=False)
print("✅ Saved: submission.csv")
print(submission_df.head())


Predicting: 100%|██████████| 157/157 [01:46<00:00,  1.47it/s]


✅ Saved: submission.csv
   filename  class
0     1.jpg      1
1    10.jpg      0
2   100.jpg      1
3  1000.jpg      0
4  1001.jpg      1


## Hyperparameter Tunning

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import accuracy_score
import itertools

# ------------------------------
# 1. CONFIGURATION
# ------------------------------
IMAGE_SIZE = 224
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Grid search hyperparameters
LR_LIST = [1e-3, 1e-4]
BATCH_SIZE_LIST = [16, 32]
EPOCHS_LIST = [3, 5]

# ------------------------------
# 2. CUSTOM DATASET
# ------------------------------
class DeepFakeDataset(Dataset):
    def __init__(self, img_paths, labels=None, transform=None):
        self.img_paths = img_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        path = self.img_paths[idx]
        image = Image.open(path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        if self.labels is not None:
            label = self.labels[idx]
            return image, label
        else:
            filename = os.path.basename(path)
            return image, filename

# ------------------------------
# 3. IMAGE TRANSFORMS
# ------------------------------
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# ------------------------------
# 4. DATA LOADING
# ------------------------------
def get_valid_images(folder):
    valid_exts = [".jpg", ".jpeg", ".png"]
    return [os.path.join(folder, f) for f in os.listdir(folder)
            if os.path.splitext(f.lower())[1] in valid_exts]

ai_images = get_valid_images("train/ai/AiArtData")
real_images = get_valid_images("train/real/RealArt")

all_images = ai_images + real_images
all_labels = [1]*len(ai_images) + [0]*len(real_images)

dataset = DeepFakeDataset(all_images, all_labels, transform)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

# ------------------------------
# 5. MODEL CLASS
# ------------------------------
class SimpleResNet(nn.Module):
    def __init__(self):
        super(SimpleResNet, self).__init__()
        self.backbone = models.resnet50(pretrained=True)
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, 2)

    def forward(self, x):
        return self.backbone(x)

# ------------------------------
# 6. HYPERPARAMETER TUNING
# ------------------------------
results = []

for LR, BATCH_SIZE, EPOCHS in itertools.product(LR_LIST, BATCH_SIZE_LIST, EPOCHS_LIST):
    print(f"\n🔧 Training with LR={LR}, Batch={BATCH_SIZE}, Epochs={EPOCHS}")

    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)

    model = SimpleResNet().to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)

    for epoch in range(EPOCHS):
        model.train()
        running_loss = 0
        for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
            imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

    # Evaluation
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs = imgs.to(DEVICE)
            outputs = model(imgs)
            p = torch.argmax(outputs, dim=1).cpu().numpy()
            preds.extend(p)
            targets.extend(labels.numpy())

    acc = accuracy_score(targets, preds)
    print(f"✅ Accuracy: {acc:.4f}")
    results.append((LR, BATCH_SIZE, EPOCHS, acc))

# ------------------------------
# 7. BEST MODEL + FINAL TRAINING
# ------------------------------
best_combo = max(results, key=lambda x: x[3])
best_lr, best_batch, best_epoch, best_acc = best_combo
print(f"\n🥇 Best Hyperparams => LR: {best_lr}, Batch: {best_batch}, Epochs: {best_epoch}, Val Acc: {best_acc:.4f}")

# Retrain best model
train_loader = DataLoader(train_ds, batch_size=best_batch, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=best_batch)

model = SimpleResNet().to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=best_lr)

for epoch in range(best_epoch):
    model.train()
    for imgs, labels in tqdm(train_loader, desc=f"[BEST] Epoch {epoch+1}/{best_epoch}"):
        imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

torch.save(model.state_dict(), "best_resnet50_model.pth")

# ------------------------------
# 8. FINAL TEST INFERENCE
# ------------------------------
test_dir = "test_data/teamspace/studios/this_studio/final_test_renamed"
test_imgs = get_valid_images(test_dir)
test_ds = DeepFakeDataset(test_imgs, transform=transform)
test_loader = DataLoader(test_ds, batch_size=best_batch)

model.eval()
submission = []

with torch.no_grad():
    for imgs, filenames in tqdm(test_loader, desc="📦 Predicting"):
        imgs = imgs.to(DEVICE)
        outputs = model(imgs)
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        submission.extend(list(zip(filenames, preds)))

submission_df = pd.DataFrame(submission, columns=["filename", "class"])
submission_df.to_csv("submission_hyper_tunned.csv", index=False)
print("✅ Saved: submission.csv")
print(submission_df.head())



🔧 Training with LR=0.001, Batch=16, Epochs=3


Epoch 1/3: 100%|██████████| 49/49 [00:41<00:00,  1.17it/s]
Epoch 2/3: 100%|██████████| 49/49 [00:38<00:00,  1.27it/s]
Epoch 3/3: 100%|██████████| 49/49 [00:58<00:00,  1.18s/it]


✅ Accuracy: 0.6649

🔧 Training with LR=0.001, Batch=16, Epochs=5


Epoch 1/5: 100%|██████████| 49/49 [00:46<00:00,  1.06it/s]
Epoch 2/5: 100%|██████████| 49/49 [00:35<00:00,  1.37it/s]
Epoch 3/5: 100%|██████████| 49/49 [00:34<00:00,  1.42it/s]
Epoch 4/5: 100%|██████████| 49/49 [00:38<00:00,  1.29it/s]
Epoch 5/5: 100%|██████████| 49/49 [00:36<00:00,  1.34it/s]


✅ Accuracy: 0.6856

🔧 Training with LR=0.001, Batch=32, Epochs=3


Epoch 1/3: 100%|██████████| 25/25 [00:32<00:00,  1.30s/it]
Epoch 2/3: 100%|██████████| 25/25 [00:32<00:00,  1.28s/it]
Epoch 3/3: 100%|██████████| 25/25 [00:35<00:00,  1.40s/it]


✅ Accuracy: 0.7268

🔧 Training with LR=0.001, Batch=32, Epochs=5


Epoch 1/5: 100%|██████████| 25/25 [00:33<00:00,  1.33s/it]
Epoch 2/5: 100%|██████████| 25/25 [00:38<00:00,  1.52s/it]
Epoch 3/5: 100%|██████████| 25/25 [00:56<00:00,  2.25s/it]
Epoch 4/5: 100%|██████████| 25/25 [00:36<00:00,  1.46s/it]
Epoch 5/5: 100%|██████████| 25/25 [00:33<00:00,  1.36s/it]


✅ Accuracy: 0.7268

🔧 Training with LR=0.0001, Batch=16, Epochs=3


Epoch 1/3: 100%|██████████| 49/49 [00:34<00:00,  1.42it/s]
Epoch 2/3: 100%|██████████| 49/49 [00:37<00:00,  1.30it/s]
Epoch 3/3: 100%|██████████| 49/49 [00:34<00:00,  1.42it/s]


✅ Accuracy: 0.7629

🔧 Training with LR=0.0001, Batch=16, Epochs=5


Epoch 1/5: 100%|██████████| 49/49 [00:31<00:00,  1.57it/s]
Epoch 2/5: 100%|██████████| 49/49 [00:29<00:00,  1.65it/s]
Epoch 3/5: 100%|██████████| 49/49 [00:29<00:00,  1.65it/s]
Epoch 4/5: 100%|██████████| 49/49 [00:30<00:00,  1.60it/s]
Epoch 5/5: 100%|██████████| 49/49 [00:31<00:00,  1.56it/s]


✅ Accuracy: 0.8093

🔧 Training with LR=0.0001, Batch=32, Epochs=3


Epoch 1/3: 100%|██████████| 25/25 [00:30<00:00,  1.23s/it]
Epoch 2/3: 100%|██████████| 25/25 [00:31<00:00,  1.24s/it]
Epoch 3/3: 100%|██████████| 25/25 [00:37<00:00,  1.50s/it]


✅ Accuracy: 0.8196

🔧 Training with LR=0.0001, Batch=32, Epochs=5


Epoch 1/5: 100%|██████████| 25/25 [00:33<00:00,  1.33s/it]
Epoch 2/5: 100%|██████████| 25/25 [00:30<00:00,  1.20s/it]
Epoch 3/5: 100%|██████████| 25/25 [00:30<00:00,  1.20s/it]
Epoch 4/5: 100%|██████████| 25/25 [00:30<00:00,  1.23s/it]
Epoch 5/5: 100%|██████████| 25/25 [00:31<00:00,  1.27s/it]


✅ Accuracy: 0.8402

🥇 Best Hyperparams => LR: 0.0001, Batch: 32, Epochs: 5, Val Acc: 0.8402


[BEST] Epoch 1/5: 100%|██████████| 25/25 [00:21<00:00,  1.15it/s]
[BEST] Epoch 2/5: 100%|██████████| 25/25 [00:21<00:00,  1.16it/s]
[BEST] Epoch 3/5: 100%|██████████| 25/25 [00:21<00:00,  1.14it/s]
[BEST] Epoch 4/5: 100%|██████████| 25/25 [00:22<00:00,  1.12it/s]
[BEST] Epoch 5/5: 100%|██████████| 25/25 [00:22<00:00,  1.10it/s]
📦 Predicting: 100%|██████████| 157/157 [02:39<00:00,  1.02s/it]


✅ Saved: submission.csv
   filename  class
0     1.jpg      1
1    10.jpg      0
2   100.jpg      1
3  1000.jpg      0
4  1001.jpg      1


In [2]:
df = pd.read_csv("submission_hyper_tunned.csv")
df.shape

(5000, 2)

## Apply Efficient net

In [5]:
!pip install optuna -q

In [6]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
import optuna

# STEP 1: Build dataframe from folders
def build_dataframe():
    data = []
    for label, folder in enumerate([
        'train/ai/AiArtData', 
        'train/real/RealArt'
    ]):
        for file in os.listdir(folder):
            if file.lower().endswith(('.jpg', '.jpeg', '.png', '.webp', '.gif')):
                data.append({'file_name': os.path.join(folder, file), 'label': label})
    df = pd.DataFrame(data)
    return train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

train_df, val_df = build_dataframe()
test_files = os.listdir('test_data/teamspace/studios/this_studio/final_test_renamed')
test_df = pd.DataFrame({'file_name': [f'test_data/teamspace/studios/this_studio/final_test_renamed/{x}' for x in test_files]})

# STEP 2: Define Dataset
class ImageDataset(Dataset):
    def __init__(self, df, transform=None, is_test=False):
        self.df = df
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        path = self.df.iloc[idx]['file_name']
        image = Image.open(path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        if self.is_test:
            return image, -1
        else:
            label = int(self.df.iloc[idx]['label'])
            return image, label

# STEP 3: Define transforms
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# STEP 4: Define training logic inside Optuna trial
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def objective(trial):
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-3)
    wd = trial.suggest_loguniform("weight_decay", 1e-6, 1e-2)

    train_dataset = ImageDataset(train_df, transform=train_transform)
    val_dataset = ImageDataset(val_df, transform=test_transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    model = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)
    num_ftrs = model.classifier[1].in_features
    model.classifier[1] = nn.Linear(num_ftrs, 2)

    for param in model.parameters():
        param.requires_grad = False
    for param in model.features[-3:].parameters():
        param.requires_grad = True
    for param in model.classifier.parameters():
        param.requires_grad = True

    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)

    best_val_acc = 0
    for epoch in range(10):  # Keep it short for tuning
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        val_acc = correct / total
        best_val_acc = max(best_val_acc, val_acc)
    return best_val_acc

# STEP 5: Run Optuna tuning
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=15)
print("Best hyperparameters:", study.best_params)

# STEP 6: Train final model using best params
best_params = study.best_params
final_train_dataset = ImageDataset(train_df, transform=train_transform)
final_val_dataset = ImageDataset(val_df, transform=test_transform)
final_test_dataset = ImageDataset(test_df, transform=test_transform, is_test=True)

train_loader = DataLoader(final_train_dataset, batch_size=best_params['batch_size'], shuffle=True)
val_loader = DataLoader(final_val_dataset, batch_size=best_params['batch_size'], shuffle=False)
test_loader = DataLoader(final_test_dataset, batch_size=best_params['batch_size'], shuffle=False)

model = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)

for param in model.parameters():
    param.requires_grad = False
for param in model.features[-3:].parameters():
    param.requires_grad = True
for param in model.classifier.parameters():
    param.requires_grad = True

model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=best_params['lr'], weight_decay=best_params['weight_decay'])
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

best_val_acc = 0
for epoch in range(20):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    val_correct = 0
    val_total = 0
    model.eval()
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            val_correct += (preds == labels).sum().item()
            val_total += labels.size(0)

    acc = 100 * val_correct / val_total
    print(f"Epoch {epoch+1}: Train Acc = {100*correct/total:.2f}%, Val Acc = {acc:.2f}%")
    scheduler.step()

    if acc > best_val_acc:
        best_val_acc = acc
        torch.save(model.state_dict(), 'best_model.pth')

# STEP 7: Generate submission
model.load_state_dict(torch.load('best_model.pth'))
model.eval()
preds = []
with torch.no_grad():
    for images, _ in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        preds.extend(predicted.cpu().numpy())

submission = pd.DataFrame({
    'id': test_df['file_name'],
    'label': preds
})
submission.to_csv('submission.csv', index=False)
print(submission.head())

# STEP 8: Evaluation metrics on validation
all_preds, all_labels = [], []
model.eval()
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

print(f"Precision: {precision_score(all_labels, all_preds, average='binary'):.4f}")
print(f"Recall: {recall_score(all_labels, all_preds, average='binary'):.4f}")
print(f"F1 Score: {f1_score(all_labels, all_preds, average='binary'):.4f}")


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-04-05 16:52:02,387] A new study created in memory with name: no-name-0f953121-a732-43ac-961b-2e5529ec1846
  lr = trial.suggest_loguniform("lr", 1e-5, 1e-3)
  wd = trial.suggest_loguniform("weight_decay", 1e-6, 1e-2)
Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to C:\Users\Asus/.cache\torch\hub\checkpoints\efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 28.5MB/s]
[I 2025-04-05 16:58:31,401] Trial 0 finished with value: 0.8153846153846154 and parameters: {'batch_size': 128, 'lr': 0.0009103720824170017, 'weight_decay': 5.785612347423647e-06}. Best is trial 0 with value: 0.8153846153846154.
[I 2025-04-05 17:04:20,816] Trial 1 finished with value: 0.841025641025641 and parameters: {'batch_size': 32, 'lr': 0.00024283165457452815, 'weight_decay': 0.005677519185160485}. Best is trial 1 with value: 0.841025641025641.
[I 2025-04-05 17:09:52,728] Trial 2 finishe

KeyboardInterrupt: 

In [7]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

# Build dataset from folders
def build_dataframe():
    data = []
    for label, folder in enumerate([
        'train/ai/AiArtData', 
        'train/real/RealArt'
    ]):
        for file in os.listdir(folder):
            if file.lower().endswith(('.jpg', '.jpeg', '.png', '.webp', '.gif')):
                data.append({'file_name': os.path.join(folder, file), 'label': label})
    df = pd.DataFrame(data)
    return train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

train_df, val_df = build_dataframe()
test_files = os.listdir('test_data/teamspace/studios/this_studio/final_test_renamed')
test_df = pd.DataFrame({'file_name': [f'test_data/teamspace/studios/this_studio/final_test_renamed/{x}' for x in test_files]})

# Dataset
class ImageDataset(Dataset):
    def __init__(self, df, transform=None, is_test=False):
        self.df = df
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        path = self.df.iloc[idx]['file_name']
        image = Image.open(path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        if self.is_test:
            return image, -1
        else:
            label = int(self.df.iloc[idx]['label'])
            return image, label

# Transforms
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Hyperparameters from Optuna
BATCH_SIZE = 64
LR = 0.0007619201919041039
WEIGHT_DECAY = 0.00038036142328661356

# DataLoaders
train_dataset = ImageDataset(train_df, transform=train_transform)
val_dataset = ImageDataset(val_df, transform=test_transform)
test_dataset = ImageDataset(test_df, transform=test_transform, is_test=True)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)
num_ftrs = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_ftrs, 2)

# Unfreeze last few layers
for param in model.parameters():
    param.requires_grad = False
for param in model.features[-3:].parameters():
    param.requires_grad = True
for param in model.classifier.parameters():
    param.requires_grad = True

model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# Training loop
best_val_acc = 0
for epoch in range(20):
    model.train()
    correct = 0
    total = 0
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    train_acc = 100 * correct / total

    # Validation
    model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            val_correct += (predicted == labels).sum().item()
            val_total += labels.size(0)

    val_acc = 100 * val_correct / val_total
    print(f"Epoch {epoch+1}: Train Acc = {train_acc:.2f}%, Val Acc = {val_acc:.2f}%")
    scheduler.step()

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')

# Load best model for inference
model.load_state_dict(torch.load('best_model.pth'))
model.eval()

# Inference on test data
preds = []
with torch.no_grad():
    for images, _ in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        preds.extend(predicted.cpu().numpy())

# Submission
submission = pd.DataFrame({
    'id': test_df['file_name'].apply(lambda x: os.path.basename(x)),
    'label': preds
})
submission.to_csv('submission.csv', index=False)
print("✅ submission.csv saved successfully!")
print(submission.head())




Epoch 1: Train Acc = 70.86%, Val Acc = 81.03%
Epoch 2: Train Acc = 92.04%, Val Acc = 83.59%
Epoch 3: Train Acc = 94.99%, Val Acc = 81.03%
Epoch 4: Train Acc = 97.18%, Val Acc = 86.15%
Epoch 5: Train Acc = 97.95%, Val Acc = 83.59%
Epoch 6: Train Acc = 99.49%, Val Acc = 84.10%
Epoch 7: Train Acc = 98.46%, Val Acc = 85.13%
Epoch 8: Train Acc = 99.49%, Val Acc = 84.62%
Epoch 9: Train Acc = 99.49%, Val Acc = 85.13%
Epoch 10: Train Acc = 99.74%, Val Acc = 85.64%
Epoch 11: Train Acc = 100.00%, Val Acc = 85.64%
Epoch 12: Train Acc = 98.97%, Val Acc = 85.13%
Epoch 13: Train Acc = 99.74%, Val Acc = 85.64%
Epoch 14: Train Acc = 100.00%, Val Acc = 85.64%
Epoch 15: Train Acc = 99.61%, Val Acc = 84.62%
Epoch 16: Train Acc = 99.61%, Val Acc = 85.64%
Epoch 17: Train Acc = 99.36%, Val Acc = 86.15%
Epoch 18: Train Acc = 99.49%, Val Acc = 85.64%
Epoch 19: Train Acc = 99.74%, Val Acc = 85.64%
Epoch 20: Train Acc = 99.61%, Val Acc = 85.13%


  model.load_state_dict(torch.load('best_model.pth'))


✅ submission.csv saved successfully!
         id  label
0     1.jpg      0
1    10.jpg      1
2   100.jpg      0
3  1000.jpg      1
4  1001.jpg      0


In [8]:
torch.save(model.state_dict(), 'best_model.pth')

In [9]:
model = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)
num_ftrs = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_ftrs, 2)
model.load_state_dict(torch.load('best_model.pth'))
model.to(device)
model.eval()

  model.load_state_dict(torch.load('best_model.pth'))


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat