In [4]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

In [5]:
# Build dataset from folders
def build_dataframe():
    data = []
    for label, folder in enumerate(['ai', 'real']):
        n = 0
        for file in os.listdir(folder):
            if n == 30000:
                break
            if file.lower().endswith(('.jpg', '.jpeg', '.png', '.webp', '.gif')):
                n += 1
                data.append({'file_name': os.path.join(folder, file), 'label': label})
    df = pd.DataFrame(data)
    return train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

train_df, val_df = build_dataframe()
test_files = os.listdir('test_data/teamspace/studios/this_studio/final_test_renamed')
test_df = pd.DataFrame({'file_name': [f'test_data/teamspace/studios/this_studio/final_test_renamed/{x}' for x in test_files]})

# Dataset
class ImageDataset(Dataset):
    def __init__(self, df, transform=None, is_test=False):
        self.df = df.reset_index(drop=True)
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        path = self.df.iloc[idx]['file_name']
        try:
            image = Image.open(path).convert('RGB')
        except (OSError, ValueError) as e:
            print(f"⚠️ Skipping corrupted image: {path}")
            # Fallback: Return a black image
            image = Image.new('RGB', (224, 224), (0, 0, 0))
        
        if self.transform:
            image = self.transform(image)
        
        if self.is_test:
            return image, -1
        else:
            label = int(self.df.iloc[idx]['label'])
            return image, label


# Transforms
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Hyperparameters from Optuna
BATCH_SIZE = 128
LR = 0.000762
WEIGHT_DECAY = 0.00038036

# DataLoaders (⬅️ Made memory-efficient: num_workers=2 and pin_memory=True)
train_dataset = ImageDataset(train_df, transform=train_transform)
val_dataset = ImageDataset(val_df, transform=test_transform)
test_dataset = ImageDataset(test_df, transform=test_transform, is_test=True)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=True)

In [6]:
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = efficientnet_b0(weights=None)  # ⬅️ No default weights since we are loading our own
num_ftrs = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_ftrs, 2)

model.load_state_dict(torch.load('best_model_4_epoch 8_train_98.37953572233491_val_91.9138195258271.pth', map_location=device))  # ⬅️ Load from given path
model = model.to(device)
model.eval() 

# Inference on test data
preds = []
with torch.no_grad():
    for images, _ in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        preds.extend(predicted.cpu().numpy())

# Submission
submission = pd.DataFrame({
    'filename': test_df['file_name'].apply(lambda x: os.path.basename(x)),
    'class': preds
})
submission.to_csv('final_submission_4_8.csv', index=False)
print("✅ final_submission.csv saved successfully!")
print(submission.head())

  model.load_state_dict(torch.load('best_model_4_epoch 8_train_98.37953572233491_val_91.9138195258271.pth', map_location=device))  # ⬅️ Load from given path


✅ final_submission.csv saved successfully!
   filename  class
0     1.jpg      0
1    10.jpg      1
2   100.jpg      1
3  1000.jpg      0
4  1001.jpg      1
