In [1]:


import numpy as np 
import pandas as pd 
 

import os

In [4]:
import os
from torchvision import datasets
from torch.utils.data import DataLoader

# Dataset base path, relative to the notebook's working directory.
dataset_root = "../../Dataset"

# Full paths to each data split.
train_path = os.path.join(dataset_root, "Train")
val_path = os.path.join(dataset_root, "Validation")
test_path = os.path.join(dataset_root, "Test")

# Check that the split directories exist before anything tries to read them.
print("Train path exists:", os.path.exists(train_path))
print("Validation path exists:", os.path.exists(val_path))
print("Test path exists:", os.path.exists(test_path))

# List the class folders (expected: the real and fake classes).
# Only sub-directories are reported, in sorted order, so stray files such as
# OS metadata do not show up as "classes"; a missing train directory is
# reported instead of raising FileNotFoundError straight after the checks above.
if os.path.isdir(train_path):
    train_classes = sorted(
        entry
        for entry in os.listdir(train_path)
        if os.path.isdir(os.path.join(train_path, entry))
    )
    print("Train classes:", train_classes)
else:
    train_classes = []
    print("Train classes: unavailable (train path is not a directory)")



In [5]:
from collections import Counter
from PIL import Image
import os 
## Check the variation in image sizes to decide whether any preprocessing is needed.
# Up to `no_samples` files are inspected per class folder of the training split.
img_size = Counter()      # (width, height) as reported by PIL -> number of sampled images
no_samples = 500          # maximum number of files inspected per class folder
sampled_img = []          # paths of the images that were successfully opened
unreadable_img = []       # paths that could not be opened as images

# os.listdir returns entries in arbitrary order, so both listings are sorted
# to make the sample reproducible across runs and machines.
for class_name in sorted(os.listdir(train_path)):
    class_dir = os.path.join(train_path, class_name)
    if not os.path.isdir(class_dir):
        continue
    for file_name in sorted(os.listdir(class_dir))[:no_samples]:
        img_path = os.path.join(class_dir, file_name)
        try:
            with Image.open(img_path) as img_obj:
                img_size[img_obj.size] += 1
        except OSError:
            # Nested directories and non-image/corrupt files end up here;
            # record them instead of aborting the whole survey.
            unreadable_img.append(img_path)
            continue
        sampled_img.append(img_path)

if unreadable_img:
    print(f"Skipped {len(unreadable_img)} unreadable file(s), e.g. {unreadable_img[0]}")

for size, count in img_size.most_common(20):
    print(f"Top 20 most frequent sizes: size: {size}, count: {count}")


In [6]:
from torchvision import transforms
# Data preprocessing and augmentation.
# Both pipelines resize to 260x260, convert to a tensor and normalise each
# channel; the training pipeline additionally applies colour jitter.
transformation_for_train = transforms.Compose(
    [
        transforms.Resize((260, 260)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Validation and test share one deterministic pipeline (no augmentation).
transformation_for_valntest = transforms.Compose(
    [
        transforms.Resize((260, 260)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [7]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
# Data loading: one ImageFolder dataset per split, each with its own transform
# pipeline (augmentation is applied to the training split only).
train_dataset = datasets.ImageFolder(root=train_path, transform=transformation_for_train)
val_dataset = datasets.ImageFolder(root=val_path, transform=transformation_for_valntest)
test_dataset = datasets.ImageFolder(root=test_path, transform=transformation_for_valntest)
batchsize = 32

# Only the training loader is shuffled. Evaluation metrics do not depend on
# sample order, and keeping validation/test in dataset order makes the
# per-sample predictions reproducible and easy to map back to files.
train_loader = DataLoader(train_dataset, batch_size=batchsize, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batchsize, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batchsize, shuffle=False)

In [9]:
from torchvision import models
import torch.nn as nn 
# Load a pretrained EfficientNet-B4 and replace the last classifier layer with
# a fully connected layer that emits a single logit.
# `pretrained=True` is deprecated in newer torchvision releases in favour of
# the `weights=` enum; fall back to the legacy flag only when the enum is not
# available (older torchvision).
_b4_weights = getattr(models, "EfficientNet_B4_Weights", None)
if _b4_weights is not None:
    efficientnetmodel = models.efficientnet_b4(weights=_b4_weights.IMAGENET1K_V1)
else:
    efficientnetmodel = models.efficientnet_b4(pretrained=True)

# classifier[1] is the final linear layer; keep its input width, change the
# output width to 1.
no_features = efficientnetmodel.classifier[1].in_features
efficientnetmodel.classifier[1] = nn.Linear(no_features, 1)

# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
efficientnetmodel = efficientnetmodel.to(device)

In [10]:
def freeze_everything_except_classifier(model):
    """Phase 1: train only the classifier head.

    Sets ``requires_grad`` on every parameter of ``model``: ``True`` for
    parameters that live under a ``classifier`` sub-module, ``False`` for all
    others. Every parameter is set explicitly (as the other phase helpers do),
    so the result does not depend on what was frozen before the call.

    Args:
        model: an ``nn.Module`` exposing ``named_parameters()``.
    """
    for name, params in model.named_parameters():
        # Pad with dots so only a whole path component named "classifier" matches.
        params.requires_grad = ".classifier." in f".{name}."
    print("phase 1 - only training classifier")

In [11]:
def unfreeze_lasttwo_block(model):
    """Phase 2: train ``features.6``, ``features.7`` and the classifier.

    Every parameter of ``model`` is visited; those under one of the three
    sub-modules above get ``requires_grad = True``, all others ``False``.

    Names are matched on whole dotted path components, so ``features.6`` does
    not accidentally match ``features.60`` the way a plain substring test would.

    NOTE(review): in torchvision's EfficientNet there appears to be a final
    conv stage at ``features.8`` after these blocks; it stays frozen here.
    Confirm that is intended.

    Args:
        model: an ``nn.Module`` exposing ``named_parameters()``.
    """
    trainable_parts = ("features.6", "features.7", "classifier")
    for name, params in model.named_parameters():
        dotted = f".{name}."
        params.requires_grad = any(f".{part}." in dotted for part in trainable_parts)
    print("phase 2 - training last 2 blocks and classifier")

In [12]:
def unfreeze_last_four_blocks(model):
    """Phase 3: train ``features.4`` to ``features.7`` and the classifier.

    Every parameter of ``model`` is visited; those under one of the listed
    sub-modules get ``requires_grad = True``, all others ``False``.

    Names are matched on whole dotted path components, so ``features.4`` does
    not accidentally match ``features.40`` the way a plain substring test would.

    NOTE(review): in torchvision's EfficientNet there appears to be a final
    conv stage at ``features.8`` after these blocks; it stays frozen here.
    Confirm that is intended.

    Args:
        model: an ``nn.Module`` exposing ``named_parameters()``.
    """
    trainable_parts = (
        "features.4",
        "features.5",
        "features.6",
        "features.7",
        "classifier",
    )
    for name, params in model.named_parameters():
        dotted = f".{name}."
        params.requires_grad = any(f".{part}." in dotted for part in trainable_parts)
    print("Phase 3 - Training last 4 blocks and classifier")

In [13]:
def unfreeze_whole_model(model):
    """Phase 4: mark every parameter of ``model`` as trainable.

    Args:
        model: an object exposing ``parameters()`` (e.g. an ``nn.Module``).
    """
    every_tensor = model.parameters()
    for tensor in every_tensor:
        tensor.requires_grad = True
    print("phase 4 - whole model training")

In [14]:
import torch.optim as optim
# Binary cross-entropy computed on raw logits (the sigmoid is applied inside
# the loss), matching the single-output classifier head.
criterion = nn.BCEWithLogitsLoss()



In [15]:
# NOTE(review): `num_epochs` is not read anywhere in this cell, and the phase
# schedule below adds up to 20 epochs, not 15 — confirm which is intended.
num_epochs = 15

# Progressive fine-tuning schedule. Each phase selects which parameters are
# trainable (via its "unfreeze" helper) and uses its own learning rate; the
# rate shrinks as more of the pretrained backbone is unfrozen, to avoid
# overwriting useful pretrained features.
phases = [
    {"epochs": 5, "unfreeze": freeze_everything_except_classifier, "lr": 0.001},  # new classifier head only
    {"epochs": 5, "unfreeze": unfreeze_lasttwo_block, "lr": 0.0001},              # + last two feature blocks
    {"epochs": 5, "unfreeze": unfreeze_last_four_blocks, "lr": 0.00001},          # + last four feature blocks
    {"epochs": 5, "unfreeze": unfreeze_whole_model, "lr": 1e-6},                  # whole network
]

for phase_idx, phase in enumerate(phases):
    # Apply this phase's freeze/unfreeze policy, then build a fresh optimiser
    # over the trainable parameters only (frozen ones never receive gradients).
    phase["unfreeze"](efficientnetmodel)
    trainable_params = [p for p in efficientnetmodel.parameters() if p.requires_grad]
    optimiser = optim.Adam(trainable_params, lr=phase["lr"])
    print(f"\n--- starting Phase {phase_idx+1}: {phase['unfreeze'].__name__} | Learning Rate: {phase['lr']} ---")

    for epoch in range(phase["epochs"]):
        # NOTE(review): train() switches the whole model to training mode, so
        # layers with running statistics inside frozen blocks presumably keep
        # updating them — confirm this is acceptable.
        efficientnetmodel.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.float().to(device)
            optimiser.zero_grad()
            # Drop only the trailing logit dimension: (batch, 1) -> (batch,).
            # A bare .squeeze() would also collapse a final batch of size 1 to
            # a 0-d tensor, which no longer matches `labels` in the loss.
            outputs = efficientnetmodel(inputs).squeeze(1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimiser.step()

            running_loss += loss.item()

            # Compute training accuracy: logit -> probability -> 0/1 at 0.5.
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        train_accuracy = 100 * correct_train / total_train
        avg_loss = running_loss / len(train_loader)  # mean of per-batch losses

        print(f"phase {phase_idx+1} | epoch [{epoch+1}/{phase['epochs']}]: "
              f"loss = {avg_loss:.4f}, train acc = {train_accuracy:.2f}%")

        # Save a checkpoint after every epoch.
        # NOTE: the filename uses the 0-based phase index, while the log lines
        # above print the phase number 1-based.
        checkpoint_filename = f"efficientnet_phase{phase_idx}_epoch{epoch+1}.pth"
        torch.save(efficientnetmodel.state_dict(), checkpoint_filename)
        print(f"model saved as {checkpoint_filename}")

    # Evaluate on the validation split once, at the end of each phase.
    efficientnetmodel.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.float().to(device)
            outputs = efficientnetmodel(inputs).squeeze(1)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_accuracy = 100 * correct / total
    print(f"phase {phase_idx+1} Validation Accuracy: {val_accuracy:.2f}%\n")
