In [182]:
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Subset, DataLoader
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(DEVICE)

cuda


In [183]:
DATA_DIR = "../dataset" 
MODEL_DIR = "../models"
torch.hub.set_dir(MODEL_DIR)
torch.hub.get_dir()

'../models'

In [184]:
IMG_SIZE = 224
train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

full_dataset = datasets.ImageFolder(root=DATA_DIR)
print(full_dataset.class_to_idx)

indices = list(range(len(full_dataset)))
random.shuffle(indices)
train_size = int(0.8 * len(full_dataset))
train_indices = indices[:train_size]
test_indices = indices[train_size:]

train_dataset = Subset(full_dataset, train_indices)
test_dataset = Subset(full_dataset, test_indices)

print(len(train_indices), len(test_indices))

train_dataset.dataset.transform = train_transform
test_dataset.dataset.transform = test_transform


{'fake_segments': 0, 'real_segments': 1}
163 41


In [212]:
model = torch.hub.load(f"{MODEL_DIR}/pytorch_vision_v0.10.0", 'mobilenet_v2', source="local")
model.classifier[1] = nn.Linear(32, 2)
model.classifier[0] = torch.nn.Dropout(p=0.2)

In [213]:
num_params = sum(p.numel() for p in model.parameters())
num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(num_params, num_trainable_params)

2223938 2223938


In [214]:
model.features = nn.Sequential(*list(model.features.children())[:5])
model

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [215]:
num_params = sum(p.numel() for p in model.parameters())
num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(num_params, num_trainable_params)

25858 25858


In [216]:
# for param in model.features.parameters():
#     param.requires_grad = False
# 
# for param in model.features[-3:].parameters():
#     param.requires_grad = True

In [217]:
num_params = sum(p.numel() for p in model.parameters())
num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(num_params, num_trainable_params)

25858 25858


In [218]:
def train(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10):
    model = model.to(device)
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        train_correct = 0
        total_train = 0

        train_loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Training]", leave=False)
        for images, labels in train_loop:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * images.size(0)

            _, preds = torch.max(outputs, 1)
            train_correct += (preds == labels).sum().item()
            total_train += labels.size(0)

        avg_train_loss = running_loss / total_train
        train_accuracy = train_correct / total_train

        # Validation
        model.eval()
        val_loss = 0.0
        val_correct = 0
        total_val = 0
        all_preds = []
        all_labels = []

        with torch.no_grad():
            val_loop = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Validation]", leave=False)
            for images, labels in val_loop:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * images.size(0)

                _, preds = torch.max(outputs, 1)
                val_correct += (preds == labels).sum().item()
                total_val += labels.size(0)

                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        avg_val_loss = val_loss / total_val
        val_accuracy = val_correct / total_val

        precision = precision_score(all_labels, all_preds, average='binary')
        recall = recall_score(all_labels, all_preds, average='binary')

        print(f"Epoch [{epoch+1}/{num_epochs}]")
        print(f"  Train Loss: {avg_train_loss:.4f} | Train Acc: {train_accuracy:.4f}")
        print(f"  Val Loss:   {avg_val_loss:.4f} | Val Acc:   {val_accuracy:.4f}")
        print(f"  Precision:  {precision:.4f} | Recall:    {recall:.4f}")
        print("-" * 50)


In [219]:
backbone_params = [param for param in model.features.parameters() if param.requires_grad]
head_params = [param for param in model.classifier.parameters()]

batch_size = 16
lr = 1e-3

criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam([
#     {'params': backbone_params, 'lr': 1e-3}, 
#     {'params': head_params, 'lr': 1e-3}],)
#     # weight_decay=1e-5)
optimizer = optim.Adam(model.parameters(), lr=lr,)
    # weight_decay=1e-4)


In [220]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

for img, idx in train_loader:
    print(img.shape, idx.shape)
    break

torch.Size([16, 3, 224, 224]) torch.Size([16])


In [221]:
train(model, train_loader, test_loader, criterion, optimizer, DEVICE, num_epochs=20)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch [1/20]
  Train Loss: 0.6952 | Train Acc: 0.4969
  Val Loss:   0.7002 | Val Acc:   0.5122
  Precision:  0.0000 | Recall:    0.0000
--------------------------------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch [2/20]
  Train Loss: 0.6866 | Train Acc: 0.5706
  Val Loss:   0.6959 | Val Acc:   0.5122
  Precision:  0.0000 | Recall:    0.0000
--------------------------------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch [3/20]
  Train Loss: 0.6842 | Train Acc: 0.5706
  Val Loss:   0.6933 | Val Acc:   0.5122
  Precision:  0.0000 | Recall:    0.0000
--------------------------------------------------


                                                                      

Epoch [4/20]
  Train Loss: 0.6769 | Train Acc: 0.5890
  Val Loss:   0.6961 | Val Acc:   0.4878
  Precision:  0.4878 | Recall:    1.0000
--------------------------------------------------


                                                                      

Epoch [5/20]
  Train Loss: 0.6671 | Train Acc: 0.6687
  Val Loss:   0.6994 | Val Acc:   0.4634
  Precision:  0.4750 | Recall:    0.9500
--------------------------------------------------


                                                                      

Epoch [6/20]
  Train Loss: 0.6561 | Train Acc: 0.6442
  Val Loss:   0.7220 | Val Acc:   0.4878
  Precision:  0.4706 | Recall:    0.4000
--------------------------------------------------


                                                                      

Epoch [7/20]
  Train Loss: 0.6420 | Train Acc: 0.7178
  Val Loss:   0.7268 | Val Acc:   0.4390
  Precision:  0.0000 | Recall:    0.0000
--------------------------------------------------


                                                                      

Epoch [8/20]
  Train Loss: 0.6201 | Train Acc: 0.7730
  Val Loss:   0.7662 | Val Acc:   0.3659
  Precision:  0.2000 | Recall:    0.1000
--------------------------------------------------


                                                                      

Epoch [9/20]
  Train Loss: 0.5788 | Train Acc: 0.7669
  Val Loss:   0.7678 | Val Acc:   0.4390
  Precision:  0.4545 | Recall:    0.7500
--------------------------------------------------


                                                                       

Epoch [10/20]
  Train Loss: 0.5348 | Train Acc: 0.8712
  Val Loss:   0.7520 | Val Acc:   0.4390
  Precision:  0.4286 | Recall:    0.4500
--------------------------------------------------


                                                                       

Epoch [11/20]
  Train Loss: 0.4555 | Train Acc: 0.9080
  Val Loss:   0.8629 | Val Acc:   0.4390
  Precision:  0.3846 | Recall:    0.2500
--------------------------------------------------


                                                                       

Epoch [12/20]
  Train Loss: 0.4169 | Train Acc: 0.9141
  Val Loss:   0.8486 | Val Acc:   0.3902
  Precision:  0.4242 | Recall:    0.7000
--------------------------------------------------


                                                                       

Epoch [13/20]
  Train Loss: 0.3332 | Train Acc: 0.9632
  Val Loss:   0.8311 | Val Acc:   0.4146
  Precision:  0.4231 | Recall:    0.5500
--------------------------------------------------


                                                                       

Epoch [14/20]
  Train Loss: 0.3084 | Train Acc: 0.9571
  Val Loss:   0.9326 | Val Acc:   0.3415
  Precision:  0.4000 | Recall:    0.7000
--------------------------------------------------


                                                                       

Epoch [15/20]
  Train Loss: 0.2174 | Train Acc: 0.9877
  Val Loss:   1.1014 | Val Acc:   0.3659
  Precision:  0.4167 | Recall:    0.7500
--------------------------------------------------


                                                                       

Epoch [16/20]
  Train Loss: 0.1692 | Train Acc: 0.9816
  Val Loss:   0.9776 | Val Acc:   0.4878
  Precision:  0.4706 | Recall:    0.4000
--------------------------------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch [17/20]
  Train Loss: 0.1571 | Train Acc: 0.9877
  Val Loss:   1.3797 | Val Acc:   0.5122
  Precision:  0.0000 | Recall:    0.0000
--------------------------------------------------


                                                                       

Epoch [18/20]
  Train Loss: 0.1550 | Train Acc: 0.9755
  Val Loss:   0.9742 | Val Acc:   0.4634
  Precision:  0.4444 | Recall:    0.4000
--------------------------------------------------


                                                                       

Epoch [19/20]
  Train Loss: 0.0918 | Train Acc: 0.9939
  Val Loss:   1.3442 | Val Acc:   0.4634
  Precision:  0.4000 | Recall:    0.2000
--------------------------------------------------


                                                                       

Epoch [20/20]
  Train Loss: 0.0746 | Train Acc: 1.0000
  Val Loss:   1.4244 | Val Acc:   0.4390
  Precision:  0.4615 | Recall:    0.9000
--------------------------------------------------




In [195]:
# import torch
# import torch.nn.functional as F
# import torchvision.transforms as transforms
# import matplotlib.pyplot as plt
# import numpy as np
# import cv2
# 
# # Helper function to preprocess a single image
# def preprocess_image(img_path):
#     transform = transforms.Compose([
#         transforms.Resize((224, 224)),  # adjust if needed
#         transforms.ToTensor(),
#         transforms.Normalize(mean=[0.485, 0.456, 0.406], 
#                              std=[0.229, 0.224, 0.225])
#     ])
#     from PIL import Image
#     img = Image.open(img_path).convert('RGB')
#     return transform(img).unsqueeze(0)  # Add batch dimension
# 
# # Hook to capture gradients and activations
# class ActivationsAndGradients:
#     def __init__(self, model, target_layer):
#         self.model = model
#         self.target_layer = target_layer
#         self.gradients = None
#         self.activations = None
# 
#         self.hook_a = self.target_layer.register_forward_hook(self.save_activation)
#         self.hook_g = self.target_layer.register_full_backward_hook(self.save_gradient)  # <- FULL backward hook only
# 
#     def save_activation(self, module, input, output):
#         self.activations = output
# 
#     def save_gradient(self, module, grad_input, grad_output):
#         self.gradients = grad_output[0]
# 
#     def remove(self):
#         self.hook_a.remove()
#         self.hook_g.remove()
# 
# # GradCAM computation
# def compute_gradcam(model, image_tensor, target_layer, class_idx=None):
#     model.eval()
#     device = next(model.parameters()).device
#     image_tensor = image_tensor.to(device)
# 
#     hook = ActivationsAndGradients(model, target_layer)
# 
#     output = model(image_tensor)  # Forward pass
#     if class_idx is None:
#         class_idx = output.argmax(dim=1).item()
# 
#     loss = output[:, class_idx]
#     model.zero_grad()
#     loss.backward()
# 
#     # Get gradients and activations
#     gradients = hook.gradients  # [B, C, H, W]
#     activations = hook.activations  # [B, C, H, W]
#     hook.remove()
# 
#     # Compute weights: global average pooling the gradients
#     weights = gradients.mean(dim=(2, 3), keepdim=True)  # [B, C, 1, 1]
#     gradcam_map = (weights * activations).sum(dim=1, keepdim=True)  # [B, 1, H, W]
#     gradcam_map = F.relu(gradcam_map)
# 
#     # Normalize
#     gradcam_map = F.interpolate(gradcam_map, size=(224, 224), mode='bilinear', align_corners=False)
#     gradcam_map = gradcam_map.squeeze().detach().cpu().numpy()
#     gradcam_map = (gradcam_map - gradcam_map.min()) / (gradcam_map.max() - gradcam_map.min() + 1e-8)
# 
#     return gradcam_map
# 
# # Function to visualize GradCAM overlay
# def show_gradcam_on_image(img_path, gradcam_map, alpha=0.5):
#     img = cv2.imread(img_path)
#     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#     img = cv2.resize(img, (224, 224))
# 
#     heatmap = cv2.applyColorMap(np.uint8(255 * gradcam_map), cv2.COLORMAP_JET)
#     heatmap = np.float32(heatmap) / 255
# 
#     cam = heatmap + np.float32(img) / 255
#     cam = cam / np.max(cam)
# 
#     plt.imshow(cam)
#     plt.axis('off')
#     plt.show()


In [196]:
# # # Load your model
# # model = torch.hub.load(f"{MODEL_DIR}/pytorch_vision_v0.10.0", 'mobilenet_v2', source="local")
# # model.classifier[1] = torch.nn.Linear(1280, 2)  # your modification
# model.eval()
# 
# # Move to GPU if available
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model.to(device)
# 
# # Load an image
# img_path = f"{DATA_DIR}/fake/001_08_q32.jpg"
# img_tensor = preprocess_image(img_path)
# 
# # Pick a target layer manually
# target_layer = model.features[1]  # Example: you can pick any layer like 3, 5, 13, etc.
# 
# # Compute GradCAM
# gradcam_map = compute_gradcam(model, img_tensor, target_layer)
# 
# # Show GradCAM
# show_gradcam_on_image(img_path, gradcam_map)
