In [16]:
import json
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import numpy as np
from torchvision.datasets import CocoDetection
from torch.utils.data import DataLoader
from torchvision import models
from torch.utils.tensorboard import SummaryWriter  # For visualization
from sklearn.metrics import precision_score, recall_score, f1_score  # For evaluation metrics


# Initialize TensorBoard
writer = SummaryWriter()

# Hyperparameters
num_epochs = 20  # Increased from 10 to 20
batch_size = 4
learning_rate = 0.001
weight_decay = 1e-4  # L2 regularization

# Define transformations with data augmentation
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224 pixels
    transforms.RandomHorizontalFlip(),  # Random horizontal flipping
    transforms.RandomRotation(10),  # Random rotation by 10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),  # Randomly adjust brightness, contrast, and saturation
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalization
])

# Use a simpler transform for validation/test dataset without augmentation
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224 pixels
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalization
])

# Load COCO dataset with augmentation for training set
train_dataset = CocoDetection(root='./dataset/ZiggoPortStatus-2/train', annFile='./dataset/ZiggoPortStatus-2/train/_annotations.coco.json', transform=train_transform)
# No augmentation for test dataset
test_dataset = CocoDetection(root='./dataset/ZiggoPortStatus-2/test', annFile='./dataset/ZiggoPortStatus-2/test/_annotations.coco.json', transform=test_transform)

# Load category information and define class_names
with open('./dataset/ZiggoPortStatus-2/train/_annotations.coco.json', 'r') as f:
    coco_info = json.load(f)
class_names = {cat['id']: cat['name'] for cat in coco_info['categories']}

# Create a mapping from COCO category IDs to sequential indices
cat_id_to_seq_id = {cat_id: idx for idx, cat_id in enumerate(sorted(class_names.keys()))}

# Model setup
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, len(cat_id_to_seq_id))

# Loss function
criterion = nn.BCEWithLogitsLoss()

# Optimizer with weight decay for L2 regularization
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Scheduler for adjusting learning rate
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1, verbose=True)


# Custom collate function for handling COCO annotations
def custom_collate_fn(batch):
    images, annotations = zip(*batch)
    labels_one_hot = torch.zeros((len(annotations), len(cat_id_to_seq_id)))
    for idx, anns in enumerate(annotations):
        for ann in anns:
            seq_id = cat_id_to_seq_id[ann['category_id']]
            labels_one_hot[idx, seq_id] = 1
    return torch.stack(images), labels_one_hot

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate_fn)

# Training loop
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (images, labels_one_hot) in enumerate(train_loader):
        outputs = model(images)
        loss = criterion(outputs, labels_one_hot)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if (i + 1) % 10 == 0:  # Log every 10 batches
            current_loss = running_loss / 10
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {current_loss}')
            writer.add_scalar('training loss', current_loss, epoch * len(train_loader) + i)
            running_loss = 0.0

    # Adjust learning rate based on the epoch loss
    scheduler.step(running_loss)

writer.close()  # Close the TensorBoard

# Evaluation with additional metrics
model.eval()
y_true, y_pred = [], []
with torch.no_grad():
    for images, labels_one_hot in test_loader:
        outputs = model(images)
        predicted_labels = torch.sigmoid(outputs) > 0.5
        y_true.extend(labels_one_hot.cpu().numpy())
        y_pred.extend(predicted_labels.cpu().numpy())

# Convert predictions and true labels to binary for evaluation metrics
y_true = np.array(y_true).flatten()
y_pred = np.array(y_pred).flatten()

accuracy = (y_true == y_pred).mean()
precision = precision_score(y_true, y_pred, average='binary')
recall = recall_score(y_true, y_pred, average='binary')
f1 = f1_score(y_true, y_pred, average='binary')

print(f'Test Accuracy: {accuracy * 100:.2f}%')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')

print('Finished Training')


loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Epoch [1/20], Step [10/18], Loss: 0.5703415155410767
Epoch [2/20], Step [10/18], Loss: 0.43279818892478944
Epoch [3/20], Step [10/18], Loss: 0.4207752853631973
Epoch [4/20], Step [10/18], Loss: 0.404787203669548
Epoch [5/20], Step [10/18], Loss: 0.4122891366481781
Epoch 00005: reducing learning rate of group 0 to 1.0000e-04.
Epoch [6/20], Step [10/18], Loss: 0.3784020394086838
Epoch [7/20], Step [10/18], Loss: 0.35817195773124694
Epoch [8/20], Step [10/18], Loss: 0.3513214588165283
Epoch 00008: reducing learning rate of group 0 to 1.0000e-05.
Epoch [9/20], Step [10/18], Loss: 0.39106438457965853
Epoch [10/20], Step [10/18], Loss: 0.41537892520427705
Epoch [11/20], Step [10/18], Loss: 0.3721770316362381
Epoch [12/20], Step [10/18], Loss: 0.4093081772327423
Epoch [13/20], Step [10/18], Loss: 0.4336109280586243
Epoch 00013: r