# Classification model training script
Goal of model is to classify action between attack/explore

In [1]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

##  Dataloader construction
All images are stored inside folder 'predicted_data/', the data is labeled based on subfolder 'attack_img':[1, 0] or 'explore_img': [0, 1]. All images are sized 640x360 are here transformed to tensors

In [3]:
class ClassificationDataset(Dataset):
    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform
        self.images = []
        self.labels = []
        self.class_mapping = {
            'attack_img': [1, 0],
            'explore_img': [0, 1]
        }
        self.load_dataset()

    def load_dataset(self):
        for folder_name in ['attack_img', 'explore_img']:
            folder_full_path = os.path.join(self.folder_path, folder_name)
            class_label = self.class_mapping[folder_name]
            for filename in os.listdir(folder_full_path):
                if filename.endswith(".jpg"):
                    img_path = os.path.join(folder_full_path, filename)
                    self.images.append(img_path)
                    self.labels.append(class_label)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        img = Image.open(img_path).convert('RGB')
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        
        if self.transform:
            img = self.transform(img)
        
        return img, label

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((360, 640)),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])


folder_path = 'augmented_data/'
# folder_path = 'classification_data/'
dataset = ClassificationDataset(folder_path, transform=transform)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create DataLoaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)



## CNN structure:
Model consists of 3 convolutional layers, 1 max pooling layer to lower computation cost, and 3 fully connected layers, with output layer of 2 neurons attack/explore. <br>

The input for the model is 640x360 tensor after convolutional layers results in 64 feature maps with size 45x80(0.125 * 640x480).

In [4]:
class ClassificationCNN(nn.Module):
    def __init__(self):
        super(ClassificationCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 45 * 80, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 2)  
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 64 * 45 * 80)  # Flatten the tensor
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

model = ClassificationCNN().to(device)

criterion = nn.BCEWithLogitsLoss() 
optimizer = optim.Adam(model.parameters(), lr=0.001)

Using device: cuda


## Further training of already existent model

In [5]:
model.load_state_dict(torch.load('class_torch.pth'))

num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)  # Move data to GPU
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}')

# Evaluation loop
model.eval()
with torch.no_grad():
    val_loss = 0.0
    correct = 0
    total = 0
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)  
        outputs = model(images)
        loss = criterion(outputs, labels)
        val_loss += loss.item()
        
        predicted = torch.round(torch.sigmoid(outputs))
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    
    print(f'Validation Loss: {val_loss / len(val_loader):.4f}')

Epoch [1/5], Loss: 0.7975
Epoch [2/5], Loss: 0.5237
Epoch [3/5], Loss: 0.3854
Epoch [4/5], Loss: 0.2826
Epoch [5/5], Loss: 0.2064
Validation Loss: 0.7216


In [6]:
torch.save(model.state_dict(), 'class_torch2.pth')