In [4]:
import os

import torch, torchvision
import torch.nn.functional as F
from PIL import Image
from torch import nn
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy, Precision, Recall
from torchvision.datasets import ImageFolder
from torchvision.transforms import v2


# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


In [5]:
# Dataset class that loads all images into RAM up front

class InMemoryImageFolder(torch.utils.data.Dataset):
    """Dataset that eagerly loads every image found by ``ImageFolder`` into memory.

    All files are read once in the constructor, converted to grayscale and
    (optionally) transformed, so ``__getitem__`` is a plain list lookup.

    Args:
        root: Directory passed to ``ImageFolder`` to discover the samples.
        transform: Optional callable applied once to each grayscale image
            at load time (not on every access).

    Attributes are populated in ``__init__``:
        images: list of loaded (and transformed) images.
        labels: list of binary float labels, 0.0 for class index 0 and
            1.0 for every other class index.
    """

    def __init__(self, root, transform=None):
        self.dataset = ImageFolder(root=root)
        self.transform = transform

        # Load all images into memory
        self.images = []
        self.labels = []

        for path, label in self.dataset.samples:
            # convert() returns a fully loaded copy, so the underlying file
            # can be closed as soon as the conversion is done.
            with Image.open(path) as source:
                image = source.convert('L')  # 'L' = single-channel grayscale
            if self.transform:
                image = self.transform(image)
            self.images.append(image)

            # Collapse the class index to a binary float target:
            # index 0 -> 0.0, any other index -> 1.0
            self.labels.append(0.0 if label == 0 else 1.0)

    def __len__(self):
        """Return the number of images held in memory."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.images[idx], self.labels[idx]

In [6]:
# Build the preprocessing pipeline, then read the whole dataset into memory

transforms = v2.Compose(
    [
        v2.ToImage(),                           # PIL image -> image tensor
        v2.ToDtype(torch.float32, scale=True),  # cast to float32, rescaling the values
    ]
)

dataset = InMemoryImageFolder(
    root="./dataset",
    transform=transforms,
)

In [7]:
# Sanity check: inspect the tensor shape of the first image held in memory.
dataset.images[0].shape

torch.Size([1, 360, 640])

In [8]:
# Split the dataset into 90% training and 10% test subsets

train_size = int(0.9 * len(dataset))
test_size = len(dataset) - train_size   # remainder, so both sizes always sum to len(dataset)

# A seeded generator makes the random assignment of samples reproducible across runs.
generator = torch.Generator().manual_seed(42)

train_dataset, test_dataset = random_split(dataset, [train_size, test_size], generator=generator)
len(train_dataset), len(test_dataset)

(11232, 1248)

In [9]:
# Wrap both subsets in DataLoaders that yield mini-batches of 8 samples

# Training batches are reshuffled on every pass over the data.
train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
)
# Evaluation iterates the test subset in a fixed order.
test_loader = DataLoader(
    test_dataset,
    batch_size=8,
    shuffle=False,
)

In [10]:
# Defining convolutional model

class BinaryFootprintClassifierModel(torch.nn.Module):
    """Small CNN that maps a single-channel image to one raw logit.

    ``layers1`` is the convolutional feature extractor and ``layers2`` the
    fully connected head. No sigmoid is applied, so the output is a logit
    of shape ``(batch, 1)``.

    The first linear layer expects 4896 flattened features, which is what
    ``layers1`` produces for 1x360x640 inputs (32 channels x 9 x 17).
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: two strided convolutions, two max-pools, light channel dropout
        feature_layers = [
            nn.Conv2d(1, 8, kernel_size=5, stride=2, padding=2),
            nn.LeakyReLU(),
            nn.Conv2d(8, 32, kernel_size=3, stride=3, padding=1),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=5, stride=3),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout2d(p=0.1),
        ]
        self.layers1 = nn.Sequential(*feature_layers)

        # Classifier head: flatten, then three linear layers narrowing to a single logit
        head_layers = [
            nn.Flatten(start_dim=1, end_dim=-1),
            nn.Linear(4896, 1024),
            nn.LeakyReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(1024, 128),
            nn.LeakyReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(128, 1),
        ]
        self.layers2 = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the feature extractor, then the head, and return the logits."""
        features = self.layers1(x)
        return self.layers2(features)
    
# Instantiate the classifier and move its parameters to the selected device.
model = BinaryFootprintClassifierModel().to(device)

In [11]:
# Define the optimizer and the loss function

optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

# Class 0 is 7 times more common than class 1 (the dataset is not balanced),
# so the positive class is weighted by 7 in the loss.
lossfn = nn.BCEWithLogitsLoss(
    pos_weight=torch.tensor([7]).to(device),
).to(device)

In [12]:
# Training loop with periodic evaluation and checkpointing

epochs = 120
eval_every = 5                      # evaluate and save a checkpoint every N epochs
checkpoint_dir = "./models/ver_2"

# torch.save does not create missing directories; make sure the target exists
# up front so the first checkpoint cannot fail after several epochs of training.
os.makedirs(checkpoint_dir, exist_ok=True)

for epoch in range(epochs):
    training_losses = []

    # The evaluation phase below switches to eval mode, so training mode has to
    # be re-enabled once at the start of every epoch (not once per batch).
    model.train()
    for X, y in train_loader:
        X = X.to(device)
        # labels -> float32 column so they line up with the (batch, 1) logits
        y = y.to(torch.float32).unsqueeze(1).to(device)

        logits = model(X)
        loss = lossfn(logits, y)

        training_losses.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Only evaluate / checkpoint on every `eval_every`-th epoch
    if (epoch + 1) % eval_every != 0:
        continue

    avg_training_loss = sum(training_losses)/len(training_losses)
    testing_losses = []
    # Fresh metric objects, so each evaluation starts from empty state
    accuracy = Accuracy(task='binary').to(device)
    precision = Precision(task='binary').to(device)
    recall = Recall(task="binary").to(device)

    model.eval()
    with torch.inference_mode():
        for X, y in test_loader:
            X = X.to(device)
            y = y.to(torch.float32).unsqueeze(1).to(device)

            logits = model(X)
            probs = torch.sigmoid(logits)
            preds = (probs>0.5).int()    # hard decision at probability 0.5

            testing_loss = lossfn(logits, y)
            testing_losses.append(testing_loss.item())

            accuracy.update(preds, y.int())
            precision.update(preds, y.int())
            recall.update(preds, y.int())

    avg_testing_loss = sum(testing_losses)/len(testing_losses)
    acc = accuracy.compute().item()
    prec = precision.compute().item()
    rec = recall.compute().item()

    # Note: the printed epoch is the zero-based loop index, while the checkpoint
    # file name below uses the one-based count of completed epochs.
    print(f"epoch {epoch} | training loss {avg_training_loss:.4f} | testing loss {avg_testing_loss:.4f} | acc: {acc:.4f} | prec: {prec:.4f} | recall: {rec:.4f}")

    # saving model progress to file
    torch.save(model.state_dict(), f"{checkpoint_dir}/after_{epoch+1}_epochs.pth")
        

epoch 4 | training loss 0.7318 | testing loss 0.6383 | acc: 0.8718 | prec: 0.4553 | recall: 0.7698
epoch 9 | training loss 0.5582 | testing loss 0.4798 | acc: 0.8550 | prec: 0.4295 | recall: 0.9209
epoch 14 | training loss 0.4306 | testing loss 0.3627 | acc: 0.8934 | prec: 0.5114 | recall: 0.9712
epoch 19 | training loss 0.3421 | testing loss 0.2919 | acc: 0.9143 | prec: 0.5672 | recall: 0.9712
epoch 24 | training loss 0.2711 | testing loss 0.2726 | acc: 0.9503 | prec: 0.7200 | recall: 0.9065
epoch 29 | training loss 0.2277 | testing loss 0.2080 | acc: 0.9455 | prec: 0.6749 | recall: 0.9856
epoch 34 | training loss 0.1855 | testing loss 0.1894 | acc: 0.9439 | prec: 0.6651 | recall: 1.0000
epoch 39 | training loss 0.1512 | testing loss 0.1890 | acc: 0.9679 | prec: 0.8075 | recall: 0.9353
epoch 44 | training loss 0.1209 | testing loss 0.1468 | acc: 0.9704 | prec: 0.8110 | recall: 0.9568
epoch 49 | training loss 0.1028 | testing loss 0.1194 | acc: 0.9744 | prec: 0.8323 | recall: 0.9640
ep