In [1]:
import os

import torch, torchvision
import torch.nn.functional as F
from PIL import Image
from torch import nn
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy, Precision, Recall
from torchvision.datasets import ImageFolder
from torchvision.transforms import v2


# run on the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


In [2]:
# dataset wrapper that decodes every image once and keeps the results in RAM

class InMemoryImageFolder(torch.utils.data.Dataset):
    """Binary-label image dataset that is fully preloaded at construction time.

    Files are discovered with ``ImageFolder(root=root)``. Each file is opened,
    converted to single-channel grayscale and, if a transform is given,
    transformed once inside ``__init__``; ``__getitem__`` afterwards only
    indexes into in-memory lists.

    Labels are remapped to floats: ``ImageFolder`` class index 0 becomes the
    positive label ``1.0`` and every other class index becomes ``0.0``.

    Args:
        root: Directory handed to ``ImageFolder``.
        transform: Optional callable applied to each grayscale PIL image while
            loading. It runs exactly once per image, so any randomness in it
            is frozen into the stored result.
    """

    def __init__(self, root, transform=None):
        self.dataset = ImageFolder(root=root)
        self.transform = transform

        # Load all images into memory
        self.images = []
        self.labels = []

        for path, label in self.dataset.samples:
            # The context manager closes the source file as soon as the
            # grayscale copy exists instead of leaving that to garbage
            # collection; ``convert`` returns an independent image object.
            with Image.open(path) as source:
                image = source.convert('L')  # L for grayscale
            # Explicit None check: a transform object that happens to be
            # falsy (e.g. an empty container module) must still be applied.
            if self.transform is not None:
                image = self.transform(image)
            self.images.append(image)

            # Invert the folder index: class index 0 is the positive class.
            self.labels.append(1.0 if label == 0 else 0.0)

    def __len__(self):
        """Return the number of preloaded samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.images[idx], self.labels[idx]

In [3]:
# loading the dataset (every image is decoded and transformed here, once)

# convert each PIL image to a tensor image, then to float32 with value scaling enabled
transforms = v2.Compose(
    [
        v2.ToImage(),
        v2.ToDtype(torch.float32, scale=True),
    ]
)

dataset = InMemoryImageFolder("./dataset", transform=transforms)

In [4]:
# sanity check: shape of the first preloaded image
dataset.images[0].shape

torch.Size([1, 360, 640])

In [5]:
# dividing dataset into train (90%) and test (remaining 10%)

train_size = int(0.9 * len(dataset))
test_size = len(dataset) - train_size   # remainder, so the two sizes always sum to len(dataset)

# random_split draws a random permutation either way; the seeded generator
# is what makes the resulting split identical on every run
generator = torch.Generator().manual_seed(42)

train_dataset, test_dataset = random_split(dataset, [train_size, test_size], generator=generator)
len(train_dataset), len(test_dataset)

(11232, 1248)

In [6]:
# creating dataloaders

# training batches are reshuffled every epoch
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=8,
    shuffle=True,
)

# evaluation batches keep a fixed order
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=8,
    shuffle=False,
)

In [None]:
# Defining convolutional model

class BinaryFootprintClassifierModel(torch.nn.Module):
    """Small CNN that produces one raw logit per single-channel image.

    ``forward`` first resizes the input to ``target_size`` with bilinear
    interpolation, then applies two size-preserving convolutions, two max-pool
    stages and channel dropout (``layers1``), and finally a three-layer
    fully connected head (``layers2``). No sigmoid is applied; the output is a
    logit of shape ``(batch, 1)``.

    Args:
        target_size: ``(height, width)`` every input is resized to before the
            convolutions. The default ``(360, 640)`` gives 198240 flattened
            features, i.e. the same layer shapes as the previously hard-coded
            model.

    Raises:
        ValueError: If ``target_size`` is too small to survive both pooling
            stages.
    """

    # (kernel_size, stride) of the two pooling stages. Shared by the layer
    # definitions and the flattened-size computation so they cannot drift apart.
    _POOL_STAGES = ((5, 3), (3, 2))
    # Channel count produced by the last convolution.
    _CONV_OUT_CHANNELS = 32

    def __init__(self, target_size=(360, 640)):
        super().__init__()

        self.target_size = tuple(target_size)  # (height, width)

        (pool1_kernel, pool1_stride), (pool2_kernel, pool2_stride) = self._POOL_STAGES

        self.layers1 = nn.Sequential(
            # convolutional layers (padding keeps height and width unchanged)
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5, padding=2, stride=1),
            nn.LeakyReLU(),
            nn.Conv2d(in_channels=8, out_channels=self._CONV_OUT_CHANNELS, kernel_size=3, padding=1, stride=1),
            nn.LeakyReLU(),

            # pooling and dropout layers
            nn.MaxPool2d(kernel_size=pool1_kernel, stride=pool1_stride),
            nn.MaxPool2d(kernel_size=pool2_kernel, stride=pool2_stride),
            nn.Dropout2d(p=0.1),
        )

        # deep layers; the first Linear is sized from target_size instead of
        # a hand-computed constant
        self.layers2 = nn.Sequential(
            nn.Flatten(start_dim=1, end_dim=-1),
            nn.Linear(in_features=self._flattened_features(self.target_size), out_features=1024),
            nn.LeakyReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(in_features=1024, out_features=128),
            nn.LeakyReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(in_features=128, out_features=1),
        )

    @classmethod
    def _flattened_features(cls, target_size):
        """Return the number of features ``layers1`` emits per sample for ``target_size``."""
        height, width = target_size
        for kernel, stride in cls._POOL_STAGES:
            # Unpadded, floor-mode max pooling: out = floor((in - kernel) / stride) + 1
            height = (height - kernel) // stride + 1
            width = (width - kernel) // stride + 1
            if height <= 0 or width <= 0:
                raise ValueError(
                    f"target_size {tuple(target_size)} is too small for the pooling stages"
                )
        return cls._CONV_OUT_CHANNELS * height * width

    def forward(self, x):
        """Resize ``x`` to ``target_size`` and return the logits from the two layer stacks."""
        # Resizing first lets the fixed-size Linear head accept any input resolution.
        x = F.interpolate(x, size=self.target_size, mode='bilinear', align_corners=False)

        return self.layers2(self.layers1(x))
    
# build the network with its default arguments and move it to the selected device
model = BinaryFootprintClassifierModel().to(device)

In [8]:
# defining optimizer and loss function

optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

# the dataset is not balanced: class 0 is 7 times more common than class 1,
# so the positive term of the loss is weighted by 7
lossfn = torch.nn.BCEWithLogitsLoss(
    pos_weight=torch.tensor([7]).to(device),
).to(device)

In [None]:
# training loop with periodic evaluation and checkpointing

epochs = 20
eval_every = 1          # evaluate on the test split every N epochs
checkpoint_every = 5    # save the weights every N epochs
checkpoint_dir = "./models/ver_1"

# torch.save does not create missing parent directories; creating the folder
# up front keeps the first checkpoint from failing after epochs of training.
os.makedirs(checkpoint_dir, exist_ok=True)

for epoch in range(epochs):
    training_losses = []

    # training pass
    model.train()   # once per epoch is enough; the evaluation pass below switches to eval mode
    for X, y in train_loader:
        X = X.to(device)
        # float targets with a trailing dimension of 1, as passed to the loss next to the logits
        y = y.to(torch.float32).unsqueeze(1).to(device)

        logits = model(X)
        loss = lossfn(logits, y)

        training_losses.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # evaluation pass
    if epoch % eval_every == 0:
        avg_training_loss = sum(training_losses)/len(training_losses)
        testing_losses = []
        # fresh metric objects every epoch so counts never carry over
        accuracy = Accuracy(task='binary').to(device)
        precision = Precision(task='binary').to(device)
        recall = Recall(task="binary").to(device)

        model.eval()
        with torch.inference_mode():

            for X, y in test_loader:
                X = X.to(device)
                y = y.to(torch.float32).unsqueeze(1).to(device)

                logits = model(X)
                probs = torch.sigmoid(logits)
                preds = (probs>0.5).int()   # hard 0/1 predictions at a 0.5 probability threshold

                testing_loss = lossfn(logits, y)
                testing_losses.append(testing_loss.item())

                accuracy.update(preds, y.int())
                precision.update(preds, y.int())
                recall.update(preds, y.int())

        # losses are averaged over batches; metrics are aggregated over all samples
        avg_testing_loss = sum(testing_losses)/len(testing_losses)
        acc = accuracy.compute().item()
        prec = precision.compute().item()
        rec = recall.compute().item()

        print(f"epoch {epoch} | training loss {avg_training_loss:.4f} | testing loss {avg_testing_loss:.4f} | acc: {acc:.4f} | prec: {prec:.4f} | recall: {rec:.4f}")

    # saving model progress to file (independent of the evaluation interval)
    if (epoch+1) % checkpoint_every == 0:
        torch.save(model.state_dict(), f"{checkpoint_dir}/after_epoch_{epoch+1}.pth")
        

epoch 0 | training loss 0.9081 | testing loss 0.6018 | acc: 0.7893 | prec: 0.3741 | recall: 0.9563
epoch 1 | training loss 0.6292 | testing loss 0.5120 | acc: 0.9223 | prec: 0.6438 | recall: 0.8813
epoch 2 | training loss 0.5137 | testing loss 0.4093 | acc: 0.8694 | prec: 0.4951 | recall: 0.9500
epoch 3 | training loss 0.4422 | testing loss 0.3405 | acc: 0.9119 | prec: 0.6000 | recall: 0.9375
epoch 4 | training loss 0.3758 | testing loss 0.3193 | acc: 0.9103 | prec: 0.5938 | recall: 0.9500
epoch 5 | training loss 0.3376 | testing loss 0.3234 | acc: 0.8886 | prec: 0.5356 | recall: 0.9875
epoch 6 | training loss 0.2770 | testing loss 0.2665 | acc: 0.9359 | prec: 0.6724 | recall: 0.9750
epoch 7 | training loss 0.2563 | testing loss 0.2439 | acc: 0.9551 | prec: 0.7500 | recall: 0.9750
epoch 8 | training loss 0.2101 | testing loss 0.2229 | acc: 0.9599 | prec: 0.7696 | recall: 0.9812
epoch 9 | training loss 0.1812 | testing loss 0.2334 | acc: 0.9599 | prec: 0.7750 | recall: 0.9688
epoch 10 |