In [1]:
import torch, torchvision
from torch import nn
from torchvision.datasets import ImageFolder
from torchvision.transforms import v2
from PIL import Image
from torch.utils.data import DataLoader, random_split

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"


In [2]:
# Dataset class that loads all images into RAM up front

class InMemoryImageFolder(torch.utils.data.Dataset):
    """Dataset that eagerly loads an ``ImageFolder`` directory tree into memory.

    Every image listed by ``ImageFolder(root).samples`` is opened once in
    ``__init__``, converted to grayscale (PIL mode ``'L'``), optionally passed
    through ``transform``, and kept in a list, so ``__getitem__`` performs no
    disk access.

    Note that ``transform`` runs exactly once per image at load time, not on
    every access, so a randomized transform would be frozen to a single draw.

    Args:
        root: Directory passed to ``torchvision.datasets.ImageFolder``.
        transform: Optional callable applied to each grayscale PIL image
            before it is stored.

    Attributes:
        dataset: The underlying ``ImageFolder`` used to enumerate samples.
        images: Loaded (and transformed) images, in ``samples`` order.
        labels: Integer class index for each entry in ``images``.
    """

    def __init__(self, root, transform=None):
        self.dataset = ImageFolder(root=root)
        self.transform = transform

        # Load all images into memory
        self.images = []
        self.labels = []

        for path, label in self.dataset.samples:
            # The context manager releases the file handle as soon as the
            # pixels have been copied by convert(), instead of leaving one
            # open file per image for the garbage collector to clean up.
            with Image.open(path) as source:
                image = source.convert('L')  # L for grayscale
            # Compare against None so a callable that happens to be falsy
            # (e.g. an empty container-like module) is still applied.
            if self.transform is not None:
                image = self.transform(image)
            self.images.append(image)
            self.labels.append(label)

    def __len__(self):
        """Return the number of images held in memory."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.images[idx], self.labels[idx]

In [3]:
# loading the dataset

# Preprocessing applied once per image at load time: wrap the PIL image as a
# tensor image, then cast it to float32 with value scaling enabled.
transforms = v2.Compose(
    [
        v2.ToImage(),
        v2.ToDtype(torch.float32, scale=True),
    ]
)

# Reads the whole "./dataset" folder into memory.
dataset = InMemoryImageFolder(transform=transforms, root="./dataset")

In [4]:
# Sanity check: shape of the first preloaded image tensor (a single channel is
# expected, since images are converted to grayscale when loaded).
dataset.images[0].shape

torch.Size([1, 360, 640])

In [5]:
# dividing dataset into train and test

# 90% of the samples go to training; the remainder is held out for testing.
train_size = int(0.9 * len(dataset))
test_size = len(dataset) - train_size

# A seeded generator makes the random split reproducible across runs.
generator = torch.Generator().manual_seed(42)

# Reuse the generator defined above instead of constructing a second,
# identically seeded one inline (the resulting split is the same).
train_dataset, test_dataset = random_split(dataset, [train_size, test_size], generator=generator)
len(train_dataset), len(test_dataset)

(11232, 1248)

In [6]:
# creating dataloaders

# Both loaders use batches of 4; only the training loader shuffles its samples,
# while the test loader keeps a fixed order.
train_loader, test_loader = (
    DataLoader(split, batch_size=4, shuffle=reshuffle)
    for split, reshuffle in ((train_dataset, True), (test_dataset, False))
)

In [7]:
# Defining convolutional model

class BinaryFootprintClassifierModel(torch.nn.Module):
    """Small CNN producing one raw logit per single-channel image.

    ``layers1`` is a convolutional feature extractor (two convolutions with
    LeakyReLU activations followed by two max-pooling layers); ``layers2``
    flattens the feature map and maps it through three linear layers to a
    single output. No sigmoid is applied, so the output is a logit suitable
    for ``BCEWithLogitsLoss``.

    Args:
        input_shape: Optional ``(height, width)`` of the input images, used to
            size the first linear layer. When omitted, the shape is read from
            the notebook-level ``dataset`` (``dataset.images[0].shape[1:]``),
            which preserves the original no-argument behaviour.

    Attributes:
        flattened_size: Number of features produced by ``layers1`` for one
            image of ``input_shape``.
    """

    def __init__(self, input_shape=None):
        super().__init__()

        if input_shape is None:
            # Backward-compatible fallback: infer the spatial size from the
            # globally loaded dataset. Passing input_shape explicitly avoids
            # this hidden dependency on notebook state.
            input_shape = dataset.images[0].shape[1:]
        height, width = input_shape

        self.layers1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5, padding=2, stride=1),
            nn.LeakyReLU(),
            nn.Conv2d(in_channels=8, out_channels=32, kernel_size=3, padding=1, stride=1),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=5, stride=3),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # Push a dummy image through the feature extractor to discover how
        # many features the first linear layer must accept.
        with torch.no_grad():
            dummy_input = torch.zeros(1, 1, height, width)
            dummy_output = self.layers1(dummy_input)
            self.flattened_size = dummy_output.view(1, -1).shape[1]

        self.layers2 = nn.Sequential(
            nn.Flatten(start_dim=1, end_dim=-1),
            nn.Linear(in_features=self.flattened_size, out_features=512),
            nn.LeakyReLU(),
            nn.Linear(in_features=512, out_features=128),
            nn.LeakyReLU(),
            nn.Linear(in_features=128, out_features=1),
        )

    def forward(self, x):
        """Return logits of shape ``(batch, 1)`` for a ``(batch, 1, H, W)`` input."""
        return self.layers2(self.layers1(x))
    
# Instantiate the classifier and move its parameters to the selected device.
model = BinaryFootprintClassifierModel().to(device)

In [8]:
# defining the optimizer and the loss function

optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

# The dataset is not balanced: class 1 is 7 times more common than class 0,
# so the positive class is weighted by 1/7 in the loss.
positive_weight = torch.tensor([1/7]).to(device)
lossfn = torch.nn.BCEWithLogitsLoss(pos_weight=positive_weight).to(device)

In [None]:
# creating training loop

epochs = 10
log_every = 1   # report the average training loss every `log_every` epochs

for epoch in range(epochs):
    # Training mode only needs to be set once per epoch, not on every batch.
    model.train()
    training_losses = []

    for X, y in train_loader:
        X = X.to(device)
        # The loss needs float targets shaped like the logits, i.e. (batch, 1).
        y = y.to(torch.float32).unsqueeze(1).to(device)

        logits = model(X)
        loss = lossfn(logits, y)

        training_losses.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if epoch % log_every == 0:
        avg_training_loss = sum(training_losses) / len(training_losses)
        print(f"epoch {epoch} | training loss {avg_training_loss}")
        

epoch 0 | training loss 0.1278342129265461
epoch 1 | training loss 0.07081187718115137
epoch 2 | training loss 0.04950165696167061
epoch 3 | training loss 0.03701441557833223
epoch 4 | training loss 0.02887316001350739
epoch 5 | training loss 0.02348339640082096
epoch 6 | training loss 0.01829730362968578
epoch 7 | training loss 0.01580826109779394
epoch 8 | training loss 0.013352854123154189
epoch 9 | training loss 0.010273649119120166
