# Making the baseline model and getting things to work

In [2]:
import torch
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torchvision import datasets, transforms
from torch import nn, optim
import matplotlib.pyplot as plt
import os
import rasterio
import pickle

# Use the GPU when PyTorch can see one; otherwise everything runs on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f"Training on {device}.")

Training on cpu.


### Import data

In [3]:
def import_training_imgs(train_folder):
    """Load every ``.tif`` file in ``train_folder`` as a float tensor.

    Files are visited in sorted filename order. ``os.listdir`` gives no
    ordering guarantee, so without sorting the returned list could differ
    between runs or machines and fall out of step with the label list, which
    is read from a separate folder and paired with the images by position.

    Args:
        train_folder: Directory containing the training rasters.

    Returns:
        list[torch.Tensor]: One float32 tensor per ``.tif`` file, holding
        whatever ``src.read()`` returned for that file.
    """
    x_train = []

    for file in sorted(os.listdir(train_folder)):
        if not file.endswith('.tif'):
            continue
        file_path = os.path.join(train_folder, file)
        # Read the pixel data, then close the dataset before converting.
        with rasterio.open(file_path) as src:
            image_np = src.read()
        x_train.append(torch.from_numpy(image_np).float())
    return x_train

# The decoded training images are pickled after the first import so that later
# runs can skip re-reading the .tif files.
# NOTE: the cache is never invalidated automatically. Delete it whenever the
# files in data/train_images/ or the loading code change; otherwise x_train may
# no longer line up with the labels, which are always re-read from disk.
# NOTE(security): pickle.load can execute arbitrary code contained in the file;
# only load a cache that this notebook wrote itself.
TRAIN_CACHE_PATH = './data/loaded/train_images.pkl'

if os.path.exists(TRAIN_CACHE_PATH):
    print('Loading training images from pickle file...')
    with open(TRAIN_CACHE_PATH, 'rb') as cache_file:
        x_train = pickle.load(cache_file)
else:
    print('Importing training images...')
    x_train = import_training_imgs('data/train_images/')
    # The cache directory may not exist on a fresh checkout.
    os.makedirs(os.path.dirname(TRAIN_CACHE_PATH), exist_ok=True)
    with open(TRAIN_CACHE_PATH, 'wb') as cache_file:
        pickle.dump(x_train, cache_file)

Loading training images from pickle file...


In [4]:
def import_training_labels(train_folder):
    """Load every ``.tif`` annotation in ``train_folder`` as a tensor.

    Files are visited in sorted filename order. ``os.listdir`` gives no
    ordering guarantee, and the labels are paired with the images purely by
    list position, so a deterministic order is required to keep them aligned.
    NOTE(review): alignment additionally assumes image and annotation
    filenames sort the same way -- confirm against the data folders.

    Args:
        train_folder: Directory containing the annotation rasters.

    Returns:
        list[torch.Tensor]: One tensor per ``.tif`` file, holding whatever
        ``src.read()`` returned; the array's dtype is kept as-is (no cast).
    """
    y_train = []

    for file in sorted(os.listdir(train_folder)):
        if not file.endswith('.tif'):
            continue
        file_path = os.path.join(train_folder, file)
        # Read the pixel data, then close the dataset before converting.
        with rasterio.open(file_path) as src:
            image_np = src.read()
        y_train.append(torch.from_numpy(image_np))
    return y_train

In [5]:
# Labels are re-read from disk on every run (unlike the images, they are not cached).
y_train = import_training_labels('data/masked_annotations/')

In [94]:
# Restrict both lists to their first 10 entries so images and labels stay paired.
x_train, y_train = x_train[:10], y_train[:10]

In [95]:
# Display the list of image tensors; the values shown are raw, i.e. before normalization.
x_train

[tensor([[[ 272.0000,  268.0000,  268.0000,  ...,  260.5000,  260.5000,
            260.5000],
          [ 233.0000,  216.0000,  216.0000,  ...,  267.0000,  267.0000,
            267.0000],
          [ 233.0000,  216.0000,  216.0000,  ...,  267.0000,  267.0000,
            267.0000],
          ...,
          [ 618.5000,  636.5000,  636.5000,  ...,  621.5000,  621.5000,
            621.5000],
          [ 618.5000, 1097.0000, 1097.0000,  ...,  621.5000,  621.5000,
            621.5000],
          [ 618.5000, 1097.0000, 1097.0000,  ...,  621.5000,  621.5000,
            621.5000]],
 
         [[ 260.0000,  228.0000,  228.0000,  ...,  201.5000,  231.0000,
            238.5000],
          [ 250.0000,  239.0000,  239.0000,  ...,  224.5000,  228.5000,
            227.5000],
          [ 203.0000,  249.0000,  249.0000,  ...,  227.0000,  209.0000,
            219.0000],
          ...,
          [ 982.5000,  894.0000,  894.0000,  ...,  618.5000,  676.0000,
            655.5000],
          [ 699.0

In [96]:
# Shape of a single sample: channels first, then the two spatial axes.
x_train[1].shape

torch.Size([12, 1024, 1024])

In [97]:

# Collate the per-sample tensors into one batch tensor each.
x_train_tensor = torch.stack(x_train)  # Shape: [num_samples, 12, 1024, 1024]
# squeeze(dim=1) drops the singleton channel axis of the masks (assumes each
# label is [1, H, W] -- TODO confirm); the cast to long gives integer class
# indices, which is what CrossEntropyLoss expects as targets.
y_train_tensor = torch.stack(y_train).squeeze(dim=1).to(torch.long)  # Shape: [num_samples, 1024, 1024]


In [98]:
# Sanity check: warn about any non-finite values in the stacked inputs.
for _is_bad, _warning in (
    (torch.isnan, "Warning: Found NaN values in x_train_tensor!"),
    (torch.isinf, "Warning: Found Inf values in x_train_tensor!"),
):
    if _is_bad(x_train_tensor).any():
        print(_warning)



In [99]:
# Replace NaNs with 0 so they cannot propagate into the per-channel mean/std.
# NOTE(review): with posinf/neginf left at their defaults, torch.nan_to_num also
# rewrites +/-Inf to the largest/smallest finite value of the tensor's dtype,
# which would heavily skew the statistics -- confirm that is intended if the
# Inf warning ever fires.
x_train_tensor = torch.nan_to_num(x_train_tensor, nan=0.0)

In [100]:
# Per-channel statistics, reduced over the batch axis and both spatial axes
# (one mean and one std per channel).
# NOTE: this cell overwrites x_train_tensor, so re-running it computes the
# statistics from already-normalized data; re-run from the stacking cell instead.
means = x_train_tensor.mean(dim=(0, 2, 3))
stds = x_train_tensor.std(dim=(0, 2, 3))
print("Means:", means)
print("Stds:", stds)

# Small offset so a constant channel (std == 0) cannot cause a division by zero.
eps = 1e-7
stds_fixed = stds + eps

normalizer_pipe = transforms.Normalize(means, stds_fixed)

preprocessor = transforms.Compose([
    normalizer_pipe
])

# Normalize accepts tensors shaped (..., C, H, W) and broadcasts the
# per-channel statistics over the leading batch axis, so the whole batch is
# normalized in one call. This gives the same values as normalizing each image
# separately and restacking, without the intermediate Python list of copies.
x_train_tensor = preprocessor(x_train_tensor)


Means: tensor([ 251.1917,  299.5903,  529.7758,  340.4770,  893.4138, 2481.4229,
        3080.3997, 3075.8416, 3370.4011, 3357.2368, 1802.1953,  854.8112])
Stds: tensor([161.9195, 190.9192, 226.0201, 268.8252, 311.6995, 595.2697, 747.8415,
        751.2203, 782.6572, 734.3943, 551.7997, 430.1828])


In [101]:
# Confirm the normalized batch still has the [samples, channels, height, width] layout.
x_train_tensor.shape

torch.Size([10, 12, 1024, 1024])

In [102]:
# Check that x_train_tensor is a torch.Tensor (not a Python list) before building the dataset.
type(x_train_tensor)

torch.Tensor

In [103]:
# Pair each image with its mask; TensorDataset indexes both tensors along their first dimension.
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)


In [104]:
# Eyeball the normalized values (shows the tensor's truncated repr).
x_train_tensor

tensor([[[[ 1.2851e-01,  1.0381e-01,  1.0381e-01,  ...,  5.7488e-02,
            5.7488e-02,  5.7488e-02],
          [-1.1235e-01, -2.1734e-01, -2.1734e-01,  ...,  9.7631e-02,
            9.7631e-02,  9.7631e-02],
          [-1.1235e-01, -2.1734e-01, -2.1734e-01,  ...,  9.7631e-02,
            9.7631e-02,  9.7631e-02],
          ...,
          [ 2.2685e+00,  2.3796e+00,  2.3796e+00,  ...,  2.2870e+00,
            2.2870e+00,  2.2870e+00],
          [ 2.2685e+00,  5.2236e+00,  5.2236e+00,  ...,  2.2870e+00,
            2.2870e+00,  2.2870e+00],
          [ 2.2685e+00,  5.2236e+00,  5.2236e+00,  ...,  2.2870e+00,
            2.2870e+00,  2.2870e+00]],

         [[-2.0737e-01, -3.7498e-01, -3.7498e-01,  ..., -5.1378e-01,
           -3.5926e-01, -3.1998e-01],
          [-2.5974e-01, -3.1736e-01, -3.1736e-01,  ..., -3.9331e-01,
           -3.7236e-01, -3.7760e-01],
          [-5.0592e-01, -2.6498e-01, -2.6498e-01,  ..., -3.8021e-01,
           -4.7450e-01, -4.2212e-01],
          ...,
     

In [105]:
# Number of samples per batch; the training loop performs one optimizer step per batch.
batch_size = 10

# shuffle=True reshuffles the sample order at every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

### Training function

In [111]:
def train(n_epochs, optimizer, model, loss_fn, train_loader, val_loader=None):
    """Train ``model`` for ``n_epochs`` epochs and return the training losses.

    Each epoch makes one pass over ``train_loader`` (forward, backward and one
    optimizer step per batch). When ``val_loader`` is given, the model is then
    evaluated on it in eval mode without gradient tracking. Losses are printed
    for epoch 1 and for every fifth epoch.

    Args:
        n_epochs: Number of passes over ``train_loader``.
        optimizer: Optimizer built over ``model``'s parameters.
        model: Module to train; it is moved to the notebook-level ``device``.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar
            loss tensor.
        train_loader: Iterable of ``(imgs, labels)`` batches supporting ``len()``.
        val_loader: Optional iterable of ``(imgs, labels)`` validation batches
            supporting ``len()``.

    Returns:
        list[float]: For each epoch, the training loss averaged over batches.
        Validation losses are only printed, not returned.
    """
    n_train_batch = len(train_loader)
    losses_train = []
    if val_loader is not None:
        n_val_batch = len(val_loader)

    model = model.to(device)
    model.train()
    optimizer.zero_grad()

    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        loss_val = 0.0

        # Validation (if any) left the model in eval mode; switch back once per epoch.
        model.train()
        for imgs, labels in train_loader:
            imgs = imgs.to(device)
            labels = labels.to(device)

            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            loss_train += loss.item()
        losses_train.append(loss_train / n_train_batch)

        if val_loader is not None:
            model.eval()
            with torch.no_grad():
                for imgs, labels in val_loader:
                    # Validation batches must live on the same device as the
                    # model, exactly like the training batches.
                    imgs = imgs.to(device)
                    labels = labels.to(device)

                    outputs = model(imgs)
                    loss_val += loss_fn(outputs, labels).item()

        if epoch == 1 or epoch % 5 == 0:
            print(f'--------- Epoch: {epoch} ---------')
            print('Training loss {:.5f}'.format(loss_train / n_train_batch))
            if val_loader is not None:
                print('Validation loss {:.5f}'.format(loss_val / n_val_batch))
            print()

    return losses_train

###  A simple Convolutional Network

In [107]:
class SimpleConvNet(nn.Module):
    """Small encoder-decoder network producing per-pixel class logits.

    Two conv + ReLU + max-pool stages halve the spatial size twice, then two
    stride-2 transposed convolutions double it twice, so the output has the
    same height and width as the input provided both are divisible by 4.

    Args:
        in_channels: Number of channels in the input image (default 12).
        n_classes: Number of output channels, i.e. classes (default 5).
    """

    def __init__(self, in_channels=12, n_classes=5):
        super().__init__()

        # Encoder: each stage keeps the size in the conv (padding=1) and halves it in the pool.
        self.down1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)  # H x W -> H/2 x W/2
        )
        self.down2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)  # H/2 x W/2 -> H/4 x W/4
        )

        # Decoder: kernel_size=2 with stride=2 doubles the spatial size exactly.
        self.up1 = nn.ConvTranspose2d(in_channels=64, out_channels=32,
                                      kernel_size=2, stride=2)  # H/4 -> H/2
        self.up2 = nn.ConvTranspose2d(in_channels=32, out_channels=n_classes,
                                      kernel_size=2, stride=2)  # H/2 -> H

    def forward(self, x):
        """Map ``[batch, in_channels, H, W]`` inputs to ``[batch, n_classes, H, W]`` logits."""
        x = self.down1(x)        # [batch, 32, H/2, W/2]
        x = self.down2(x)        # [batch, 64, H/4, W/4]
        x = F.relu(self.up1(x))  # [batch, 32, H/2, W/2]
        # No activation on the last layer: the raw logits are returned.
        x = self.up2(x)          # [batch, n_classes, H, W]
        return x


In [108]:
# Baseline setup: default network, per-pixel cross-entropy on the class logits,
# and Adam with a learning rate of 1e-3.
model = SimpleConvNet()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [112]:
n_epochs = 10

# No validation loader is passed, so only training losses are computed and printed.
losses_train = train(
    n_epochs=n_epochs,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_loader,
)


--------- Epoch: 1 ---------
Training loss 1.55403

--------- Epoch: 5 ---------
Training loss 1.47091

--------- Epoch: 10 ---------
Training loss 1.26181

