In [30]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.amp import GradScaler, autocast
import numpy as np
import matplotlib.pyplot as plt
from time import time
import os
from torchvision import models

In [2]:
# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
# To force CPU execution, override `device` with torch.device('cpu') after this cell.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

Using device: cuda


In [3]:
# Build torchvision's DeepLabV3 segmentation model with a MobileNetV3-Large
# backbone, initialised from the library's default pretrained weights.
deeplab = models.segmentation.deeplabv3_mobilenet_v3_large(
    weights=models.segmentation.deeplabv3.DeepLabV3_MobileNet_V3_Large_Weights.DEFAULT,
)

In [4]:
# Swap the last classifier layer for a 1x1 convolution that maps the 256
# feature channels to a single output channel.
# NOTE(review): AI4MarsDataset declares labels.dat with 4 channels per sample
# while this head emits 1 channel -- confirm the head size matches the labels
# before training or evaluating with this model.
deeplab.classifier[4] = torch.nn.Conv2d(
    in_channels=256,
    out_channels=1,
    kernel_size=1,
    stride=1,
)

In [5]:
# Restore previously saved weights into the model.
# - map_location=device lets a checkpoint saved on a GPU load when this notebook
#   has fallen back to the CPU.
# - weights_only=True restricts unpickling to tensors/plain containers, which is
#   all a state_dict needs and avoids executing arbitrary pickled code.
deeplab.load_state_dict(
    torch.load(
        '/work/cssema416/202510/03/models/deeplabV3_1.pt',
        map_location=device,
        weights_only=True,
    )
)

  deeplab.load_state_dict(torch.load('/work/cssema416/202510/03/models/deeplabV3_1.pt'))


<All keys matched successfully>

In [6]:
# Switch to inference mode and move the model onto the selected device.
# Both calls return the module itself, so the cell still displays its repr.
deeplab.eval().to(device)

DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride

In [7]:
# Directory holding the memory-mapped dataset files (images.dat and labels.dat)
# that AI4MarsDataset opens.
dataset_save_location = '/work/cssema416/202510/03/ai4mars-numpy-dataset'

In [8]:
# Define function that plots an image next to one overlay panel per label channel
def display(image, labels, label_names=("Lander",)):
    """Show the raw image followed by one panel per label channel.

    The first panel shows ``image`` alone. Each following panel shows the image
    again with one entry of ``labels`` (iterated along its first axis) drawn on
    top as a semi-transparent overlay.

    NOTE: this function shadows IPython's built-in ``display`` in the notebook
    namespace.

    Args:
        image: Array-like image; singleton axes are squeezed before plotting and
            it is rendered in grayscale over the value range [-1, 1].
        labels: Array-like whose first axis indexes label channels; each channel
            is rendered with the viridis colormap over the value range [0, 1].
        label_names: Panel titles for the label channels, in order. Channels
            beyond the supplied names are titled ``"Label <index>"`` instead of
            raising an IndexError.
    """
    num_labels = labels.shape[0]
    # squeeze=False always yields a 2-D axes array, even when there are no label
    # channels (plt.subplots would otherwise return a bare Axes object).
    fig, axes_grid = plt.subplots(1, 1 + num_labels, figsize=(20, 20), squeeze=False)
    axes = axes_grid[0]
    base_image = np.squeeze(image)

    axes[0].set_axis_off()
    # 'gray' is accepted by every matplotlib release; the 'grey' spelling only
    # exists as an alias in newer versions.
    axes[0].imshow(base_image, cmap='gray', vmin=-1, vmax=1)
    axes[0].set_title("Raw image")

    for i in range(num_labels):
        panel = axes[i + 1]
        panel.set_axis_off()
        panel.imshow(base_image, cmap='gray', vmin=-1, vmax=1)
        panel.imshow(np.squeeze(labels[i]), cmap='viridis', vmin=0, vmax=1, alpha=0.25)
        title = label_names[i] if i < len(label_names) else f"Label {i}"
        panel.set_title(title)

In [9]:
class AI4MarsDataset(Dataset):
    """Dataset backed by two read-only float32 memory-mapped files.

    ``images.dat`` is interpreted with shape
    ``(num_samples, image_size, image_size)`` and ``labels.dat`` with shape
    ``(num_samples, num_classes, image_size, image_size)``.

    Args:
        root: Directory containing ``images.dat`` and ``labels.dat``. When
            ``None`` the notebook-level ``dataset_save_location`` is used.
        num_samples: Number of samples stored in the files.
        image_size: Height and width of each (square) image.
        num_classes: Number of label channels stored per sample.

    The defaults reproduce the layout this notebook was written for
    (16064 samples, 1024x1024 pixels, 4 label channels).
    """

    def __init__(self, root=None, num_samples=16064, image_size=1024, num_classes=4):
        if root is None:
            # Resolved at call time so the notebook-level setting is honoured.
            root = dataset_save_location
        self.images = np.memmap(
            os.path.join(root, 'images.dat'),
            dtype=np.float32,
            mode="r",
            shape=(num_samples, image_size, image_size),
        )
        self.labels = np.memmap(
            os.path.join(root, 'labels.dat'),
            dtype=np.float32,
            mode="r",
            shape=(num_samples, num_classes, image_size, image_size),
        )

    def __len__(self):
        """Return the number of samples (first axis of the image array)."""
        return self.images.shape[0]

    def __getitem__(self, index):
        """Return ``(image, label)`` tensors for one sample.

        The image is rescaled with ``(x + 1) / 2`` (maps [-1, 1] onto [0, 1],
        assuming the stored values lie in [-1, 1] -- TODO confirm) and repeated
        three times along a new leading axis to form a 3-channel input. The
        label is returned as stored, copied into a tensor.
        """
        raw = np.asarray(self.images[index], dtype=np.float32)
        # The arithmetic allocates a fresh writable array, so the read-only
        # memmap is never shared with the returned tensor.
        img = torch.from_numpy((raw + 1) / 2.0)
        img = img.repeat(3, 1, 1)
        label = torch.tensor(np.asarray(self.labels[index]))
        return img, label

In [38]:
# Build the dataset and a shuffled, multi-worker loader that yields batches of
# 16 samples from pinned host memory.
train_dataset = AI4MarsDataset()
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=16,
    pin_memory=True,
)

In [31]:
# Loss function and optimizer
# NOTE(review): the dataset yields float label tensors with a class axis, so
# this presumably relies on CrossEntropyLoss's class-probability target form --
# confirm the model output has the same number of channels as the labels.
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(deeplab.parameters(), lr=1e-4)
# GradScaler documents its `device` argument as a device-type string
# ("cuda"/"cpu"), so pass the type rather than the torch.device object.
# torch.device(...) also accepts a plain string, in case `device` was overridden.
scaler = GradScaler(torch.device(device).type)

In [22]:
# Exclude every backbone weight from gradient computation so only the
# remaining layers are updated during training.
for backbone_weight in deeplab.backbone.parameters():
    backbone_weight.requires_grad_(False)

In [23]:
# Sanity check: number of samples exposed by the training dataset.
len(train_dataset)

16064

In [None]:
# Fine-tune the model with mixed precision, printing a single-line progress
# report every 5 batches and saving a checkpoint after every epoch.

# Number of epochs
num_epochs = 10
checkpoint_path = "/work/cssema416/202510/03/models/deeplabV3_4.pt"
start_time = time()
num_batches = len(train_loader)
total_batches_to_run = num_epochs * num_batches
# Width of the widest progress line printed so far. Later lines are padded to
# this width so that text written after a carriage return fully overwrites the
# previous line (the notebook output does not interpret ANSI erase sequences).
progress_width = 0

# Training loop
for epoch in range(num_epochs):
    deeplab.train()  # Set model to training mode
    # NOTE(review): train() also switches the backbone's BatchNorm layers to
    # training mode, so their running statistics keep updating even though the
    # backbone weights have requires_grad=False -- confirm this is intended.
    running_loss = 0.0

    for i, (images, masks) in enumerate(train_loader):
        # Move data to device; non_blocking lets the copy overlap with compute
        # when the loader provides pinned memory.
        images = images.to(device, non_blocking=True)
        masks = masks.to(device, non_blocking=True)

        # Zero the parameter gradients
        optimizer.zero_grad()

        with autocast('cuda'):
            # Forward pass
            outputs = deeplab(images)["out"]  # Use only the 'out' output
            # Calculate loss
            loss = criterion(outputs, masks)

        # Backward pass and optimize (gradient scaling guards against fp16 underflow)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Read the loss once: .item() forces a device synchronisation.
        batch_loss = loss.item()
        running_loss += batch_loss * images.size(0)

        if i % 5 == 4:
            # Estimate remaining time by extrapolating the average time per batch.
            elapsed_time = (time() - start_time) / 60
            total_batches_run = epoch * num_batches + (i + 1)
            total_time = (total_batches_to_run / total_batches_run) * elapsed_time
            remaining_time = total_time - elapsed_time
            progress = (
                f"Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{num_batches}], "
                f"Loss: {batch_loss:.4f}, Elapsed Time: {elapsed_time:.1f} min, "
                f"Remaining Time: {remaining_time:.1f} min"
            )
            progress_width = max(progress_width, len(progress))
            print(progress.ljust(progress_width), end='\r')

    torch.save(deeplab.state_dict(), checkpoint_path)

    # Calculate average loss for the epoch
    epoch_loss = running_loss / len(train_loader.dataset)
    summary = (
        f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, "
        f"Elapsed Time: {(time()-start_time)/60:.1f} min"
    )
    # Pad so the summary completely covers the last progress line.
    print(summary.ljust(progress_width))
    print()
print("Done!")

Epoch [1/10], Loss: 0.0247, Elapsed Time: 17.7 Elapsed Time: 17.5 min, Remaining Time: 158.6 min[1K

Epoch [2/10], Loss: 0.0232, Elapsed Time: 35.3 Elapsed Time: 35.2 min, Remaining Time: 141.2 min[1K

Epoch [3/10], Loss: 0.0214, Elapsed Time: 53.0 Elapsed Time: 52.9 min, Remaining Time: 123.6 min[1K

Epoch [4/10], Loss: 0.0202, Elapsed Time: 70.8 Elapsed Time: 70.7 min, Remaining Time: 106.2 min[1K

Epoch [5/10], Loss: 0.0200, Elapsed Time: 88.5 Elapsed Time: 88.4 min, Remaining Time: 88.6 min[1K

Epoch [6/10], Loss: 0.0180, Elapsed Time: 106.3Elapsed Time: 106.2 min, Remaining Time: 70.9 min[1K

Epoch [7/10], Loss: 0.0170, Elapsed Time: 124.0Elapsed Time: 123.9 min, Remaining Time: 53.2 min[1K

Epoch [8/10], Batch [295/1004], Loss: 0.0110, Elapsed Time: 129.3 min, Remaining Time: 48.0 min[1K

In [None]:
# Qualitative check on one sample: run the model, compare the per-pixel argmax
# with the labels, and plot prediction and ground truth overlays.
deeplab.eval()
img, label = train_dataset[1]
img = img.to(device)
with torch.no_grad():
    outputs = deeplab(img.unsqueeze(0))

# Drop only the batch axis. A blanket squeeze would also remove the class axis
# of a single-channel head, and the max below would then run over image rows.
output_logits = outputs['out'][0].cpu()
_, predicted = torch.max(output_logits, 0)

# Convert the per-class label planes to a class-index map; pixels whose label
# is zero in every plane are treated as unlabeled and marked with -1.
max_vals, label_sparse = torch.max(label, 0)
label_sparse[max_vals==0] = -1
total = (label_sparse>=0).sum().item()
correct = (predicted == label_sparse).sum().item()
if total > 0:
    print(f"Pixel accuracy on labeled pixels: {100 * correct / total:.2f}%")

# One-hot encode the prediction as (classes, H, W) for plotting. The class count
# follows the data instead of a hard-coded 4.
num_classes = max(label.shape[0], output_logits.shape[0])
one_hot_encoded = np.moveaxis(np.eye(num_classes)[predicted.numpy()], -1, 0)
display(img.cpu()[0],one_hot_encoded)
display(img.cpu()[0],label)

In [40]:
# Testing loop
def test_model(model, test_dataset):
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0
    print()
    with torch.no_grad():
        for i, (images, labels) in enumerate(test_dataset):
            images = images.to(device)
            outputs = model(images.unsqueeze(0))
            output_logits = torch.squeeze(outputs['out']).cpu().data
            _, predicted = torch.max(output_logits, 0)
            max_vals, label_sparse = torch.max(labels, 0)
            label_sparse[max_vals==0] = -1
            total += (label_sparse>=0).sum().item()
            correct += (predicted == label_sparse).sum().item()
            if(i % 100 == 0): 
                print(f"[{i+1}/{len(test_dataset)}] {100*correct/total:.2f}%", end='\r')

    accuracy = 100 * correct / total
    print(f'Test Accuracy of the model on the test dataset: {accuracy:.2f}%')

# NOTE(review): this evaluates on `train_dataset`, the same data the training
# loader draws from, so the reported "test" accuracy is measured on training
# data unless a held-out split exists elsewhere -- confirm.
test_model(deeplab, train_dataset)


Test Accuracy of the model on the test dataset: 97.97%
