In [1]:
import sys
sys.path.append("models/raster")
sys.path.append("models/vector")

import os
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from initialize_gnn import initialize_gnn
from cnn import CNN
from vit import ViT

In [2]:
# Define one seed for reproducible results. NumPy and PyTorch keep separate
# random number generators, so both are seeded: the weights of freshly created
# torch layers are drawn from PyTorch's generator, not from NumPy's.
SEED = 69
np.random.seed(SEED)
torch.manual_seed(SEED)

In [3]:
# Use the CUDA device when one is available and fall back to the CPU otherwise
# (no other backend, such as MPS, is probed here).
cuda_available = torch.cuda.is_available()
device = torch.device("cuda" if cuda_available else "cpu")

if cuda_available:
    # cluster: release cached GPU memory and use the cluster path
    torch.cuda.empty_cache()
    multimodal_path = "../scratch/multimodal"
else:
    # local path
    multimodal_path = "../data.nosync/multimodal"

print(f"Device set to: {device}")

Device set to: cpu


In [4]:
# Canonical order of the generalization operators; positions in this tuple
# are used to index the label vector of a sample.
operator_order = (
    "elimination",
    "aggregation",
    "typification",
    "displacement",
    "enlargement",
    "simplification",
)

In [5]:
# Without a CUDA device the notebook is taken to run locally, where plots
# are typeset in the DIN font.
running_locally = not torch.cuda.is_available()
if running_locally:
    plt.rcParams.update({"font.family": "DIN Alternate"})

### Loading the data

In [6]:
class BuildingMultimodalDataset(Dataset):
    '''Dataset pairing the raster (.npz) and the vector (.pt) representation of each sample.

    Raster and vector files are matched by their position in the sorted directory listings,
    so both directories must contain the same samples under filenames that sort identically.
    '''
    def __init__(self, raster_path, vector_path, operators, transform=None):
        '''Stores the directories and filenames of the individual raster (.npz) and vector (.pt) files.

        raster_path -- directory containing the raster files
        vector_path -- directory containing the vector files
        operators   -- names of the operators used as label; names missing from operator_order are ignored
        transform   -- stored on the instance only
        '''
        # store the path to the raster and vector files
        self.raster_path = raster_path
        self.vector_path = vector_path

        # get filenames of the individual files, sorted so that raster and vector samples line up
        self.raster_filenames = self._list_data_files(self.raster_path)
        self.vector_filenames = self._list_data_files(self.vector_path)

        # make sure that the samples line up (only the number of files can be verified here)
        assert len(self.raster_filenames) == len(self.vector_filenames), (
            f"Found {len(self.raster_filenames)} raster files but {len(self.vector_filenames)} vector files."
        )

        # store indices of the operators within operator_order for slicing in the __getitem__ method
        self.operators = sorted(operator_order.index(operator) for operator in operators if operator in operator_order)

        # store transformation
        # NOTE(review): the transformation is never applied in __getitem__ -- confirm whether it should be
        self.transform = transform

    @staticmethod
    def _list_data_files(directory):
        '''Returns the sorted names of all non-hidden entries in the given directory.

        Hidden entries (e.g. .DS_Store or .ipynb_checkpoints) are skipped: they are not samples and
        would otherwise shift the pairing between raster and vector files or fail to load.
        '''
        return sorted(name for name in os.listdir(directory) if not name.startswith("."))

    def __len__(self):
        '''Enables dataset length calculation.'''
        return len(self.raster_filenames)

    def __getitem__(self, index):
        '''Enables indexing, returns raster and graph representation and generalization operators as label.

        Returns a tuple (raster_sample, vector_sample, operators): the float tensor stacked from the
        focal building, context buildings and roads rasters, the loaded graph object whose y attribute
        has been restricted to the selected operators and reshaped to (1, -1), and the selected operators.
        '''
        # load the raster sample associated with the given index; the archive is closed again
        # as soon as the arrays have been read to avoid leaking one file handle per sample
        raster_filename = self.raster_filenames[index]
        with np.load(os.path.join(self.raster_path, raster_filename)) as raster_sample_raw:
            # extract the rasters
            focal_building_raster = raster_sample_raw["focal_building"]
            context_buildings_raster = raster_sample_raw["context_buildings"]
            roads_raster = raster_sample_raw["roads"]

        # stack the rasters to shape (3, n_pixels, n_pixels) and convert to tensor
        raster_sample = np.stack([focal_building_raster, context_buildings_raster, roads_raster], axis=0)
        raster_sample = torch.from_numpy(raster_sample).float()

        # load the vector sample associated with the given index
        # NOTE(review): torch.load unpickles the file -- only load vector samples from trusted sources
        vector_filename = self.vector_filenames[index]
        vector_sample = torch.load(os.path.join(self.vector_path, vector_filename))

        # extract the operators from the graph object
        operators = vector_sample.y[self.operators]

        # reshape the operators associated with the graph
        vector_sample.y = vector_sample.y[self.operators].reshape(1, -1)

        return raster_sample, vector_sample, operators

### Model design

In [7]:
class MultimodalModel(nn.Module):
    '''Fuses a pretrained raster model and a pretrained vector model with one trainable linear layer.

    The classification heads of both models are replaced by identity mappings; their remaining
    outputs are concatenated along the feature dimension and passed to the fusion layer.
    Note that the models handed to the constructor are modified in place (frozen parameters,
    removed classification heads, evaluation mode).
    '''
    def __init__(self, raster_model, vector_model, dummy_raster_sample, dummy_vector_sample, n_classes):
        '''Freezes both models, removes their heads and creates the fusion layer.

        raster_model        -- trained raster model with a classification_heads attribute
        vector_model        -- trained vector model with a classification_heads attribute,
                               called as vector_model(x_dict, edge_index_dict)
        dummy_raster_sample -- single raster sample without batch dimension, used for shape inference
        dummy_vector_sample -- single graph sample providing x_dict and edge_index_dict, used for shape inference
        n_classes           -- number of outputs of the fusion layer
        '''
        super().__init__()
        self.raster_model = raster_model
        self.vector_model = vector_model

        # both models are already trained and only require gradient for fusion layers
        for param in self.raster_model.parameters():
            param.requires_grad = False
        for param in self.vector_model.parameters():
            param.requires_grad = False

        # remove classification heads
        self.raster_model.classification_heads = nn.Identity()
        self.vector_model.classification_heads = nn.Identity()

        # frozen feature extractors always run in evaluation mode, otherwise layers such as dropout
        # or batch normalization would behave differently from the state the models were trained to
        self.raster_model.eval()
        self.vector_model.eval()

        # pass dummy raster and dummy vector samples through the networks to determine the number of output features
        # when the classification heads are missing; no gradients are needed for this shape inference
        with torch.no_grad():
            out_raster = self.raster_model(dummy_raster_sample.unsqueeze(0))
            out_vector = self.vector_model(dummy_vector_sample.x_dict, dummy_vector_sample.edge_index_dict)
        n_raster_features = out_raster.shape[1]
        n_vector_features = out_vector.shape[1]

        # fusion layer
        self.fusion_layer = nn.Linear(n_raster_features + n_vector_features, n_classes)

    def train(self, mode=True):
        '''Sets the training mode of the fusion layer while keeping the frozen models in evaluation mode.

        nn.Module.train propagates the mode to all submodules, which would switch the pretrained
        models back to training mode; they are therefore reset to evaluation mode afterwards.
        '''
        super().train(mode)
        self.raster_model.eval()
        self.vector_model.eval()
        return self

    def forward(self, raster, graph):
        '''Returns the fusion layer output for a batch of rasters and the associated graph(s).'''
        raster_output = self.raster_model(raster)
        vector_output = self.vector_model(graph.x_dict, graph.edge_index_dict)

        # concatenate along feature dimension
        combined_features = torch.cat((raster_output, vector_output), dim=1)
        result = self.fusion_layer(combined_features)
        return result

### Elimination model

In [8]:
# Elimination task: raster and vector samples are stored in parallel
# training / validation / test directories.
path_to_raster_training_data = "../data.nosync/raster/training_data/elimination/training"
path_to_raster_validation_data = "../data.nosync/raster/training_data/elimination/validation"
path_to_raster_test_data = "../data.nosync/raster/training_data/elimination/test"

path_to_vector_training_data = "../data.nosync/vector/training_data/elimination/training"
path_to_vector_validation_data = "../data.nosync/vector/training_data/elimination/validation"
path_to_vector_test_data = "../data.nosync/vector/training_data/elimination/test"

# input parameters: the label consists of the elimination operator only
elimination_operators = ["elimination"]
n_classes = len(elimination_operators)
batch_size = 16

# datasets pairing the raster and vector representation of every sample
training_set = BuildingMultimodalDataset(raster_path=path_to_raster_training_data,
                                         vector_path=path_to_vector_training_data,
                                         operators=elimination_operators)
validation_set = BuildingMultimodalDataset(raster_path=path_to_raster_validation_data,
                                           vector_path=path_to_vector_validation_data,
                                           operators=elimination_operators)
test_set = BuildingMultimodalDataset(raster_path=path_to_raster_test_data,
                                     vector_path=path_to_vector_test_data,
                                     operators=elimination_operators)

# DataLoaders: only the training data is shuffled
# NOTE(review): these are torch.utils.data loaders; presumably the vector samples are graph
# objects, which torch's default collate function may not be able to batch -- confirm before
# iterating over the loaders
training_loader = DataLoader(training_set, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

# report the size of every split
print(f"{len(training_set):,} samples in the training set.")
print(f"{len(validation_set):,} samples in the validation set.")
print(f"{len(test_set):,} samples in the test set.")

1,000 samples in the training set.
250 samples in the validation set.
250 samples in the test set.


In [9]:
# load the trained raster model
raster_model_path = "../data.nosync/raster/models/elimination"
# alternative raster model (CNN):
#raster_model_name = "CNN_eli_attachRoadsTrue_2190433p_1000s_10ep_bs16.pth"
#raster_model = CNN(n_channels=3, n_classes=1)
raster_model_name = "ViT_eli_attachRoadsTrue_26820129p_1000s_10ep_bs16.pth"
raster_model = ViT(channels=3, num_classes=1)
# map_location remaps tensors that were saved on another device (e.g. a checkpoint written on
# a GPU) to the device available here; without it such a checkpoint cannot be deserialized on
# a CPU-only machine
raster_checkpoint = torch.load(os.path.join(raster_model_path, raster_model_name), map_location=device)
raster_model.load_state_dict(raster_checkpoint["model_state_dict"])
raster_model.eval()

# load the trained vector model
vector_model_path = "../data.nosync/vector/models/elimination"
# alternative vector model (HGNN):
#vector_model_name = "HGNN_eli_attachRoadsTrue_253313p_1000s_10ep_bs16.pth"
#vector_model = initialize_gnn(model="hgnn", sample=training_set[2][1], hidden_channels=128, num_layers=2, node_to_predict="focal_building")
vector_model_name = "HGT_eli_attachRoadsTrue_637219p_1000s_10ep_bs16.pth"
vector_model = initialize_gnn(model="hgt", sample=training_set[2][1], hidden_channels=128, num_heads=2, 
                              num_layers=2, node_to_predict="focal_building")
vector_checkpoint = torch.load(os.path.join(vector_model_path, vector_model_name), map_location=device)
vector_model.load_state_dict(vector_checkpoint["model_state_dict"])
vector_model.eval()

print("Models successfully loaded.")

Number of node features: {'focal_building': 10, 'context_building': 10, 'road': 2}, 1 operators
Models successfully loaded.


In [10]:
# initialize the multimodal model
multimodal_model = MultimodalModel(raster_model, 
                                   vector_model, 
                                   dummy_raster_sample=training_set[0][0], 
                                   dummy_vector_sample=training_set[2][1], 
                                   n_classes=n_classes)

In [11]:
# Smoke test: one raster sample (with a batch dimension added) and one graph
# are passed through the fused model. Note that the raster is taken from
# sample 0 while the graph is taken from sample 2.
raster_batch = training_set[0][0].unsqueeze(0)
graph_sample = training_set[2][1]
multimodal_model(raster_batch, graph_sample)

tensor([[-0.0357]], grad_fn=<AddmmBackward0>)

### Selection model

In [12]:
# Selection task: raster and vector samples are stored in parallel
# training / validation / test directories.
path_to_raster_training_data = "../data.nosync/raster/training_data/selection/training"
path_to_raster_validation_data = "../data.nosync/raster/training_data/selection/validation"
path_to_raster_test_data = "../data.nosync/raster/training_data/selection/test"

path_to_vector_training_data = "../data.nosync/vector/training_data/selection/training"
path_to_vector_validation_data = "../data.nosync/vector/training_data/selection/validation"
path_to_vector_test_data = "../data.nosync/vector/training_data/selection/test"

# input parameters: the label consists of the four selection operators
selection_operators = ["aggregation", "typification", "displacement", "enlargement"]
n_classes = len(selection_operators)
batch_size = 16

# datasets pairing the raster and vector representation of every sample
training_set = BuildingMultimodalDataset(raster_path=path_to_raster_training_data,
                                         vector_path=path_to_vector_training_data,
                                         operators=selection_operators)
validation_set = BuildingMultimodalDataset(raster_path=path_to_raster_validation_data,
                                           vector_path=path_to_vector_validation_data,
                                           operators=selection_operators)
test_set = BuildingMultimodalDataset(raster_path=path_to_raster_test_data,
                                     vector_path=path_to_vector_test_data,
                                     operators=selection_operators)

# DataLoaders: only the training data is shuffled
# NOTE(review): these are torch.utils.data loaders; presumably the vector samples are graph
# objects, which torch's default collate function may not be able to batch -- confirm before
# iterating over the loaders
training_loader = DataLoader(training_set, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

# report the size of every split
print(f"{len(training_set):,} samples in the training set.")
print(f"{len(validation_set):,} samples in the validation set.")
print(f"{len(test_set):,} samples in the test set.")

1,000 samples in the training set.
250 samples in the validation set.
250 samples in the test set.


In [13]:
# load the trained raster model
raster_model_path = "../data.nosync/raster/models/selection"
# alternative raster model (CNN):
#raster_model_name = "CNN_sel_attachRoadsTrue_8481988p_1000s_10ep_bs16.pth"
#raster_model = CNN(n_channels=3, n_classes=4)
raster_model_name = "ViT_sel_attachRoadsTrue_26844804p_1000s_10ep_bs16.pth"
raster_model = ViT(channels=3, num_classes=4)
# map_location remaps tensors that were saved on another device (e.g. a checkpoint written on
# a GPU) to the device available here; without it such a checkpoint cannot be deserialized on
# a CPU-only machine
raster_checkpoint = torch.load(os.path.join(raster_model_path, raster_model_name), map_location=device)
raster_model.load_state_dict(raster_checkpoint["model_state_dict"])
raster_model.eval()

# load the trained vector model
vector_model_path = "../data.nosync/vector/models/selection"
# alternative vector model (HGNN):
#vector_model_name = "HGNN_sel_attachRoadsTrue_278276p_1000s_10ep_bs16.pth"
#vector_model = initialize_gnn(model="hgnn", sample=training_set[0][1], hidden_channels=128, num_layers=2, node_to_predict="focal_building")
vector_model_name = "HGT_sel_attachRoadsTrue_662182p_1000s_10ep_bs16.pth"
vector_model = initialize_gnn(model="hgt", sample=training_set[0][1], hidden_channels=128, num_heads=2, 
                              num_layers=2, node_to_predict="focal_building")
vector_checkpoint = torch.load(os.path.join(vector_model_path, vector_model_name), map_location=device)
vector_model.load_state_dict(vector_checkpoint["model_state_dict"])
vector_model.eval()

print("Models successfully loaded.")

Number of node features: {'focal_building': 10, 'context_building': 10, 'road': 2}, 4 operators
Models successfully loaded.


In [14]:
# initialize the multimodal model
multimodal_model = MultimodalModel(raster_model, 
                                   vector_model, 
                                   dummy_raster_sample=training_set[0][0], 
                                   dummy_vector_sample=training_set[0][1], 
                                   n_classes=n_classes)

In [15]:
# Smoke test: the raster (with a batch dimension added) and the graph of the
# first training sample are passed through the fused model.
raster_batch = training_set[0][0].unsqueeze(0)
graph_sample = training_set[0][1]
multimodal_model(raster_batch, graph_sample)

tensor([[ 0.0339, -0.1297,  0.6741,  0.4333]], grad_fn=<AddmmBackward0>)