In [1]:
import os
import sys
from urllib.parse import quote

sys.path.append("models/raster")
sys.path.append("models/vector")
sys.path.append("models/multimodal")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from sqlalchemy import create_engine
import geoalchemy2

from auxiliary.database import read_table_from_db_multiple_geoms
from auxiliary.config import db_username, db_password

from cnn import CNN
from vit import ViT
from dataset_raster import BuildingRasterDataset, npz_to_tensor
from initialize_gnn import initialize_gnn
from dataset_vector import BuildingVectorDataset, process_HeteroData

from model_multimodal import MultimodalModel

In [2]:
# Percent-encode the credentials before embedding them in the connection URL so that
# characters such as "@", ":", "/" or "%" in the username or password are not
# mistaken for URL delimiters.
db_username_quoted = quote(str(db_username), safe="")
db_password_quoted = quote(str(db_password), safe="")
engine = create_engine(f"postgresql://{db_username_quoted}:{db_password_quoted}@localhost/genops")

In [3]:
# columns fetched from the buildings table (both geometry columns included)
building_columns = ["source_uuid",
                    "source_geom",
                    "target_uuid",
                    "target_geom",
                    "elimination",
                    "aggregation",
                    "typification",
                    "displacement",
                    "displacement_prob",
                    "enlargement",
                    "enlargement_prob",
                    "simplification",
                    "simplification_prob",
                    "block_id"]

# load the buildings from the database
buildings = read_table_from_db_multiple_geoms(engine,
                                              "buildings_dkm25_to_dkm50_genops",
                                              geom_cols=["source_geom", "target_geom"],
                                              columns_to_select=building_columns)

# UUIDs listed in the experimental set
experimental_uuid_table = pd.read_csv("../data.nosync/balanced_data/experimental_uuids.csv")
uuids_experimental = list(experimental_uuid_table["uuid"])

# keep only the buildings whose source UUID is part of the experimental set
is_experimental = buildings["source_uuid"].isin(uuids_experimental)
buildings_experimental = buildings[is_experimental].reset_index(drop=True)

In [4]:
# canonical order in which the operators are specified everywhere
operator_order = (
    "elimination",
    "aggregation",
    "typification",
    "displacement",
    "enlargement",
    "simplification",
)

# canonical order in which the features are specified everywhere
feature_order = (
    "area",
    "perimeter",
    "convexity",
    "eri",
    "orientation_mbr",
    "wall_average",
    "voronoi_area",
    "impact_area",
    "x_coord",
    "y_coord",
)

In [5]:
# Plots use the DIN typeface when working locally (detected here as: no CUDA device available)
if not torch.cuda.is_available():
    plt.rcParams.update({"font.family": "DIN Alternate"})

### Loading the data

In [6]:
# directories holding the experimental samples for the raster and the vector models
path_to_raster_experimental_data = "../data.nosync/raster/training_data/experimental"
path_to_vector_experimental_data = "../data.nosync/vector/training_data/experimental"

# names of all sample files in the two directories (used to look samples up by UUID)
raster_filenames = os.listdir(path_to_raster_experimental_data)
vector_filenames = os.listdir(path_to_vector_experimental_data)

# features passed to the vector datasets and to the vector / multimodal prediction functions
features = [
    "area",
    "perimeter",
    "convexity",
    "eri",
    "orientation_mbr",
    "wall_average",
    "voronoi_area",
    "impact_area",
    "x_coord",
    "y_coord",
]

# operators handled by the selection models, in the order of their outputs
selection_operators = [
    "aggregation",
    "typification",
    "displacement",
    "enlargement",
]

In [7]:
# Dataset objects over the experimental samples; they supply the example inputs
# needed to initialize the GNNs and the multimodal models

# keyword arguments shared by all datasets
shared_dataset_kwargs = {"attach_roads": True, "transform": None, "subset": None}

# additional keyword arguments shared by the two vector datasets
shared_vector_kwargs = {"operator_order": operator_order,
                        "features": features,
                        "feature_order": feature_order,
                        **shared_dataset_kwargs}

# raster datasets for the elimination and the selection task
raster_eli_dataset = BuildingRasterDataset(path_to_raster_experimental_data,
                                           operators=["elimination"],
                                           **shared_dataset_kwargs)
raster_sel_dataset = BuildingRasterDataset(path_to_raster_experimental_data,
                                           operators=selection_operators,
                                           **shared_dataset_kwargs)

# vector datasets for the elimination and the selection task
vector_eli_dataset = BuildingVectorDataset(path_to_vector_experimental_data,
                                           operators=["elimination"],
                                           **shared_vector_kwargs)
vector_sel_dataset = BuildingVectorDataset(path_to_vector_experimental_data,
                                           operators=selection_operators,
                                           **shared_vector_kwargs)

### Loading the trained models

In [8]:
def _load_trained_weights(model, *checkpoint_path_parts):
    '''Loads the "model_state_dict" stored in the given checkpoint file into the model and puts the model into evaluation mode.
    The checkpoint tensors are mapped to the CPU while loading, so that checkpoints saved on a GPU machine can also be
    restored on a machine without CUDA. The models in this notebook are created and evaluated on the CPU.
    Returns the loaded checkpoint dictionary.'''
    checkpoint = torch.load(os.path.join(*checkpoint_path_parts), map_location="cpu")
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()
    return checkpoint

# load the trained raster models
raster_model_path = "../data.nosync/raster/models"

raster_eli_model_name = "CNN_eli_attachRoadsTrue_4075585p_1000s_10ep_bs16.pth"
raster_eli_model = CNN(n_channels=3, n_classes=1)
raster_eli_checkpoint = _load_trained_weights(raster_eli_model, raster_model_path, "elimination", raster_eli_model_name)

raster_sel_model_name = "CNN_sel_attachRoadsTrue_8893252p_1000s_10ep_bs16.pth"
raster_sel_model = CNN(n_channels=3, n_classes=4)
raster_sel_checkpoint = _load_trained_weights(raster_sel_model, raster_model_path, "selection", raster_sel_model_name)

# load the trained vector models
vector_model_path = "../data.nosync/vector/models"

vector_eli_model_name = "HGT_eli_attachRoadsTrue_645539p_1000s_10ep_bs16.pth"
vector_eli_model = initialize_gnn(model="hgt",
                                  sample=vector_eli_dataset.get(2),
                                  hidden_channels=128,
                                  num_heads=2,
                                  num_layers=2,
                                  node_to_predict="focal_building")
vector_eli_checkpoint = _load_trained_weights(vector_eli_model, vector_model_path, "elimination", vector_eli_model_name)

vector_sel_model_name = "HGT_sel_attachRoadsTrue_695462p_1000s_10ep_bs16.pth"
vector_sel_model = initialize_gnn(model="hgt",
                                  sample=vector_sel_dataset.get(2),
                                  hidden_channels=128,
                                  num_heads=2,
                                  num_layers=2,
                                  node_to_predict="focal_building")
vector_sel_checkpoint = _load_trained_weights(vector_sel_model, vector_model_path, "selection", vector_sel_model_name)

# load the trained multimodal models
# NOTE(review): the multimodal models receive the very same raster / vector model instances that are
# later used for the unimodal predictions. If MultimodalModel keeps references to them (instead of
# copying), loading the multimodal checkpoint would also change the unimodal models -- TODO confirm.
multimodal_model_path = "../data.nosync/multimodal/models"

multimodal_eli_model_name = "MultimodalCNNHGT_eli_attachRoadsTrue_4720867p_1000s_10ep_bs16.pth"
multimodal_eli_model = MultimodalModel(raster_model=raster_eli_model,
                                       vector_model=vector_eli_model,
                                       dummy_raster_sample=raster_eli_dataset[0][0],
                                       dummy_vector_sample=vector_eli_dataset.get(1),
                                       n_classes=1)
multimodal_eli_checkpoint = _load_trained_weights(multimodal_eli_model,
                                                  multimodal_model_path, "elimination", multimodal_eli_model_name)

multimodal_sel_model_name = "MultimodalCNNHGT_sel_attachRoadsTrue_9587686p_1000s_10ep_bs16.pth"
multimodal_sel_model = MultimodalModel(raster_model=raster_sel_model,
                                       vector_model=vector_sel_model,
                                       dummy_raster_sample=raster_sel_dataset[0][0],
                                       dummy_vector_sample=vector_sel_dataset.get(1),
                                       n_classes=4)
multimodal_sel_checkpoint = _load_trained_weights(multimodal_sel_model,
                                                  multimodal_model_path, "selection", multimodal_sel_model_name)

print("Models successfully loaded.")

Number of node features: {'focal_building': 10, 'context_building': 10, 'road': 2}, 1 operators
Number of node features: {'focal_building': 10, 'context_building': 10, 'road': 2}, 4 operators
Models successfully loaded.


In [9]:
def predict_raster(elimination_model, selection_model, uuid, attach_roads=True):
    '''Computes a generalization operator prediction for a given UUID using the specified raster-based elimination and selection model.
    Returns a dictionary with the operators as keys and values 1 / 0 indicating their respective presence / absence.
    If elimination is predicted, all selection operators are reported as 0 and the selection model is not evaluated.
    Raises an IndexError if no raster file name contains the given UUID.'''
    # get the file associated with the given uuid (first file name containing it)
    raster_filename = next((file for file in raster_filenames if uuid in file), None)
    if raster_filename is None:
        raise IndexError(f"No raster sample containing UUID {uuid} found in {path_to_raster_experimental_data}")

    # load the raster file
    raster_sample_raw = np.load(os.path.join(path_to_raster_experimental_data, raster_filename))

    # convert loaded file to tensor and add the batch dimension expected by the models
    raster_sample = npz_to_tensor(raster_sample_raw, attach_roads=attach_roads)
    raster_batch = raster_sample.unsqueeze(0)

    # pure inference: no gradients are needed, so autograd bookkeeping is switched off
    with torch.no_grad():
        # compute prediction through the elimination model
        pred_elimination_logits = elimination_model(raster_batch)
        pred_elimination = torch.sigmoid(pred_elimination_logits)
        pred_elimination_label = (pred_elimination > 0.5).float().squeeze(0)

        if int(pred_elimination_label.item()) == 1:
            return {"elimination": 1, "aggregation": 0, "typification": 0, "displacement": 0, "enlargement": 0}

        operators_pred = {"elimination": 0}

        # for all retained buildings, compute prediction through the selection model
        pred_selection_logits = selection_model(raster_batch)
        pred_selection = torch.sigmoid(pred_selection_logits)
        pred_selection_label = (pred_selection > 0.5).float().squeeze(0)

    # the selection model outputs follow the order of selection_operators
    for i, operator in enumerate(selection_operators):
        operators_pred[operator] = int(pred_selection_label[i].item())

    return operators_pred

def predict_vector(elimination_model, selection_model, uuid, features, feature_order, attach_roads=True):
    '''Computes a generalization operator prediction for a given UUID using the specified graph-based elimination and selection model.
    Returns a dictionary with the operators as keys and values 1 / 0 indicating their respective presence / absence.
    If elimination is predicted, all selection operators are reported as 0 and the selection model is not evaluated.
    Raises an IndexError if no vector file name contains the given UUID.'''
    # get the file associated with the given uuid (first file name containing it)
    vector_filename = next((file for file in vector_filenames if uuid in file), None)
    if vector_filename is None:
        raise IndexError(f"No vector sample containing UUID {uuid} found in {path_to_vector_experimental_data}")

    # load the vector file
    vector_sample_raw = torch.load(os.path.join(path_to_vector_experimental_data, vector_filename))

    # process the raw HeteroData object according to the specified information;
    # features are addressed by their (sorted) position within feature_order
    features_idx = sorted([feature_order.index(feature) for feature in features if feature in feature_order])
    vector_sample = process_HeteroData(vector_sample_raw,
                                       operators=[0,1,2,3,4,5], # operators do not matter -> take all
                                       features=features_idx,
                                       attach_roads=attach_roads)

    # pure inference: no gradients are needed, so autograd bookkeeping is switched off
    with torch.no_grad():
        # compute prediction through the elimination model
        pred_elimination_logits = elimination_model(vector_sample.x_dict, vector_sample.edge_index_dict)
        pred_elimination = torch.sigmoid(pred_elimination_logits)
        pred_elimination_label = (pred_elimination > 0.5).float().squeeze(0)

        if int(pred_elimination_label.item()) == 1:
            return {"elimination": 1, "aggregation": 0, "typification": 0, "displacement": 0, "enlargement": 0}

        operators_pred = {"elimination": 0}

        # for all retained buildings, compute prediction through the selection model
        pred_selection_logits = selection_model(vector_sample.x_dict, vector_sample.edge_index_dict)
        pred_selection = torch.sigmoid(pred_selection_logits)
        pred_selection_label = (pred_selection > 0.5).float().squeeze(0)

    # the selection model outputs follow the order of selection_operators
    for i, operator in enumerate(selection_operators):
        operators_pred[operator] = int(pred_selection_label[i].item())

    return operators_pred

def predict_multimodal(elimination_model, selection_model, uuid, features, feature_order, attach_roads=True):
    '''Computes a generalization operator prediction for a given UUID using the specified multimodal elimination and selection model,
    which receive both the raster and the graph representation of the sample.
    Returns a dictionary with the operators as keys and values 1 / 0 indicating their respective presence / absence.
    If elimination is predicted, all selection operators are reported as 0 and the selection model is not evaluated.
    Raises an IndexError if no raster or no vector file name contains the given UUID.'''
    # get the files associated with the given uuid (first file names containing it)
    raster_filename = next((file for file in raster_filenames if uuid in file), None)
    if raster_filename is None:
        raise IndexError(f"No raster sample containing UUID {uuid} found in {path_to_raster_experimental_data}")
    vector_filename = next((file for file in vector_filenames if uuid in file), None)
    if vector_filename is None:
        raise IndexError(f"No vector sample containing UUID {uuid} found in {path_to_vector_experimental_data}")

    # load the raster and vector files
    raster_sample_raw = np.load(os.path.join(path_to_raster_experimental_data, raster_filename))
    vector_sample_raw = torch.load(os.path.join(path_to_vector_experimental_data, vector_filename))

    # convert loaded file to tensor and add the batch dimension passed to the models
    raster_sample = npz_to_tensor(raster_sample_raw, attach_roads=attach_roads)
    raster_batch = raster_sample.unsqueeze(0)

    # process the raw HeteroData object according to the specified information;
    # features are addressed by their (sorted) position within feature_order
    features_idx = sorted([feature_order.index(feature) for feature in features if feature in feature_order])
    vector_sample = process_HeteroData(vector_sample_raw,
                                       operators=[0,1,2,3,4,5], # operators do not matter -> take all
                                       features=features_idx,
                                       attach_roads=attach_roads)

    # pure inference: no gradients are needed, so autograd bookkeeping is switched off
    with torch.no_grad():
        # compute prediction through the elimination model
        pred_elimination_logits = elimination_model(raster_batch, vector_sample)
        pred_elimination = torch.sigmoid(pred_elimination_logits)
        pred_elimination_label = (pred_elimination > 0.5).float().squeeze(0)

        if int(pred_elimination_label.item()) == 1:
            return {"elimination": 1, "aggregation": 0, "typification": 0, "displacement": 0, "enlargement": 0}

        operators_pred = {"elimination": 0}

        # for all retained buildings, compute prediction through the selection model
        pred_selection_logits = selection_model(raster_batch, vector_sample)
        pred_selection = torch.sigmoid(pred_selection_logits)
        pred_selection_label = (pred_selection > 0.5).float().squeeze(0)

    # the selection model outputs follow the order of selection_operators
    for i, operator in enumerate(selection_operators):
        operators_pred[operator] = int(pred_selection_label[i].item())

    return operators_pred

In [10]:
# storing raster predictions
preds_raster = buildings_experimental["source_uuid"].apply(
    lambda uuid: predict_raster(raster_eli_model, raster_sel_model, uuid)
)

# expand the per-building prediction dictionaries into one column per operator
preds_raster_df = pd.DataFrame(preds_raster.tolist(), index=preds_raster.index)
preds_raster_df.columns = ["pred_" + col + "_raster" for col in preds_raster_df.columns]

# remove prediction columns left over from an earlier execution of this cell before joining,
# so that re-running the cell does not fail on overlapping column names
buildings_experimental = (buildings_experimental
                          .drop(columns=preds_raster_df.columns, errors="ignore")
                          .join(preds_raster_df))

In [11]:
# storing vector predictions
preds_vector = buildings_experimental["source_uuid"].apply(
    lambda uuid: predict_vector(vector_eli_model,
                                vector_sel_model,
                                uuid,
                                features=features,
                                feature_order=feature_order)
)

# expand the per-building prediction dictionaries into one column per operator
preds_vector_df = pd.DataFrame(preds_vector.tolist(), index=preds_vector.index)
preds_vector_df.columns = ["pred_" + col + "_vector" for col in preds_vector_df.columns]

# remove prediction columns left over from an earlier execution of this cell before joining,
# so that re-running the cell does not fail on overlapping column names
buildings_experimental = (buildings_experimental
                          .drop(columns=preds_vector_df.columns, errors="ignore")
                          .join(preds_vector_df))

In [12]:
# storing multimodal predictions
preds_multimodal = buildings_experimental["source_uuid"].apply(
    lambda uuid: predict_multimodal(multimodal_eli_model,
                                    multimodal_sel_model,
                                    uuid,
                                    features=features,
                                    feature_order=feature_order)
)

# expand the per-building prediction dictionaries into one column per operator
preds_multimodal_df = pd.DataFrame(preds_multimodal.tolist(), index=preds_multimodal.index)
preds_multimodal_df.columns = ["pred_" + col + "_multimodal" for col in preds_multimodal_df.columns]

# remove prediction columns left over from an earlier execution of this cell before joining,
# so that re-running the cell does not fail on overlapping column names
buildings_experimental = (buildings_experimental
                          .drop(columns=preds_multimodal_df.columns, errors="ignore")
                          .join(preds_multimodal_df))

In [13]:
# preview the first five rows, including the joined prediction columns
buildings_experimental.head(n=5)

Unnamed: 0,source_uuid,source_geom,target_uuid,target_geom,elimination,aggregation,typification,displacement,displacement_prob,enlargement,...,pred_elimination_vector,pred_aggregation_vector,pred_typification_vector,pred_displacement_vector,pred_enlargement_vector,pred_elimination_multimodal,pred_aggregation_multimodal,pred_typification_multimodal,pred_displacement_multimodal,pred_enlargement_multimodal
0,{85318C34-58F2-4918-94A6-C9F57F1388F5},"POLYGON ((2699374.976 1111182.576, 2699387.823...",{3DD7631E-31D4-4F19-924D-C68C430D0CF2},"POLYGON ((2699353.94125 1111157.725000001, 269...",0,1,1,1,0.754299,1,...,0,1,0,1,1,0,1,0,0,1
1,{8995993F-DA45-4654-8A66-2DB443384549},"POLYGON ((2678235.300 1289236.329, 2678229.758...",{DA9E1927-2DDC-4871-BFDF-1404164C3363},"POLYGON ((2678239.43375 1289229.973749999, 267...",0,1,0,1,0.754299,0,...,0,1,0,1,0,0,1,0,1,0
2,{89629C3A-CE33-4FBA-A721-42879058462E},"POLYGON ((2694088.746 1142933.175, 2694078.935...",{AD50AB0B-6ED0-445F-88AA-1F0F1487A330},"POLYGON ((2694104.278749999 1142916.145, 26940...",0,1,1,1,0.754299,1,...,0,1,0,0,1,0,1,0,0,1
3,{8A31AED2-E820-40D6-90D2-CB37BD0E4CBD},"POLYGON ((2590072.970 1193204.782, 2590070.500...","{3B004F11-D7C6-4453-8C38-406940A5C99F},{E68F90...",MULTIPOLYGON (((2590025.998750001 1193194.2600...,0,1,1,1,0.754299,1,...,0,1,0,1,0,0,1,1,1,0
4,{8A0C61FA-B719-4164-9168-826B849404F7},"POLYGON ((2548280.323 1211864.241, 2548269.991...",{CC0807BA-38A0-4B3C-A8D9-378B2DFF3FC1},"POLYGON ((2548283.813749999 1211840.502500001,...",0,1,1,1,0.865735,1,...,1,0,0,0,0,0,0,0,1,1
