In [1]:
import sys
sys.path.append("models/raster")
sys.path.append("models/vector")
sys.path.append("models/multimodal")

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from sqlalchemy import create_engine
import geoalchemy2

from auxiliary.database import read_table_from_db_multiple_geoms
from auxiliary.config import db_username, db_password

from cnn import CNN
from vit import ViT
from dataset_raster import BuildingRasterDataset, npz_to_tensor
from initialize_gnn import initialize_gnn
from dataset_vector import BuildingVectorDataset, process_HeteroData, get_dummy_sample

from model_multimodal import MultimodalModel

In [2]:
# SQLAlchemy engine for the PostgreSQL database "genops" on localhost, using the credentials from auxiliary.config
# NOTE(review): the credentials are interpolated verbatim into the URL -- presumably they contain no characters
# that are special in a URL (e.g. "@" or "/"); confirm or URL-encode them
engine = create_engine(f"postgresql://{db_username}:{db_password}@localhost/genops")

In [3]:
# device that is passed as map_location when the model checkpoints are loaded
device = torch.device("cpu")

print(f"Device set to: {device}")

Device set to: cpu


In [4]:
# read the selected columns of the buildings table (with both geometry columns) from the database
buildings = read_table_from_db_multiple_geoms(
    engine,
    "buildings_dkm25_to_dkm50_genops",
    geom_cols=["source_geom", "target_geom"],
    columns_to_select=[
        "source_uuid", "source_geom",
        "target_uuid", "target_geom",
        "elimination",
        "aggregation",
        "typification",
        "displacement", "displacement_prob",
        "enlargement", "enlargement_prob",
        "simplification", "simplification_prob",
        "block_id",
    ],
)

# UUIDs listed in the experimental set
uuids_experimental = list(pd.read_csv("../data.nosync/balanced_data/experimental_uuids.csv")["uuid"])

# keep only the buildings whose source UUID is part of the experimental set
buildings_experimental = buildings.loc[buildings["source_uuid"].isin(uuids_experimental)].reset_index(drop=True)

In [5]:
# canonical order in which the operators are always specified
operator_order = (
    "elimination", "aggregation", "typification",
    "displacement", "enlargement", "simplification",
)
# canonical order in which the features are always specified
feature_order = (
    "area", "perimeter", "convexity", "eri", "orientation_mbr",
    "wall_average", "voronoi_area", "impact_area", "x_coord", "y_coord",
)

In [6]:
# Use the DIN font for plots when no CUDA device is available (taken as the sign of a local machine)
if not torch.cuda.is_available():
    plt.rcParams.update({"font.family": "DIN Alternate"})

### Loading the data

In [7]:
# define path to test data for both raster and vector
# the directory listings are used by the predict_* functions to look up the file belonging to a UUID
path_to_raster_experimental_data = "../data.nosync/raster/training_data/experimental"
raster_filenames = os.listdir(path_to_raster_experimental_data)
path_to_vector_experimental_data = "../data.nosync/vector/training_data/experimental"
vector_filenames = os.listdir(path_to_vector_experimental_data)

# important features
features = [
    "area", "perimeter", "convexity", "eri", "orientation_mbr",
    "wall_average", "voronoi_area", "impact_area", "x_coord", "y_coord",
]

# operators predicted by the selection models
selection_operators = [
    "aggregation",
    "typification",
    "displacement",
    "enlargement",
]

In [8]:
# creating Dataset objects to initialize GNNs and multimodal model
# the keyword arguments shared by the elimination and the selection variant are collected once
_raster_dataset_kwargs = {"attach_roads": True,
                          "transform": None,
                          "subset": None}
_vector_dataset_kwargs = {"operator_order": operator_order,
                          "features": features,
                          "feature_order": feature_order,
                          "attach_roads": True,
                          "transform": None,
                          "subset": None}

raster_eli_dataset = BuildingRasterDataset(path_to_raster_experimental_data,
                                           operators=["elimination"],
                                           **_raster_dataset_kwargs)

raster_sel_dataset = BuildingRasterDataset(path_to_raster_experimental_data,
                                           operators=selection_operators,
                                           **_raster_dataset_kwargs)

vector_eli_dataset = BuildingVectorDataset(path_to_vector_experimental_data,
                                           operators=["elimination"],
                                           **_vector_dataset_kwargs)

vector_sel_dataset = BuildingVectorDataset(path_to_vector_experimental_data,
                                           operators=selection_operators,
                                           **_vector_dataset_kwargs)

# load dummy sample
vector_path = "../data.nosync/vector"
dummy_sample_path = os.path.join(vector_path, "training_data", "dummy_sample.pt")
_dummy_sample_kwargs = {"operator_order": operator_order,
                        "features": features,
                        "feature_order": feature_order,
                        "attach_roads": True}
dummy_sample_eli = get_dummy_sample(dummy_sample_path,
                                    operators=["elimination"],
                                    **_dummy_sample_kwargs)
dummy_sample_sel = get_dummy_sample(dummy_sample_path,
                                    operators=selection_operators,
                                    **_dummy_sample_kwargs)

### Loading the trained models

In [9]:
def _load_trained_weights(model, model_dir, task, checkpoint_name):
    '''Restores the weights stored in the checkpoint <model_dir>/<task>/<checkpoint_name> into the given model
    and switches the model to evaluation mode. Returns the loaded checkpoint.'''
    # NOTE: torch.load unpickles the file -- only load checkpoints that come from a trusted source
    checkpoint = torch.load(os.path.join(model_dir, task, checkpoint_name), map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()
    return checkpoint

# load the trained raster models
raster_model_path = "../data.nosync/raster/models"

raster_eli_model_name = "CNN_eli_attachRoadsTrue_4075585p_1000s_10ep_bs16.pth"
raster_eli_model = CNN(n_channels=3, n_classes=1)
raster_eli_checkpoint = _load_trained_weights(raster_eli_model, raster_model_path, "elimination", raster_eli_model_name)

raster_sel_model_name = "CNN_sel_attachRoadsTrue_8893252p_1000s_10ep_bs16.pth"
raster_sel_model = CNN(n_channels=3, n_classes=4)
raster_sel_checkpoint = _load_trained_weights(raster_sel_model, raster_model_path, "selection", raster_sel_model_name)

# load the trained vector models
vector_model_path = "../data.nosync/vector/models"

vector_eli_model_name = "HGT_eli_attachRoadsTrue_645539p_1000s_10ep_bs16.pth"
vector_eli_model = initialize_gnn(model="hgt",
                                  sample=dummy_sample_eli,
                                  hidden_channels=128,
                                  num_heads=2,
                                  num_layers=2,
                                  node_to_predict="focal_building")
vector_eli_checkpoint = _load_trained_weights(vector_eli_model, vector_model_path, "elimination", vector_eli_model_name)

vector_sel_model_name = "HGT_sel_attachRoadsTrue_695462p_1000s_10ep_bs16.pth"
vector_sel_model = initialize_gnn(model="hgt",
                                  sample=dummy_sample_sel,
                                  hidden_channels=128,
                                  num_heads=2,
                                  num_layers=2,
                                  node_to_predict="focal_building")
vector_sel_checkpoint = _load_trained_weights(vector_sel_model, vector_model_path, "selection", vector_sel_model_name)

# load the trained multimodal models
# the multimodal models wrap the raster and vector models loaded above
multimodal_model_path = "../data.nosync/multimodal/models"

multimodal_eli_model_name = "MultimodalCNNHGT_eli_attachRoadsTrue_4720867p_1000s_10ep_bs16.pth"
multimodal_eli_model = MultimodalModel(raster_model=raster_eli_model,
                                       vector_model=vector_eli_model,
                                       dummy_raster_sample=raster_eli_dataset[0][0],
                                       dummy_vector_sample=dummy_sample_eli,
                                       n_classes=1)
multimodal_eli_checkpoint = _load_trained_weights(multimodal_eli_model, multimodal_model_path, "elimination", multimodal_eli_model_name)

multimodal_sel_model_name = "MultimodalCNNHGT_sel_attachRoadsTrue_9587686p_1000s_10ep_bs16.pth"
multimodal_sel_model = MultimodalModel(raster_model=raster_sel_model,
                                       vector_model=vector_sel_model,
                                       dummy_raster_sample=raster_sel_dataset[0][0],
                                       dummy_vector_sample=dummy_sample_sel,
                                       n_classes=4)
multimodal_sel_checkpoint = _load_trained_weights(multimodal_sel_model, multimodal_model_path, "selection", multimodal_sel_model_name)

print("Models successfully loaded.")

Number of node features: {'focal_building': 10, 'context_building': 10, 'road': 2}, 1 operators
Number of node features: {'focal_building': 10, 'context_building': 10, 'road': 2}, 4 operators
Models successfully loaded.


In [10]:
def predict_raster(elimination_model, selection_model, uuid, attach_roads=True):
    '''Computes a generalization operator prediction for a given UUID using the specified raster-based elimination and selection model.
    Returns a dictionary with the operators as keys and values 1 / 0 indicating their respective presence / absence.
    Raises an IndexError if none of the raster files contains the given UUID in its name.'''
    # get the file associated with the given uuid
    matching_files = [file for file in raster_filenames if uuid in file]
    if not matching_files:
        raise IndexError(f"No raster file found for UUID {uuid}")
    raster_filename = matching_files[0]

    # load the raster file
    raster_sample_raw = np.load(os.path.join(path_to_raster_experimental_data, raster_filename))

    # convert loaded file to tensor and add a leading batch dimension
    raster_sample = npz_to_tensor(raster_sample_raw, attach_roads=attach_roads)
    raster_batch = raster_sample.unsqueeze(0)

    # pure inference: no gradients are needed, so do not track them
    with torch.no_grad():
        # compute prediction through the elimination model
        pred_elimination = torch.sigmoid(elimination_model(raster_batch))
        pred_elimination_label = (pred_elimination > 0.5).float().squeeze(0)

        if int(pred_elimination_label.item()) == 1:
            # eliminated buildings are not passed to the selection model: all selection operators are absent
            return {"elimination": 1, **{operator: 0 for operator in selection_operators}}

        # for all retained buildings, compute prediction through the selection model
        pred_selection = torch.sigmoid(selection_model(raster_batch))
        pred_selection_label = (pred_selection > 0.5).float().squeeze(0)

    operators_pred = {"elimination": 0}
    for i, operator in enumerate(selection_operators):
        operators_pred[operator] = int(pred_selection_label[i].item())

    return operators_pred

def predict_vector(elimination_model, selection_model, uuid, features, feature_order, attach_roads=True):
    '''Computes a generalization operator prediction for a given UUID using the specified graph-based elimination and selection model.
    Returns a dictionary with the operators as keys and values 1 / 0 indicating their respective presence / absence.
    Raises an IndexError if none of the vector files contains the given UUID in its name.'''
    # get the file associated with the given uuid
    matching_files = [file for file in vector_filenames if uuid in file]
    if not matching_files:
        raise IndexError(f"No vector file found for UUID {uuid}")
    vector_filename = matching_files[0]

    # load the vector file
    # NOTE: torch.load unpickles the file -- only load samples that come from a trusted source
    vector_sample_raw = torch.load(os.path.join(path_to_vector_experimental_data, vector_filename))

    # process the raw HeteroData object according to the specified information
    # features are passed as their sorted positions within feature_order; unknown features are skipped
    features_idx = sorted([feature_order.index(feature) for feature in features if feature in feature_order])
    vector_sample = process_HeteroData(vector_sample_raw,
                                       operators=[0,1,2,3,4,5], # operators do not matter -> take all
                                       features=features_idx,
                                       attach_roads=attach_roads)

    # pure inference: no gradients are needed, so do not track them
    with torch.no_grad():
        # compute prediction through the elimination model
        pred_elimination = torch.sigmoid(elimination_model(vector_sample.x_dict, vector_sample.edge_index_dict))
        pred_elimination_label = (pred_elimination > 0.5).float().squeeze(0)

        if int(pred_elimination_label.item()) == 1:
            # eliminated buildings are not passed to the selection model: all selection operators are absent
            return {"elimination": 1, **{operator: 0 for operator in selection_operators}}

        # for all retained buildings, compute prediction through the selection model
        pred_selection = torch.sigmoid(selection_model(vector_sample.x_dict, vector_sample.edge_index_dict))
        pred_selection_label = (pred_selection > 0.5).float().squeeze(0)

    operators_pred = {"elimination": 0}
    for i, operator in enumerate(selection_operators):
        operators_pred[operator] = int(pred_selection_label[i].item())

    return operators_pred

def predict_multimodal(elimination_model, selection_model, uuid, features, feature_order, attach_roads=True):
    '''Computes a generalization operator prediction for a given UUID using the specified multimodal elimination and selection model.
    Both the raster and the vector sample of the UUID are passed to the models.
    Returns a dictionary with the operators as keys and values 1 / 0 indicating their respective presence / absence.
    Raises an IndexError if the raster or the vector files contain no file with the given UUID in its name.'''
    # get the files associated with the given uuid
    matching_raster_files = [file for file in raster_filenames if uuid in file]
    if not matching_raster_files:
        raise IndexError(f"No raster file found for UUID {uuid}")
    raster_filename = matching_raster_files[0]

    matching_vector_files = [file for file in vector_filenames if uuid in file]
    if not matching_vector_files:
        raise IndexError(f"No vector file found for UUID {uuid}")
    vector_filename = matching_vector_files[0]

    # load the raster and vector files
    # NOTE: torch.load unpickles the file -- only load samples that come from a trusted source
    raster_sample_raw = np.load(os.path.join(path_to_raster_experimental_data, raster_filename))
    vector_sample_raw = torch.load(os.path.join(path_to_vector_experimental_data, vector_filename))

    # convert loaded file to tensor and add a leading batch dimension
    raster_sample = npz_to_tensor(raster_sample_raw, attach_roads=attach_roads)
    raster_batch = raster_sample.unsqueeze(0)

    # process the raw HeteroData object according to the specified information
    # features are passed as their sorted positions within feature_order; unknown features are skipped
    features_idx = sorted([feature_order.index(feature) for feature in features if feature in feature_order])
    vector_sample = process_HeteroData(vector_sample_raw,
                                       operators=[0,1,2,3,4,5], # operators do not matter -> take all
                                       features=features_idx,
                                       attach_roads=attach_roads)

    # pure inference: no gradients are needed, so do not track them
    with torch.no_grad():
        # compute prediction through the elimination model
        pred_elimination = torch.sigmoid(elimination_model(raster_batch, vector_sample))
        pred_elimination_label = (pred_elimination > 0.5).float().squeeze(0)

        if int(pred_elimination_label.item()) == 1:
            # eliminated buildings are not passed to the selection model: all selection operators are absent
            return {"elimination": 1, **{operator: 0 for operator in selection_operators}}

        # for all retained buildings, compute prediction through the selection model
        pred_selection = torch.sigmoid(selection_model(raster_batch, vector_sample))
        pred_selection_label = (pred_selection > 0.5).float().squeeze(0)

    operators_pred = {"elimination": 0}
    for i, operator in enumerate(selection_operators):
        operators_pred[operator] = int(pred_selection_label[i].item())

    return operators_pred

In [11]:
# storing raster predictions
preds_raster = buildings_experimental["source_uuid"].apply(lambda uuid: predict_raster(raster_eli_model, 
                                                                                       raster_sel_model, 
                                                                                       uuid))
preds_raster_df = preds_raster.apply(pd.Series)
preds_raster_df.columns = ["pred_" + col + "_raster" for col in preds_raster_df.columns]
buildings_experimental = buildings_experimental.join(preds_raster_df)

In [12]:
# storing vector predictions
preds_vector = buildings_experimental["source_uuid"].apply(lambda uuid: predict_vector(vector_eli_model, 
                                                                                       vector_sel_model, 
                                                                                       uuid, 
                                                                                       features=features, 
                                                                                       feature_order=feature_order))
preds_vector_df = preds_vector.apply(pd.Series)
preds_vector_df.columns = ["pred_" + col + "_vector" for col in preds_vector_df.columns]
buildings_experimental = buildings_experimental.join(preds_vector_df)

In [13]:
# storing multimodal predictions
preds_multimodal = buildings_experimental["source_uuid"].apply(lambda uuid: predict_multimodal(multimodal_eli_model, 
                                                                                               multimodal_sel_model, 
                                                                                               uuid, 
                                                                                               features=features, 
                                                                                               feature_order=feature_order))
preds_multimodal_df = preds_multimodal.apply(pd.Series)
preds_multimodal_df.columns = ["pred_" + col + "_multimodal" for col in preds_multimodal_df.columns]
buildings_experimental = buildings_experimental.join(preds_multimodal_df)

In [14]:
# preview the first rows of the experimental buildings together with the joined prediction columns
buildings_experimental.head()

Unnamed: 0,source_uuid,source_geom,target_uuid,target_geom,elimination,aggregation,typification,displacement,displacement_prob,enlargement,...,pred_elimination_vector,pred_aggregation_vector,pred_typification_vector,pred_displacement_vector,pred_enlargement_vector,pred_elimination_multimodal,pred_aggregation_multimodal,pred_typification_multimodal,pred_displacement_multimodal,pred_enlargement_multimodal
0,{000AE6EF-44E7-45FC-8A15-DC390E9F2119},"POLYGON ((2638742.691 1238818.351, 2638741.175...",{CACF96F5-D1DE-4CC3-A518-D2D04DCF1ACE},"POLYGON ((2638749.786249999 1238830.59375, 263...",0,1,0,1,0.754299,1,...,0,1,0,1,1,0,1,0,1,1
1,{000AEA34-2786-4950-82C9-D7521EAA7350},"POLYGON ((2724794.582 1083525.176, 2724797.008...",{299FFA96-0318-4231-93A2-0F38DC170A12},"POLYGON ((2724770.732500002 1083516.780000001,...",0,0,0,1,0.754299,1,...,0,0,0,1,1,0,0,0,1,1
2,{001EFAD2-E9DC-4497-898E-F0FFFF9EC265},"POLYGON ((2555575.611 1203683.451, 2555581.674...",,GEOMETRYCOLLECTION EMPTY,1,0,0,0,1.0,0,...,1,0,0,0,0,1,0,0,0,0
3,{0047F67E-1D97-4D21-BE85-27841DB321C0},"POLYGON ((2593472.703 1212791.095, 2593452.276...",{290D2143-EE95-4D50-A86D-1B0DE0F73A19},"POLYGON ((2593477.938749999 1212779.701250002,...",0,0,0,1,0.754299,1,...,0,1,0,1,1,0,1,0,1,1
4,{0102686F-8E80-4367-918E-4CD84EBBD653},"POLYGON ((2537294.931 1152988.837, 2537337.151...",{53D768C9-BBED-4553-9321-F0F6CA7633E5},"POLYGON ((2537285.1325 1152988.896249998, 2537...",0,0,0,1,0.754299,0,...,0,1,0,0,0,0,1,0,1,0
