In [None]:
import geopandas as gpd
import osmnx as ox
from IPython.display import display
import numpy as np
from tqdm.auto import tqdm
import pandas as pd

from srai.embedders import Highway2VecEmbedder
from srai.loaders import osm_way_loader

## Basic usage

In [None]:
# embedder = Highway2VecEmbedder()

In [None]:
# Geocode the place name into a GeoDataFrame (its "geometry" column is read
# further down to download the street network), then preview it.
gdf_place = ox.geocode_to_gdf("Wrocław, Poland")
gdf_place.plot()

In [None]:
from srai.regionizers import H3Regionizer

# Split the place geometry into H3 regions and preview them.
# NOTE(review): 7 is presumably the H3 resolution — confirm against the
# H3Regionizer signature. `gdf_regions` is not read by any later visible cell.
gdf_regions = H3Regionizer(7).transform(gdf_place)
gdf_regions.plot()

In [None]:
# OSM way tags to keep on every edge; assigned to `ox.settings.useful_tags_way`
# in a later cell. Order is preserved exactly.
osmnx_road_infrastructure_tags = [
    "bridge", "tunnel", "oneway", "lanes", "ref",
    "name", "highway", "maxspeed", "service", "access",
    "area", "landuse", "width", "est_width", "junction",
    # missing in the original config
    # NOTE(review): "original config" presumably means OSMnx's default tag list — confirm.
    "surface", "footway", "bicycle", "lit",
]

# Network type forwarded to `ox.graph_from_polygon` when the graph is downloaded.
network_type = "drive"

In [None]:
# Replace OSMnx's list of way tags with the custom list defined above.
ox.settings.useful_tags_way = osmnx_road_infrastructure_tags
# Very generous request timeout.
# NOTE(review): presumably expressed in seconds — confirm in the OSMnx settings docs.
ox.settings.timeout = 10000

In [None]:
# Merge every row of `gdf_place` into a single (Multi)Polygon so the download
# also works when the place resolves to several polygons and does not depend
# on a row labelled 0. For a single-row frame this is that row's geometry.
polygon = gdf_place["geometry"].unary_union
G_directed = ox.graph_from_polygon(polygon, network_type=network_type, retain_all=True, clean_periphery=True)
# FIXME: get_undirected takes a really long time, which is weird.
# Maybe try dropping 'reversed' rows instead.
G = ox.utils_graph.get_undirected(G_directed)
ox.plot_graph(G)

In [None]:
# Convert the undirected graph into two GeoDataFrames: nodes and edges.
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)

In [None]:
# Draw the edges first, then overlay the nodes in red on the same axes.
ax = gdf_edges.plot(linewidth=1, figsize=(15, 10))
gdf_nodes.plot(ax=ax, markersize=3, color="red")

In [None]:
# from keplergl import KeplerGl

# m = KeplerGl(height=768, data={"nodes": gdf_nodes.copy(), "edges": gdf_edges.copy()})
# m

In [None]:
# Show the node table as this cell's output.
gdf_nodes

In [None]:
# Show the edge table as this cell's output.
gdf_edges

In [None]:
# Feature columns to encode. Materialised as a list (not a live dict view) so
# it can be reused safely as a column selector in the following cells.
cols = list(osm_way_loader.ACCEPTABLE_FEATURES.keys())

# Explode each feature column in turn so that list-like cells become one row
# per value. `explode` returns a new frame, so `gdf_edges` stays untouched.
# NOTE(review): presumably some tag values arrive as lists — confirm on the data.
gdf_edges_exploded = gdf_edges
for col in cols:
    gdf_edges_exploded = gdf_edges_exploded.explode(col)

# Append a running counter "i" as an extra index level so every exploded row
# has a unique index. Done without in-place mutation, so `gdf_edges` can never
# be modified through the alias above (e.g. when `cols` is empty) and the cell
# is safe to re-run.
gdf_edges_exploded = (
    gdf_edges_exploded
    .assign(i=range(len(gdf_edges_exploded)))
    .set_index("i", append=True)
)
gdf_edges_exploded

In [None]:
# TODO: preprocess data (normalize)

In [None]:
from functional import seq
# Build the ordered list of expected feature column names: "<tag>-<value>"
# for every accepted value of every tag, except "oneway", which keeps its
# bare tag name. Duplicates are removed by `distinct()`.
features = (
    seq(osm_way_loader.ACCEPTABLE_FEATURES.items())
    .flat_map(
        lambda tag_and_values: [
            tag_and_values[0] if tag_and_values[0] in ["oneway"] else f"{tag_and_values[0]}-{value}"
            for value in tag_and_values[1]
        ]
    )
    .distinct()
    .to_list()
)
features

In [None]:
# One-hot encode the feature columns, drop index level 3 (the "i" counter
# appended when exploding, assuming a three-level edge index), then merge the
# exploded rows back into one row per edge by taking the per-column maximum.
gdf_edges_wide = (
    pd.get_dummies(gdf_edges_exploded[cols], prefix_sep="-")
    .droplevel(3)
    .groupby(level=[0, 1, 2])
    .max()
    .astype(np.uint8)
)
# gdf_edges_wide.astype(pd.SparseDtype(np.uint8, 0)).info()

display(gdf_edges_wide)

In [None]:
# Align the columns with `features`: columns missing from the data are added
# and filled with 0, columns not listed in `features` are dropped, and the
# column order follows `features`.
gdf_edges_wide = gdf_edges_wide.reindex(columns=features, fill_value=0).astype(np.uint8)

In [None]:
# Preview the edges with their raw feature columns swapped for the encoded
# ones. The result is only displayed, not stored.
gpd.GeoDataFrame(
    pd.concat(
        [gdf_edges.drop(columns=cols), gdf_edges_wide],
        axis=1,
    ),
    crs="epsg:4326",
)

In [None]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
import pytorch_lightning as pl
from torch.utils.data.dataloader import DataLoader
from sklearn.model_selection import train_test_split

# Show whether PyTorch can see a CUDA device.
display(torch.cuda.is_available())

class LitAutoEncoder(pl.LightningModule):
    """Fully connected autoencoder trained to reconstruct its input.

    The encoder and the decoder each consist of two linear layers with a ReLU
    in between. The reconstruction is scored with mean squared error.

    Args:
        in_dim: Width of each input vector (and of the reconstruction).
        hidden_dim: Width of the hidden layer in both encoder and decoder.
        latent_dim: Width of the encoder output (the embedding).
        lr: Learning rate passed to the Adam optimizer.
    """

    def __init__(self, in_dim: int, hidden_dim: int = 64, latent_dim: int = 3, lr: float = 1e-3):
        super().__init__()

        # Record the constructor arguments as Lightning hyperparameters.
        self.save_hyperparameters()

        self.lr = lr

        # in_dim -> hidden_dim -> latent_dim
        self.encoder = nn.Sequential(
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
            # nn.Tanh()
        )
        # latent_dim -> hidden_dim -> in_dim
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, in_dim),
        )

    def forward(self, x):
        """Return the latent representation of ``x`` (encoder output only)."""
        return self.encoder(x)

    def training_step(self, batch, batch_idx):
        """Compute and log the reconstruction loss under the ``train`` prefix."""
        return self._common_step(batch, batch_idx, stage="train")

    def validation_step(self, batch, batch_idx):
        """Compute and log the reconstruction loss under the ``val`` prefix."""
        return self._common_step(batch, batch_idx, stage="val")

    def test_step(self, batch, batch_idx):
        """Compute and log the reconstruction loss under the ``test`` prefix."""
        return self._common_step(batch, batch_idx, stage="test")

    def _prepare_batch(self, batch, batch_idx):
        """Return the batch unchanged; hook for future input preprocessing."""
        # x = x.view(x.size(0), -1)
        return batch

    def _common_step(self, batch, batch_idx, stage: str) -> torch.Tensor:
        """Encode, decode, and log the MSE between input and reconstruction.

        Args:
            batch: Input batch as delivered by the dataloader.
            batch_idx: Index of the batch within the epoch.
            stage: Prefix of the logged metric name (``"<stage>_loss"``).

        Returns:
            The mean squared reconstruction error for the batch.
        """
        inputs = self._prepare_batch(batch, batch_idx)
        reconstruction = self.decoder(self.encoder(inputs))
        reconstruction_error = F.mse_loss(reconstruction, inputs)
        # Alternative left disabled: F.binary_cross_entropy_with_logits(reconstruction, inputs)

        self.log(f"{stage}_loss", reconstruction_error, on_epoch=True, on_step=True, prog_bar=True)

        return reconstruction_error

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters using ``self.lr``."""
        return optim.Adam(self.parameters(), lr=self.lr)

In [None]:
# --- Training configuration -------------------------------------------------
test_size = 0.2
random_seed = 42
batch_size = 128
num_workers = 6
shuffle = True
hidden_dim = 64
enc_out_dim = 40  # not referenced by any visible cell
latent_dim = 30
epochs = 10
kl_coeff = 0.1  # not referenced by any visible cell
lr = 1e-3
n_features = gdf_edges_wide.shape[1]

pl.seed_everything(random_seed, workers=True)

# Pinning host memory only pays off when batches are copied to a CUDA device;
# requesting it without one just triggers a warning.
use_pinned_memory = torch.cuda.is_available()

# --- Data -------------------------------------------------------------------
# The encoded edge features become a float tensor; the held-out part is used
# as the validation set during fitting.
X = torch.Tensor(gdf_edges_wide.values)
X_train, X_test = train_test_split(X, test_size=test_size, random_state=random_seed, shuffle=True)
X_train_dl = DataLoader(
    X_train, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, pin_memory=use_pinned_memory
)
X_test_dl = DataLoader(
    X_test, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=use_pinned_memory
)

# --- Model and training -----------------------------------------------------
model = LitAutoEncoder(in_dim=n_features, hidden_dim=hidden_dim, latent_dim=latent_dim, lr=lr)

# logger_tb = pl.loggers.TensorBoardLogger("tb_logs", name="test_model")

# "auto" lets Lightning pick the GPU when one is available and fall back to
# the CPU otherwise, so the notebook no longer crashes on machines without CUDA.
trainer = pl.Trainer(accelerator="auto", devices=1, max_epochs=epochs)
trainer.fit(model, train_dataloaders=X_train_dl, val_dataloaders=X_test_dl)


In [None]:
# Load (or reload) the TensorBoard notebook extension and open it on the
# `lightning_logs/` directory.
# NOTE(review): presumably where the Trainer above writes its logs — confirm.
%reload_ext tensorboard
%tensorboard --logdir=lightning_logs/