# Population in districts

In [None]:
import geopandas as gpd
import pandas as pd

## Load data from file

In [None]:
warsaw_population = pd.read_json('data/warsaw_population.json')
warsaw_population

In [None]:
warsaw_districts = gpd.read_file('data/warsaw_districts.geojson')
warsaw_districts

## Join data

In [None]:
warsaw_districts_with_population = warsaw_districts.merge(warsaw_population, on='district', how='inner')
warsaw_districts_with_population

## Show data on the map

In [None]:
warsaw_districts_with_population.explore('population')

# Population in buildings

## Load data from OpenStreetMap

In [None]:
from srai.loaders.osm_loaders import OSMOnlineLoader

In [None]:
warsaw_region = warsaw_districts_with_population.unary_union

In [None]:
loader = OSMOnlineLoader()

osm_building_types = [
    "residential",
    "apartments",
    "house",
    "semidetached_house",
    "detached",
]

buildings = loader.load(
    gpd.GeoDataFrame(geometry=[warsaw_region], crs="EPSG:4326"),
    # warsaw_region,
    {"building": osm_building_types, "building:levels": True},
)
buildings

In [None]:
buildings = buildings[buildings["building"].isin(osm_building_types)]
buildings = buildings.fillna(1)
buildings

## Clean data

In [None]:
def map_floors(floors_value: object) -> int:
    """Convert a raw ``building:levels`` value to a whole number of floors.

    Integer-like values are converted directly. Decimal values such as
    ``"2.5"`` are truncated towards zero instead of being discarded.

    Args:
        floors_value: Raw value of the tag (string or number).

    Returns:
        Parsed number of floors, or ``1`` when the value cannot be
        interpreted as a finite number.
    """
    try:
        return int(floors_value)
    except (TypeError, ValueError, OverflowError):
        # Not a plain integer; fall through and try a decimal notation.
        pass

    try:
        return int(float(floors_value))
    except (TypeError, ValueError, OverflowError):
        # Unparsable, missing, NaN or infinite: assume a single floor.
        return 1

In [None]:
buildings["building:levels"] = buildings["building:levels"].map(map_floors)
buildings

## Interpolate population over buildings
Use the number of floors and the footprint area of a building as a weight - bigger and taller buildings usually have more inhabitants

In [None]:
# Area computed directly on longitude/latitude geometries is expressed in
# squared degrees and is distorted, so measure the footprints in a metric
# (UTM) projection estimated from the data extent instead.
# NOTE(review): assumes `buildings` has a CRS set by the loader - confirm.
footprint_area = buildings.geometry.to_crs(buildings.estimate_utm_crs()).area
buildings['weight'] = footprint_area * buildings['building:levels']
buildings

In [None]:
# Bounding box of the preview area, in degrees of longitude (x) and latitude (y).
# The latitude bounds are ordered so that ymin <= ymax; the covered extent is
# the same rectangle as before.
xmin = 21.042753111534097
xmax = 21.069257679735955
ymin = 52.22533280016626
ymax = 52.24187245384607
buildings.cx[xmin:xmax, ymin:ymax].explore()

In [None]:
buildings["geometry"] = buildings.centroid
buildings.cx[xmin:xmax, ymin:ymax].explore()

In [None]:
buildings_with_districts = buildings.sjoin(warsaw_districts_with_population, predicate='within')
buildings_with_districts

In [None]:
total_weight_in_districts = buildings_with_districts.groupby('district')['weight'].sum().rename("total_weight")
total_weight_in_districts

In [None]:
# Attach the summed weight of each district to every building in it.
buildings_with_population = buildings_with_districts.merge(
    total_weight_in_districts, how="inner", on="district"
)

# Split the district population proportionally to the building weight:
# population * weight / total_weight, rounded to whole inhabitants.
district_population = buildings_with_population["population"]
building_weight = buildings_with_population["weight"]
district_weight = buildings_with_population["total_weight"]
buildings_with_population["population_in_building"] = (
    district_population.mul(building_weight).div(district_weight).round()
)
buildings_with_population

## Aggregate data into H3 index for easier representation

In [None]:
import h3

In [None]:
# Index every building point with the H3 cell (resolution 8) that contains it.
# h3 expects (lat, lng), i.e. (y, x) of the point.
buildings_with_population["h3"] = [
    h3.latlng_to_cell(point.y, point.x, 8)
    for point in buildings_with_population["geometry"]
]
buildings_with_population

In [None]:
population_aggregated = (
    buildings_with_population.groupby("h3")["population_in_building"]
    .sum()
    .rename("population_in_h3")
    .reset_index()
)
population_aggregated

In [None]:
population_aggregated.population_in_h3.sum()

In [None]:
from srai.h3 import h3_to_geoseries

In [None]:
population_aggregated = gpd.GeoDataFrame(population_aggregated, geometry=h3_to_geoseries(population_aggregated.h3))
population_aggregated

## Show data on the map
Three proposed solutions:
- a simple GeoPandas `.explore()` map
- a slightly prettier `srai.plotting` solution
- a 3D Deck.gl map

In [None]:
# base folium - explore
population_aggregated.explore('population_in_h3')

In [None]:
# srai - plot numeric
from srai.plotting import plot_numeric_data

In [None]:
plot_numeric_data(
    population_aggregated.rename(columns={"h3": "region_id"}).set_index("region_id"),
    "population_in_h3",
)

In [None]:
# pydeck 3d
from srai.plotting.folium_wrapper import _generate_linear_colormap
import plotly.express as px
import pydeck as pdk

In [None]:
colormap = _generate_linear_colormap(
    # https://plotly.com/python/builtin-colorscales/
    px.colors.sequential.Aggrnyl_r,
    min_value=population_aggregated["population_in_h3"].min(),
    max_value=population_aggregated["population_in_h3"].max(),
)

population_aggregated["color"] = population_aggregated["population_in_h3"].map(
    colormap.rgb_bytes_tuple
)

# Define a layer to display on a map
layer = pdk.Layer(
    "H3HexagonLayer",
    population_aggregated,
    pickable=True,
    stroked=True,
    filled=True,
    extruded=True,
    get_hexagon="h3",
    get_fill_color="[color[0], color[1], color[2], 204]",
    elevation_scale=0.5,
    get_elevation="population_in_h3",
    coverage=0.8,
)

# Set the viewport location
view_state = pdk.ViewState(
    latitude=52.2317, longitude=21.0062, zoom=9.5, bearing=0, pitch=30
)


# Render
pdk.Deck(
    layers=[layer],
    map_style="light",
    initial_view_state=view_state,
    tooltip={"text": "Population: {population_in_h3}"},
)

# OneBoxes by Allegro
Find out which one is the greenest

In [None]:
one_boxes_raw = pd.read_json('data/oneboxes.json')
one_boxes_raw

In [None]:
one_boxes_gdf = gpd.GeoDataFrame(
    one_boxes_raw,
    geometry=gpd.GeoSeries.from_xy(one_boxes_raw["lon"], one_boxes_raw["lat"]),
    crs="EPSG:4326",
)
one_boxes_gdf.explore()

In [None]:
warsaw_one_boxes = one_boxes_gdf.clip(warsaw_region)
warsaw_one_boxes.explore()

In [None]:
from srai.geometry import buffer_geometry

In [None]:
warsaw_one_boxes = warsaw_one_boxes.rename(columns={"geometry": "point"})
warsaw_one_boxes["buffer_1000m"] = warsaw_one_boxes["point"].apply(
    lambda geometry: buffer_geometry(geometry, 1000)
)
warsaw_one_boxes = warsaw_one_boxes.set_geometry("buffer_1000m")
warsaw_one_boxes

In [None]:
warsaw_one_boxes.explore(style_kwds=dict(opacity=0.5, fillOpacity=0.1))

In [None]:
from srai.loaders.osm_loaders import OSMPbfLoader

In [None]:
loader = OSMPbfLoader()

In [None]:
greenery = loader.load(
    gpd.GeoDataFrame(geometry=[warsaw_region], crs="EPSG:4326"),
    # warsaw_region,
    {
        "leisure": ["garden", "park"],
        "natural": ["wood", "scrub", "heath", "grassland"],
        "landuse": ["grass", "orchard", "flowerbed", "forest", "greenfield", "meadow"],
    },
)
greenery

In [None]:
greenery = greenery[greenery.geom_type != 'Point']
greenery

In [None]:
greenery.plot(color="tab:green")

In [None]:
warsaw_one_boxes_with_greenery = warsaw_one_boxes.sjoin(greenery)
warsaw_one_boxes_with_greenery

In [None]:
warsaw_one_boxes_with_greenery = gpd.overlay(
    warsaw_one_boxes,
    greenery,
    how="intersection",
)

warsaw_one_boxes_with_greenery

In [None]:
# Dissolve replaces groupby in spatial operations
# https://geopandas.org/en/stable/docs/user_guide/aggregation_with_dissolve.html
warsaw_one_boxes_with_greenery = warsaw_one_boxes_with_greenery.dissolve(by='id')
warsaw_one_boxes_with_greenery

In [None]:
warsaw_one_boxes_with_greenery.plot(color='tab:green')

In [None]:
# Measure the dissolved greenery in a metric (UTM) projection; areas taken
# directly on longitude/latitude geometries are in squared degrees.
# NOTE(review): assumes the overlay result kept its CRS - confirm.
metric_crs = warsaw_one_boxes_with_greenery.estimate_utm_crs()
warsaw_one_boxes_with_greenery['greenery_area'] = (
    warsaw_one_boxes_with_greenery['geometry'].to_crs(metric_crs).area
)
# Greenest OneBox first.
warsaw_one_boxes_with_greenery = warsaw_one_boxes_with_greenery.sort_values(by='greenery_area', ascending=False)
warsaw_one_boxes_with_greenery.head()

In [None]:
m = warsaw_one_boxes.merge(
    warsaw_one_boxes_with_greenery["greenery_area"].reset_index(), on="id"
).explore(
    "greenery_area",
    cmap="BuGn",
    tiles="CartoDB positron",
    style_kwds=dict(opacity=0.5, fillOpacity=0.1),
)

warsaw_one_boxes['point'].explore(m=m, color='green')

In [None]:
warsaw_one_boxes_with_greenery.loc[["AL014WXY"]].explore(color="green")

# Veturilo - Contextual count embedder

In [None]:
stations_raw = pd.read_json('data/venturilo_stations.json')
stations_raw

In [None]:
stations_gdf = gpd.GeoDataFrame(
    stations_raw,
    geometry=gpd.GeoSeries.from_xy(stations_raw["lon"], stations_raw["lat"]),
    crs="EPSG:4326",
)
stations_gdf.explore()

In [None]:
from srai.regionalizers import H3Regionalizer
from srai.plotting import plot_regions

In [None]:
h3_regions = H3Regionalizer(resolution=9).transform(
    gpd.GeoDataFrame(geometry=[warsaw_region], crs="EPSG:4326")
    # warsaw_region
)
plot_regions(h3_regions)

In [None]:
from srai.loaders.osm_loaders.filters import GEOFABRIK_LAYERS

In [None]:
geofabrik_features = OSMPbfLoader().load(
    gpd.GeoDataFrame(geometry=[warsaw_region], crs="EPSG:4326"),
    # warsaw_region,
    GEOFABRIK_LAYERS
)
geofabrik_features

In [None]:
geofabrik_features_without_stations = geofabrik_features[
    geofabrik_features["shopping"] != "amenity=bicycle_rental"
]
geofabrik_features_without_stations

In [None]:
from srai.embedders import ContextualCountEmbedder
from srai.neighbourhoods import H3Neighbourhood
from srai.joiners import IntersectionJoiner

In [None]:
joined_features = IntersectionJoiner().transform(h3_regions, geofabrik_features_without_stations)
joined_features

In [None]:
embedder = ContextualCountEmbedder(neighbourhood=H3Neighbourhood(), neighbourhood_distance=5, concatenate_vectors=True)
embeddings = embedder.transform(regions_gdf=h3_regions, features_gdf=geofabrik_features_without_stations, joint_gdf=joined_features)
embeddings

In [None]:
bikes_joint = IntersectionJoiner().transform(h3_regions, stations_gdf)
bikes_joint

In [None]:
# Regions that contain at least one bike station are the positive samples.
positive_samples = h3_regions.join(bikes_joint, how="inner")
positive_samples = positive_samples.reset_index().drop(columns=["feature_id"]).set_index("region_id")
# The join yields one row per (region, station) pair, so a hexagon with
# several stations would otherwise appear multiple times - inflating the
# sample count and leaking identical rows across the train/test split.
positive_samples = positive_samples[~positive_samples.index.duplicated(keep="first")].copy()
positive_samples["is_positive"] = True
len(positive_samples)

In [None]:
# Every region that hosts no station is a negative sample.
has_station = h3_regions.index.isin(positive_samples.index)
negative_samples = h3_regions[~has_station].copy()
negative_samples["is_positive"] = False
len(negative_samples)

In [None]:
# Keep three negatives per positive, but never request more rows than are
# available - `sample` raises when n exceeds the population size.
negative_sample_size = min(3 * len(positive_samples), len(negative_samples))
negative_undersampled = negative_samples.sample(n=negative_sample_size, random_state=42)
negative_undersampled

In [None]:
CB_SAFE_PALLETE = [
    "#377eb8",
    "#ff7f00",
    "#4daf4a",
    "#f781bf",
    "#a65628",
    "#984ea3",
    "#999999",
    "#e41a1c",
    "#dede00",
]

In [None]:
train_data = pd.concat([positive_samples, negative_undersampled])
train_data.explore("is_positive", cmap=CB_SAFE_PALLETE, zoom_start=14, height=600, tiles="CartoDB positron")

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [None]:
X = embeddings.loc[train_data.index].to_numpy()
y = train_data["is_positive"].astype(int).to_numpy()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [None]:
classifier = SVC(probability=True)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
y_pred_proba = classifier.predict_proba(X_test)

print(classification_report(y_test, y_pred))

In [None]:
station_probas = classifier.predict_proba(embeddings.to_numpy())

# Column 1 holds the probability of the positive class (label 1).
# Wrap it in a Series keyed by the embeddings index so the values are aligned
# with the regions by id instead of relying on identical row order.
h3_regions["station_proba"] = pd.Series(station_probas[:, 1], index=embeddings.index)
m = plot_numeric_data(h3_regions, "station_proba", colormap="Spectral_r", opacity=0.5)

# Overlay the real stations on top of the predicted probabilities.
stations_gdf.explore(m=m, color='black')

## Transfer learning

In [None]:
from srai.regionalizers import geocode_to_region_gdf

In [None]:
wroclaw_region = geocode_to_region_gdf('Wrocław, PL')
wroclaw_region

In [None]:
wroclaw_h3_regions = H3Regionalizer(resolution=9).transform(wroclaw_region)
wroclaw_h3_regions

In [None]:
wroclaw_geofabrik_features = OSMPbfLoader().load(
    wroclaw_region,
    GEOFABRIK_LAYERS
)
wroclaw_geofabrik_features_without_stations = wroclaw_geofabrik_features[
    wroclaw_geofabrik_features["shopping"] != "amenity=bicycle_rental"
]

In [None]:
wroclaw_joined_features = IntersectionJoiner().transform(wroclaw_h3_regions, wroclaw_geofabrik_features_without_stations)

In [None]:
wroclaw_embeddings = embedder.transform(
    regions_gdf=wroclaw_h3_regions,
    features_gdf=wroclaw_geofabrik_features_without_stations,
    joint_gdf=wroclaw_joined_features,
)
wroclaw_embeddings

In [None]:
# The classifier was fitted on the columns of `embeddings`; make sure the
# transferred city exposes exactly the same features in the same order
# (features absent in this city are filled with zero counts).
wroclaw_model_input = wroclaw_embeddings.reindex(columns=embeddings.columns, fill_value=0)
station_probas = classifier.predict_proba(wroclaw_model_input.to_numpy())

# Align by region id rather than by row position.
wroclaw_h3_regions["station_proba"] = pd.Series(
    station_probas[:, 1], index=wroclaw_model_input.index
)
plot_numeric_data(wroclaw_h3_regions, "station_proba", colormap="Spectral_r", opacity=0.5)

# Taxis and flow prediction

In [None]:
import zipfile

from tqdm import tqdm

In [None]:
# Unpack the trips archive into the data directory, member by member.
with zipfile.ZipFile('data/trips_hexes.zip', "r") as zf:
    for member in tqdm(zf.infolist(), desc=""):
        try:
            zf.extract(member, 'data')
        except zipfile.BadZipFile as error:
            # Keep extraction best-effort, but report the failure so a
            # missing file later on can be traced back to its cause.
            tqdm.write(f"Skipping {member.filename}: {error}")

In [None]:
taxi_trips_h3 = pd.read_csv('data/trips_hexes.csv')
taxi_trips_h3

In [None]:
taxi_trips_h3['start_point'] = taxi_trips_h3['start_hex'].apply(h3.cell_to_latlng)
taxi_trips_h3['end_point'] = taxi_trips_h3['end_hex'].apply(h3.cell_to_latlng)
taxi_trips_h3

In [None]:
taxi_trips_h3['start_lat'], taxi_trips_h3['start_lon'] = zip(*taxi_trips_h3['start_point'])
taxi_trips_h3['end_lat'], taxi_trips_h3['end_lon'] = zip(*taxi_trips_h3['end_point'])
taxi_trips_h3

In [None]:
# Min-max scale the trip counts to the [0, 1] range.
trips_min = taxi_trips_h3["trips"].min()
trips_range = taxi_trips_h3["trips"].max() - trips_min
if trips_range == 0:
    # All counts are identical: avoid a 0/0 division that would yield NaN.
    taxi_trips_h3["trips_normalized"] = 0.0
else:
    taxi_trips_h3["trips_normalized"] = (taxi_trips_h3["trips"] - trips_min) / trips_range

In [None]:
arc_layer = pdk.Layer(
    "ArcLayer",
    data=taxi_trips_h3.sample(frac=0.1),
    get_width="0.5 + trips_normalized * 9",
    get_source_position=["start_lon", "start_lat"],
    get_target_position=["end_lon", "end_lat"],
    get_tilt=15,
    get_source_color="[0, 255, 0, 40 + trips_normalized * 215]",
    get_target_color="[0, 150, 255, 40 + trips_normalized * 215]",
    pickable=True,
    auto_highlight=True,
)

view_state = pdk.ViewState(latitude=41.1493, longitude=-8.6111, bearing=45, pitch=65, zoom=10.5,)

TOOLTIP_TEXT = {"html": "{trips} trips <br /> Start of the trip in green; end of the trip in blue"}
pdk.Deck(arc_layer, initial_view_state=view_state, tooltip=TOOLTIP_TEXT)


In [None]:
unique_hexes = set(taxi_trips_h3['start_hex'].unique()).union(taxi_trips_h3['end_hex'].unique())
len(unique_hexes)

In [None]:
coordinates = [h3.cell_to_latlng(h3_cell)[::-1] for h3_cell in unique_hexes]
coordinates[:10]

In [None]:
unique_points = gpd.GeoDataFrame(geometry=gpd.GeoSeries.from_xy(*zip(*coordinates)), crs='EPSG:4326')
unique_points.explore()

In [None]:
from srai.regionalizers import AdministrativeBoundaryRegionalizer

In [None]:
portugal_regionalizer = AdministrativeBoundaryRegionalizer(admin_level=7, clip_regions=False)

municipalities = portugal_regionalizer.transform(unique_points)
municipalities

In [None]:
municipalities.explore()

In [None]:
trip_h3_resolution = h3.get_resolution(taxi_trips_h3['start_hex'].iloc[0])
trip_h3_resolution

In [None]:
portugal_h3_regions = H3Regionalizer(resolution=trip_h3_resolution).transform(municipalities)
portugal_h3_regions

In [None]:
loader = OSMPbfLoader()

portugal_features = loader.load(municipalities, GEOFABRIK_LAYERS)
portugal_features

In [None]:
portugal_features[portugal_features.geom_type != 'Point'].plot()

In [None]:
portugal_joint_features = IntersectionJoiner().transform(portugal_h3_regions, portugal_features)
portugal_joint_features

In [None]:
from srai.embedders import Hex2VecEmbedder

In [None]:
hex2vec_embedder = Hex2VecEmbedder(encoder_sizes=[300, 150, 50])

In [None]:
portugal_h3_neighbourhood = H3Neighbourhood(portugal_h3_regions)
portugal_embeddings = hex2vec_embedder.fit_transform(
    regions_gdf=portugal_h3_regions,
    features_gdf=portugal_features,
    joint_gdf=portugal_joint_features,
    neighbourhood=portugal_h3_neighbourhood,
    negative_sample_k_distance=2,
    batch_size=64,
    learning_rate=0.001,
    trainer_kwargs={
        # "max_epochs": 50, # uncomment for a longer training
        "max_epochs": 5,
        # "accelerator": "cpu",
    },
)
portugal_embeddings

In [None]:
from sklearn.decomposition import PCA

# Reduce the embeddings to 3 principal components so that each region can be
# shown as a single colour (one component per RGB channel).
pca = PCA(n_components=3)

pca_embeddings = pca.fit_transform(portugal_embeddings)
# Wrap the result in a DataFrame that keeps the region index of the embeddings.
pca_embeddings = pd.DataFrame(pca_embeddings, index=portugal_embeddings.index)

# Min-max scale every component independently to 0-255 and truncate to
# integers so the values can be used as RGB channels.
pca_embeddings = (
    (pca_embeddings - pca_embeddings.min())
    / (pca_embeddings.max() - pca_embeddings.min())
    * 255
).astype(int)

# Build a CSS-style "rgb(r, g, b)" string for every region.
pca_embeddings["rgb"] = pca_embeddings.apply(
    lambda row: f"rgb({row[0]}, {row[1]}, {row[2]})", axis=1
)

# Keep only the regions intersecting the union of the two selected municipalities.
porto_regions = portugal_h3_regions[
    portugal_h3_regions.intersects(
        municipalities.loc[["Porto", "Vila Nova de Gaia"]].unary_union
    )
]

# Map the positional number of each selected region to its colour string.
color_dict = dict(
    enumerate(porto_regions.index.map(pca_embeddings["rgb"].to_dict()).to_list())
)
# The double reset_index exposes "region_id" and a positional "index" column;
# the latter is plotted and translated to a colour through `color_dict`.
porto_regions.reset_index().reset_index().explore(
    column="index",
    tooltip="region_id",
    tiles="CartoDB positron",
    legend=False,
    cmap=lambda x: color_dict[x],
    style_kwds=dict(color="#444", opacity=0.0, fillOpacity=0.5),
)

In [None]:
portugal_h3_index = portugal_embeddings.index

In [None]:
sum_of_trips_per_hex = taxi_trips_h3.groupby('start_hex')['trips'].sum()
sum_of_trips_per_hex

In [None]:
portugal_h3_index_with_trips = portugal_h3_index.intersection(sum_of_trips_per_hex.index)
portugal_h3_index_with_trips

In [None]:
portugal_h3_index_without_trips = portugal_h3_index.difference(sum_of_trips_per_hex.index)

In [None]:
# One row per embedded region: the number of trips starting there, or 0 when
# no trip starts in the region. Reindexing on the embeddings index also drops
# start hexes that have no embedding, which would otherwise make the later
# `portugal_embeddings.loc[...]` lookup fail with a KeyError.
sum_of_trips_per_hex_full = sum_of_trips_per_hex.reindex(
    portugal_h3_index, fill_value=0
).to_frame(name="trips")
sum_of_trips_per_hex_full

In [None]:
from sklearn.svm import SVR

In [None]:
X = portugal_embeddings.loc[sum_of_trips_per_hex_full.index].to_numpy()
y = sum_of_trips_per_hex_full["trips"].astype(int).to_numpy()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error

In [None]:
regressor = SVR()
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

In [None]:
print(
    r2_score(y_test, y_pred),
    mean_absolute_error(y_test, y_pred),
    mean_absolute_percentage_error(y_test, y_pred),
)
# Those numbers are probably very bad xD

In [None]:
# TODO: add predicted number of trips to the visualization with some splitting over destinations

In [None]:
from typing import Any, List

import torch
from torch import nn
from pytorch_lightning import LightningModule

def weighted_mse_loss(input, target, weight):
    """Return the weighted sum of squared errors.

    Note that the result is a sum, not a mean: it is not divided by the
    number of elements or by the total weight.

    Args:
        input: Predicted tensor.
        target: Expected tensor, broadcastable with ``input``.
        weight: Weights, broadcastable with the squared error.

    Returns:
        Scalar tensor with ``sum(weight * (input - target) ** 2)``.
    """
    squared_error = torch.square(input - target)
    return (weight * squared_error).sum()

class TripPredictorModel(LightningModule):
    """Feed-forward network mapping a start-region embedding to a predicted
    end-region embedding.

    Args:
        embedding_size: Size of the input and output vectors. Defaults to 50.
        hidden_size: Width of the two hidden layers. Defaults to 30.
    """

    def __init__(self, embedding_size: int = 50, hidden_size: int = 30) -> None:
        super().__init__()
        # The output has the same size as the input, because the prediction
        # is compared against another embedding in `training_step`.
        self.nn_model = nn.Sequential(
            nn.Linear(embedding_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, embedding_size),
        )

    def forward(self, x: "torch.Tensor") -> "torch.Tensor":
        """
        Forward pass.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            torch.Tensor: Output of the network for ``x``.
        """
        embedding: "torch.Tensor" = self.nn_model(x)
        return embedding

    def configure_optimizers(self) -> "torch.optim.Optimizer":
        """Configure the Adam optimizer with a learning rate of 1e-3."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)

    def training_step(self, batch: List["torch.Tensor"], batch_idx: Any) -> "torch.Tensor":
        """
        Training step.

        Args:
            batch (List[torch.Tensor]): Triple of input, target and weight tensors.
            batch_idx (Any): Batch index.

        Returns:
            torch.Tensor: Weighted loss of the batch, also logged as ``train_loss``.
        """
        x, y, weight = batch
        y_pred = self(x)
        loss = weighted_mse_loss(y_pred, y, weight)
        self.log("train_loss", loss)
        return loss

In [None]:
from torch.utils.data import Dataset


class TripsDataset(Dataset):
    """Dataset of (start embedding, end embedding, trip count) samples.

    Args:
        start_embeddings: Array with one start embedding per row.
        end_embeddings: Array with one end embedding per row.
        trips: One-dimensional array with one trip count per row.
    """

    def __init__(self, start_embeddings, end_embeddings, trips):
        # Store the counts as a column so each sample's weight has shape (1,).
        trips_column = trips.reshape((len(trips), 1))
        self.start_embeddings, self.end_embeddings, self.trips = (
            torch.Tensor(values)
            for values in (start_embeddings, end_embeddings, trips_column)
        )

    def __getitem__(self, index):
        """Return the ``(start, end, weight)`` triple stored at ``index``."""
        return (
            self.start_embeddings[index],
            self.end_embeddings[index],
            self.trips[index],
        )

    def __len__(self):
        """Return the number of samples."""
        return len(self.start_embeddings)

In [None]:
start_hexes = taxi_trips_h3['start_hex']
end_hexes = taxi_trips_h3['end_hex']
no_trips = taxi_trips_h3['trips']

trips_dataset = TripsDataset(
    start_embeddings=portugal_embeddings.loc[start_hexes].to_numpy(),
    end_embeddings=portugal_embeddings.loc[end_hexes].to_numpy(),
    trips=no_trips.to_numpy(),
)

In [None]:
from torch.utils.data import DataLoader
import pytorch_lightning as pl

trainer_kwargs = {
    # "max_epochs": 50, # uncomment for a longer training
    "max_epochs": 5,
    # "accelerator": "cpu",
}

dataloader = DataLoader(trips_dataset, batch_size=128, shuffle=True, num_workers=0)
trip_predictor_model = TripPredictorModel()



In [None]:
trainer = pl.Trainer(**trainer_kwargs)
trainer.fit(trip_predictor_model, dataloader)

In [None]:
from annoy import AnnoyIndex

In [None]:
all_portugal_embeddings = portugal_embeddings.to_numpy()

# Size the index from the data instead of hard-coding the embedding length.
portugal_annoy_index = AnnoyIndex(all_portugal_embeddings.shape[1], "angular")

# Item ids are row positions, so they can be translated back to region ids
# through the embeddings index.
for idx, embedding in enumerate(tqdm(all_portugal_embeddings)):
    portugal_annoy_index.add_item(idx, embedding)

portugal_annoy_index.build(100, n_jobs=-1)

In [None]:
# Inference only: switch to eval mode and skip gradient tracking.
trip_predictor_model.eval()
with torch.no_grad():
    portugal_predicted_trips_embeddings = trip_predictor_model(
        torch.Tensor(all_portugal_embeddings)
    ).numpy()
portugal_predicted_trips_embeddings

In [None]:
# For every region, look up the regions whose embeddings are closest to the
# predicted trip-end embedding and record them as candidate destinations.
trip_pairs = []
for start_hex, predicted_end_embedding in tqdm(
    zip(portugal_h3_index, portugal_predicted_trips_embeddings),
    total=len(portugal_h3_index),
):
    neighbour_ids, neighbour_distances = portugal_annoy_index.get_nns_by_vector(
        predicted_end_embedding, n=10, include_distances=True
    )
    trip_pairs.extend(
        {"start_hex": start_hex, "end_hex": end_hex, "distance": neighbour_distance}
        for end_hex, neighbour_distance in zip(
            portugal_h3_index[neighbour_ids], neighbour_distances
        )
    )

predicted_trips = pd.DataFrame(trip_pairs)

# Cell centres as (lat, lng) tuples.
start_points = predicted_trips["start_hex"].apply(h3.cell_to_latlng)
end_points = predicted_trips["end_hex"].apply(h3.cell_to_latlng)
predicted_trips["start_point"] = start_points
predicted_trips["end_point"] = end_points

# Split the tuples into separate latitude / longitude columns.
start_lats, start_lons = zip(*predicted_trips["start_point"])
predicted_trips["start_lat"] = start_lats
predicted_trips["start_lon"] = start_lons
end_lats, end_lons = zip(*predicted_trips["end_point"])
predicted_trips["end_lat"] = end_lats
predicted_trips["end_lon"] = end_lons
predicted_trips

In [None]:
# `predicted_trips` has no `trips_normalized` column, so the arc opacity is
# derived from the neighbour distance instead: Annoy's angular distance lies
# in [0, 2], hence `1 - distance / 2` is a similarity in [0, 1].
arc_layer = pdk.Layer(
    "ArcLayer",
    data=predicted_trips.sample(frac=0.1),
    # get_width="5 * (1 - distance)",
    get_width="0.5 * (1 - distance)",
    get_source_position=["start_lon", "start_lat"],
    get_target_position=["end_lon", "end_lat"],
    get_tilt=15,
    get_source_color="[0, 255, 0, 40 + (1 - distance / 2) * 215]",
    get_target_color="[0, 150, 255, 40 + (1 - distance / 2) * 215]",
    pickable=True,
    auto_highlight=True,
)

view_state = pdk.ViewState(latitude=41.1493, longitude=-8.6111, bearing=45, pitch=65, zoom=10.5,)

TOOLTIP_TEXT = {"html": "Predicted distance {distance} <br /> Start of the trip in green; end of the trip in blue"}
pdk.Deck(arc_layer, initial_view_state=view_state, tooltip=TOOLTIP_TEXT)