## Prepare

In [1]:
%load_ext autoreload
%autoreload 2
import sys

sys.path.append("../src/")
import itertools
import os

import folium
import matplotlib.pyplot as plt
import pandas as pd
from mcr_py.package.geometa import GeoMeta
from mcr_py.package.logger import setup
from mcr_py.package.mcr.data import NetworkType, OSMData
from mcr_py.package.mcr5.labels import read_labels_for_nodes
from mcr_py.package.minute_city import minute_city
from mcr_py.package.minute_city.profile import fill_columns_by_left
from tqdm.auto import tqdm

setup("INFO")

In [2]:
city_id_osm = "Koeln"
city_id = "cologne"
date_ = "20240423"

In [3]:
geo_meta_path = f"../data/stateful_variables/{city_id}_geometa.pkl"
stops = f"../data/gtfs-cleaned/{city_id}_{date_}/stops.csv"
structs = f"../data/gtfs-cleaned/{city_id}_{date_}/structs.pkl"

geo_meta = GeoMeta.load(geo_meta_path)
geo_data = OSMData(
    geo_meta,
    city_id_osm,
    additional_network_types=[NetworkType.DRIVING],
)
nodes = geo_data.osm_nodes
edges = geo_data.osm_edges
driving_nodes, _, _ = geo_data.additional_networks[NetworkType.DRIVING]

## Fetch POIs

In [4]:
pois = minute_city.fetch_pois_for_area(geo_meta.boundary, nodes)
pois.head(3)

Unnamed: 0,name,id,lat,lon,geometry,type,nearest_osm_node_id,distance
0,Trink & Spare,263442909,51.0268777,6.8879464,POINT (6.88795 51.02688),grocery,9007727500,21.104681
1,Weinhaus Linke,444820559,50.9543812,6.9189388,POINT (6.91894 50.95438),grocery,3890235659,13.294807
2,O'Donnell Moonshine,448400534,50.9382394,6.9408967,POINT (6.94090 50.93824),grocery,6513219752,16.728748


## Visualize POIs

In [6]:
pois["type"].unique()

array(['grocery', 'education', 'health', 'banks', 'parks', 'sustenance',
       'shops'], dtype=object)

In [None]:
def rgb2hex(rgb):
    return "#{:02x}{:02x}{:02x}".format(
        int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)
    )


types = pois["type"].unique()

types = ["grocery", "education", "health", "banks", "parks", "sustenance", "shops"]

# Get a colormap and create a color mapping
colormap = plt.colormaps.get_cmap("tab10")
color_mapping = {type_: rgb2hex(colormap(i)) for i, type_ in enumerate(types)}
color_mapping

bbox = nodes.unary_union.convex_hull.bounds

t = types[0]
print(t)

m = folium.Map(location=[50.9375, 6.9603], zoom_start=12)
# geo_meta.add_to_folium_map(m)
# selected_pois = pois[pois["type"] == t]
selected_pois = pois
for _, poi in selected_pois.iterrows():
    folium.CircleMarker(
        location=(poi.lat, poi.lon),
        popup=poi["name"],
        radius=1,
        color=color_mapping[poi["type"]],
    ).add_to(m)

m

## Calculate 15-minute-city metric

In [8]:
mcr5_output_path = f"../data/mcr5/{city_id}_{date_}"

In [9]:
labels_per_scenario = {}
for entry in os.scandir(mcr5_output_path):
    if not entry.is_dir():
        continue

    labels = read_labels_for_nodes(entry.path, pois.nearest_osm_node_id.unique())

    labels = minute_city.add_pois_to_labels(labels, pois)
    labels_per_scenario[entry.name] = labels

In [10]:
types = list(pois["type"].unique())

In [11]:
profiles_df_per_scenario = {}
for scenario, labels in tqdm(labels_per_scenario.items()):
    profiles_df_per_scenario[scenario] = minute_city.get_profiles_df(
        labels, types, disable_tqdm=True
    )

  0%|          | 0/33 [00:00<?, ?it/s]

In [12]:
for scenario, profiles_df in profiles_df_per_scenario.items():
    profiles_df["scenario"] = scenario

In [13]:
profiles_df = pd.concat(profiles_df_per_scenario.values())

### per type profile

In [14]:
profiles_df_per_scenario_per_type = {t: {} for t in types}

for t, (scenario, labels) in tqdm(
    list(itertools.product(types, labels_per_scenario.items()))
):
    profiles_df_per_scenario_per_type[t][scenario] = minute_city.get_profiles_df(
        labels, [t], disable_tqdm=True
    )

  0%|          | 0/231 [00:00<?, ?it/s]

In [15]:
for t, dfs in profiles_df_per_scenario_per_type.items():
    for scenario, df in dfs.items():
        df["category"] = t
        df["scenario"] = scenario

profiles_df_categories = pd.concat(
    [df for dfs in profiles_df_per_scenario_per_type.values() for df in dfs.values()]
)

### Profile dataframe polishing

In [16]:
def calculate_unit_metrics(profiles_df):
    profiles_df["required_cost_for_optimal_in_euro"] = (
        profiles_df["required_cost_for_optimal"] / 100
    )
    profiles_df["optimal_in_minutes"] = profiles_df["optimal"] / 60
    return profiles_df


profiles_df = calculate_unit_metrics(profiles_df)
profiles_df_categories = calculate_unit_metrics(profiles_df_categories)

In [17]:
def reorder_columns(profiles_df):
    cost_columns = []
    other_columns = []

    for column in profiles_df.columns:
        if column.startswith("cost_"):
            cost_columns.append(column)
        else:
            other_columns.append(column)

    cost_columns.sort(key=lambda x: int(x.split("_")[1]))
    new_columns = other_columns + cost_columns

    # Reorder the DataFrame columns
    profiles_df = profiles_df[new_columns]

    return profiles_df, cost_columns


profiles_df, cost_columns = reorder_columns(profiles_df)
profiles_df_categories, cost_columns_categories = reorder_columns(
    profiles_df_categories
)

In [18]:
profiles_df[cost_columns] = fill_columns_by_left(profiles_df[cost_columns])
profiles_df_categories[cost_columns_categories] = fill_columns_by_left(
    profiles_df_categories[cost_columns_categories]
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  profiles_df["cost_0"] = profiles_df["cost_0"].fillna(float("inf"))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  profiles_df[c] = profiles_df[c].fillna(profiles_df[previous_column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  profiles_df["cost_0"] = profiles_df["cost_0"].fillna(float("inf"))
A 

In [19]:
def trim_trailing_numbers(string):
    has_trailing_numbers = string[-1].isdigit()
    if not has_trailing_numbers:
        return string
    return "_".join(string.split("_")[:-1])

In [20]:
profiles_df["core_scenario"] = profiles_df["scenario"].apply(trim_trailing_numbers)
profiles_df_categories["core_scenario"] = profiles_df_categories["scenario"].apply(
    trim_trailing_numbers
)

In [21]:
profiles_df.to_feather(f"{mcr5_output_path}/profiles-tariffs.feather")
profiles_df_categories.to_feather(
    f"{mcr5_output_path}/profiles-categories-tariffs.feather"
)