In [2]:
import pandas as pd
import h3
traffic_data = pd.read_parquet("../for_participants/data_parquet/traffic_train.parquet")
pois_df = pd.read_parquet("../for_participants/data_parquet/pois.parquet")

In [7]:
import pandas as pd
import numpy as np
import h3

def aggregate_by_dist(
    agg_points,
    data,
    allowed_distance=2,
    agg_fun=np.mean,
):
    all_h3s = pd.unique(np.concatenate([agg_points, data["h3res13"].values]))
    h3_to_coord = {h: h3.cell_to_latlng(h) for h in all_h3s}
    def distance_km_cached(h1, h2):
        loc1 = h3_to_coord[h1]
        loc2 = h3_to_coord[h2]
        return h3.great_circle_distance(loc1, loc2)

    results = []

    for point in agg_points:
        distances = data["h3res13"].apply(lambda x: distance_km_cached(x, point))
        nearby = data[distances < allowed_distance]

        if nearby.empty:
            continue

        aggregated = nearby.drop(columns=["h3res13"]).agg(agg_fun)
        aggregated["h3res13"] = point
        results.append(aggregated)

    if not results:
        return pd.DataFrame(columns=data.columns)

    return pd.DataFrame(results)


In [44]:
roads = pd.read_parquet('../for_participants/data_parquet/roads.parquet')
roads["roads_intensity_1"] = roads[["motorway", "primary", "secondary"]].sum(axis = 1)
roads["roads_intensity_2"] = roads[["tertiary", "residential", "living_street", "service"]].sum(axis = 1)
roads["roads_intensity_3"] = roads[["track", "footway", "cycleway", "bridleway", "path", "steps", "pedestrian"]].sum(axis = 1)

In [45]:
roads = roads[["h3res13", "roads_intensity_1", "roads_intensity_2", "roads_intensity_3"]]

In [46]:
from tqdm import tqdm
dfs0 = []
for dist in tqdm([0.2, 0.5, 1]):
    df0 = aggregate_by_dist(agg_points = traffic_data["h3res13"].unique(), data = roads[["h3res13", "roads_intensity_1", "roads_intensity_2", "roads_intensity_3"]], allowed_distance = dist, agg_fun = np.mean)
    new_columns = {
        col: col + "_" + str(dist) if col != "h3res13" else col
        for col in df0.columns
    }
    df0 = df0.rename(columns = new_columns)
    dfs0.append(df0)

100%|██████████| 3/3 [06:30<00:00, 130.11s/it]


In [48]:
df_roads_agg = pd.concat(dfs0, axis = 1)
df_roads_agg = df_buildings_agg.loc[:,~df_buildings_agg.columns.duplicated()]
df_roads_agg.to_parquet("dfs_processed/df_roads_agg.parquet")

In [33]:
buildings = pd.read_parquet('../for_participants/data_parquet/buildings.parquet')

In [34]:
building_function_group = {
    "Single-Family Residence": "Residential",
    "Multi-Family Residence": "Residential",
    "Apartment": "Residential",
    "Collected Dwelling Unit": "Residential",
    "Hotel": "Residential",
    "Tourist Accommodation Building": "Residential",
    "Retail and Service Building": "Service",
    "Hospitals and Medical Facility": "Service",
    "Schools and Research Institute": "Service",
    "Museums and Libraries": "Service",
    "Cultural Facility": "Service",
    "Cultural Public Facility": "Service",
    "Place of Worship": "Service",
    "Heritage Building": "Service",
    "Car Park": "Service",
    "Railway and Terminal Building": "Service",
    "Office Building": "Work",
    "Industrial Building": "Work",
    "Silo or Warehouse": "Work",
    "Agricultural Building": "Work",
    "Non-Residential Building": "Work",
}

buildings["building_function"] = buildings["building_function"].apply(lambda x: building_function_group[x]) #[""]

In [35]:
from tqdm import tqdm
buildings = buildings.pivot_table(
    index="h3res13",                   # grupuj po lokalizacji
    columns="building_function",       # przekształć kategorie w kolumny
    values="building_area",            # wartości to powierzchnia
    aggfunc="sum",                     # lub np. "mean" / "max"
    fill_value=0                       # brak danych = 0
).reset_index()

In [39]:
dfs0 = []
for dist in tqdm([0.5, 1, 1.5, 2]):
    df0 = aggregate_by_dist(agg_points = traffic_data["h3res13"].unique(), data = buildings[["h3res13", "Residential", "Service", "Work"]], allowed_distance = dist, agg_fun = np.sum)
    new_columns = {
        col: col + "_" + str(dist) if col != "h3res13" else col
        for col in df0.columns
    }
    df0 = df0.rename(columns = new_columns)
    dfs0.append(df0)

100%|██████████| 4/4 [06:44<00:00, 101.15s/it]


In [None]:
df_buildings_agg = pd.concat(dfs0, axis = 1)
df_buildings_agg = df_buildings_agg.loc[:,~df_buildings_agg.columns.duplicated()]
df_buildings_agg.to_parquet("dfs_processed/df_buildings_agg.parquet")