In [2]:
import pandas as pd
from sqlalchemy import create_engine

# Connection to the Postgres instance on localhost:5433 (database "layereddb").
# NOTE(review): credentials are embedded in the URL literal; consider loading
# them from the environment before sharing this notebook.
DB_URL = "postgresql+psycopg2://******:******@localhost:5433/layereddb"
engine = create_engine(DB_URL)

# Aggregates berlin_source_data.bike_lanes per (district_id, district):
# the number of distinct bike lanes and the summed length_m divided by 1000.
BIKE_LANES_SQL = """SELECT
    district_id,
    district,
    COUNT(DISTINCT bikelane_id) AS bike_lane_count,
    SUM(length_m)/1000  AS total_bike_lane_km
FROM
    berlin_source_data.bike_lanes
GROUP BY
    district_id,district"""

bike_lanes = pd.read_sql(BIKE_LANES_SQL, con=engine)

bike_lanes

Unnamed: 0,district_id,district,bike_lane_count,total_bike_lane_km
0,11001001,Mitte,7552,460.034537
1,11002002,Friedrichshain-Kreuzberg,4682,248.631691
2,11003003,Pankow,6693,710.96871
3,11004004,Charlottenburg-Wilmersdorf,7991,576.16455
4,11005005,Spandau,3362,412.91488
5,11006006,Steglitz-Zehlendorf,8976,687.074532
6,11007007,Tempelhof-Schöneberg,7438,462.387772
7,11008008,Neukölln,4287,360.631901
8,11009009,Treptow-Köpenick,6292,703.774888
9,11010010,Marzahn-Hellersdorf,9665,687.593283


In [3]:
# Pull every column of the per-district attribute table.
DISTRICT_ATTRS_SQL = """
    SELECT *
    FROM berlin_labels.district_attributes
"""

district_attrs = pd.read_sql(DISTRICT_ATTRS_SQL, con=engine)

# Show the first five rows as a quick sanity check.
district_attrs.head(5)

Unnamed: 0,district_id,area_sq_km,inhabitants,area_coefficient,population_coefficient
0,11004004,64.662978,343081,0.871208,1.061595
1,11002002,20.389118,293454,0.274704,0.908034
2,11011011,52.091363,311881,0.70183,0.965053
3,11010010,61.782422,291948,0.832398,0.903374
4,11001001,39.379173,397134,0.530558,1.228851


In [4]:
# Left-join the bike-lane aggregates onto the district attributes so that every
# district in `district_attrs` is kept, even when it has no bike-lane rows.
# Only the two bike-lane aggregates are zero-filled: a blanket fillna(0) would
# also write 0 into the text `district` column and would turn missing area or
# population values into 0, which the metrics below divide by.
df = district_attrs.merge(bike_lanes, on="district_id", how="left").fillna(
    {"bike_lane_count": 0, "total_bike_lane_km": 0.0}
)

df

Unnamed: 0,district_id,area_sq_km,inhabitants,area_coefficient,population_coefficient,district,bike_lane_count,total_bike_lane_km
0,11004004,64.662978,343081,0.871208,1.061595,Charlottenburg-Wilmersdorf,7991,576.16455
1,11002002,20.389118,293454,0.274704,0.908034,Friedrichshain-Kreuzberg,4682,248.631691
2,11011011,52.091363,311881,0.70183,0.965053,Lichtenberg,5929,486.786417
3,11010010,61.782422,291948,0.832398,0.903374,Marzahn-Hellersdorf,9665,687.593283
4,11001001,39.379173,397134,0.530558,1.228851,Mitte,7552,460.034537
5,11008008,44.907902,330017,0.605047,1.021171,Neukölln,4287,360.631901
6,11003003,103.162091,424307,1.389909,1.312933,Pankow,6693,710.96871
7,11012012,89.28078,268792,1.202885,0.831723,Reinickendorf,5966,658.288595
8,11005005,91.836013,257091,1.237312,0.795516,Spandau,3362,412.91488
9,11006006,102.514181,310446,1.381179,0.960613,Steglitz-Zehlendorf,8976,687.074532


In [5]:
# --- Core bike lane metrics ---

# Weight applied to the raw lane count; kept small so the count does not
# dominate the two ratio-based terms of the score.
COUNT_WEIGHT = 0.001

lane_km = df["total_bike_lane_km"]

# Density: bike lane length per km² of district area.
df["bike_lane_density_km2"] = lane_km.div(df["area_sq_km"])

# Accessibility: bike lane length per 1,000 inhabitants.
df["bike_lane_per_1000"] = lane_km.div(df["inhabitants"].div(1000))

# Composite score: each ratio is scaled by its district coefficient, and the
# lane count is added to reflect network complexity.
density_term = df["bike_lane_density_km2"].mul(df["area_coefficient"])
access_term = df["bike_lane_per_1000"].mul(df["population_coefficient"])
count_term = df["bike_lane_count"].mul(COUNT_WEIGHT)

df["bike_lane_score"] = density_term + access_term + count_term



In [6]:
# --- Quartile cut-offs of the score distribution ---
# One call yields the 25th, 50th and 75th percentiles in ascending order.
q25, q50, q75 = df["bike_lane_score"].quantile([0.25, 0.50, 0.75]).tolist()

# ---  Define labeling function ---
def assign_bike_label(row, cutoffs=None):
    """Map a row's ``bike_lane_score`` to a bike-infrastructure tier label.

    Parameters
    ----------
    row : mapping-like
        Anything supporting ``row["bike_lane_score"]`` (e.g. a DataFrame row).
    cutoffs : sequence of three numbers, optional
        ``(upper, middle, lower)`` thresholds in descending order. When omitted,
        the notebook-level ``q75``, ``q50`` and ``q25`` values are used.

    Returns
    -------
    str or None
        One of ``"very_high_bike_infrastructure"``, ``"high_bike_infrastructure"``,
        ``"medium_bike_infrastructure"`` or ``"low_bike_infrastructure"``.
        ``None`` when the score is missing, so the district is left unlabelled
        instead of being reported as "low".
    """
    score = row["bike_lane_score"]

    # A NaN score fails every >= comparison and would otherwise fall through
    # to the "low" tier.
    if pd.isna(score):
        return None

    upper, middle, lower = (q75, q50, q25) if cutoffs is None else cutoffs

    if score >= upper:
        return "very_high_bike_infrastructure"
    elif score >= middle:
        return "high_bike_infrastructure"
    elif score >= lower:
        return "medium_bike_infrastructure"
    else:
        return "low_bike_infrastructure"

# --- Label every district row ---
df["bike_friendly_label"] = df.apply(assign_bike_label, axis="columns")

# --- Ranked overview, highest score first ---
score_overview = df.loc[:, ["district", "bike_lane_score", "bike_friendly_label"]]
score_overview.sort_values(by="bike_lane_score", ascending=False)

Unnamed: 0,district,bike_lane_score,bike_friendly_label
3,Marzahn-Hellersdorf,21.056603,very_high_bike_infrastructure
9,Steglitz-Zehlendorf,20.359008,very_high_bike_infrastructure
6,Pankow,18.471872,very_high_bike_infrastructure
11,Treptow-Köpenick,17.951689,high_bike_infrastructure
0,Charlottenburg-Wilmersdorf,17.536523,high_bike_infrastructure
7,Reinickendorf,16.872101,high_bike_infrastructure
4,Mitte,15.173556,medium_bike_infrastructure
10,Tempelhof-Schöneberg,15.098543,medium_bike_infrastructure
2,Lichtenberg,13.993764,medium_bike_infrastructure
5,Neukölln,10.261717,low_bike_infrastructure


In [None]:
# --- Build the (district_id, category, label) table for upload ---

# Category value stored alongside every bike-infrastructure label.
LABEL_CATEGORY = "Mobility & Accessibility"

# Keep only the key and the tier label computed earlier.
bike_labels_df = df[["district_id", "bike_friendly_label"]].copy()

# Each district carries at most one label, so unlabelled rows are dropped
# directly; wrapping every label in a list and exploding it again is not needed.
final_bike_df = bike_labels_df.dropna(subset=["bike_friendly_label"]).copy()

# --- Add category name ---
final_bike_df["category"] = LABEL_CATEGORY

# --- Prefix labels with a hashtag for consistency ---
final_bike_df["label"] = "#" + final_bike_df["bike_friendly_label"]

# --- Keep only the upload columns, in upload order ---
final_bike_df = final_bike_df[["district_id", "category", "label"]]

# --- Preview final table ---
print("✅ Preview of final data to upload:")
final_bike_df

✅ Preview of final data to upload:


Unnamed: 0,district_id,category,label
0,11004004,Mobility & Accessibility,#high_bike_infrastructure
1,11002002,Mobility & Accessibility,#low_bike_infrastructure
2,11011011,Mobility & Accessibility,#medium_bike_infrastructure
3,11010010,Mobility & Accessibility,#very_high_bike_infrastructure
4,11001001,Mobility & Accessibility,#medium_bike_infrastructure
5,11008008,Mobility & Accessibility,#low_bike_infrastructure
6,11003003,Mobility & Accessibility,#very_high_bike_infrastructure
7,11012012,Mobility & Accessibility,#high_bike_infrastructure
8,11005005,Mobility & Accessibility,#low_bike_infrastructure
9,11006006,Mobility & Accessibility,#very_high_bike_infrastructure


In [10]:
# --- Upload to Database ---
# Appends the label rows to berlin_labels.district_labels_new.
# NOTE(review): if_exists='append' inserts the rows on every execution, so
# re-running this cell adds the same labels again unless the table enforces
# uniqueness — confirm before re-running.

try:
    final_bike_df.to_sql(
        'district_labels_new',
        engine,
        schema='berlin_labels',
        if_exists='append',
        index=False
    )
except Exception as e:
    # Log for the reader, then re-raise so a failed upload stops the run
    # instead of looking like a completed cell.
    print(f"❌ Upload error: {e}")
    raise
else:
    print(f"✅ Successfully uploaded {len(final_bike_df)} bike labels to the database.")

✅ Successfully uploaded 12 bike labels to the database.
