In [1]:
import sys
from pathlib import Path

import pandas as pd
import folium
from folium.plugins import TimestampedGeoJson
import seaborn as sns
from matplotlib import cm, colors

# make project root discoverable
project_root = Path(__file__).resolve().parents[2] if "__file__" in globals() else Path().resolve().parents[1]
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

from paths import DATA_DIR


In [2]:
data = pd.read_parquet(DATA_DIR / "train.parquet")
data_test = pd.read_parquet(DATA_DIR / "final_test.parquet")

data.head()


Unnamed: 0,counter_id,counter_name,site_id,site_name,bike_count,date,counter_installation_date,coordinates,counter_technical_id,latitude,longitude,log_bike_count
48321,100007049-102007049,28 boulevard Diderot E-O,100007049,28 boulevard Diderot,0.0,2020-09-01 02:00:00,2013-01-18,"48.846028,2.375429",Y2H15027244,48.846028,2.375429,0.0
48324,100007049-102007049,28 boulevard Diderot E-O,100007049,28 boulevard Diderot,1.0,2020-09-01 03:00:00,2013-01-18,"48.846028,2.375429",Y2H15027244,48.846028,2.375429,0.693147
48327,100007049-102007049,28 boulevard Diderot E-O,100007049,28 boulevard Diderot,0.0,2020-09-01 04:00:00,2013-01-18,"48.846028,2.375429",Y2H15027244,48.846028,2.375429,0.0
48330,100007049-102007049,28 boulevard Diderot E-O,100007049,28 boulevard Diderot,4.0,2020-09-01 15:00:00,2013-01-18,"48.846028,2.375429",Y2H15027244,48.846028,2.375429,1.609438
48333,100007049-102007049,28 boulevard Diderot E-O,100007049,28 boulevard Diderot,9.0,2020-09-01 18:00:00,2013-01-18,"48.846028,2.375429",Y2H15027244,48.846028,2.375429,2.302585


In [3]:
m = folium.Map(location=data[["latitude", "longitude"]].mean(axis=0), zoom_start=13)

for _, row in (
    data[["counter_name", "latitude", "longitude"]]
    .drop_duplicates("counter_name")
    .iterrows()
):
    folium.Marker(
        row[["latitude", "longitude"]].values.tolist(), 
        popup=row["counter_name"]
    ).add_to(m)

m


  coords = (location[0], location[1])


In [4]:
grouped_data = (
    data.groupby(["counter_name", pd.Grouper(freq="1M", key="date")])["log_bike_count"]
    .sum()
    .reset_index()
)

coordinates_mapper = data[["counter_name", "latitude", "longitude"]].drop_duplicates()
grouped_data = grouped_data.merge(
    coordinates_mapper,
    on="counter_name",
    how="left"
)

grouped_data.head()


  data.groupby(["counter_name", pd.Grouper(freq="1M", key="date")])["log_bike_count"]
  data.groupby(["counter_name", pd.Grouper(freq="1M", key="date")])["log_bike_count"]


Unnamed: 0,counter_name,date,log_bike_count,latitude,longitude
0,152 boulevard du Montparnasse E-O,2020-09-30,2572.257371,48.840801,2.333233
1,152 boulevard du Montparnasse E-O,2020-10-31,2382.779737,48.840801,2.333233
2,152 boulevard du Montparnasse E-O,2020-11-30,2032.81023,48.840801,2.333233
3,152 boulevard du Montparnasse E-O,2020-12-31,2026.296477,48.840801,2.333233
4,152 boulevard du Montparnasse E-O,2021-01-31,1577.473441,48.840801,2.333233


In [5]:
# Normalize log_bike_count for gradient mapping
log_min, log_max = grouped_data["log_bike_count"].min(), grouped_data["log_bike_count"].max()
norm = colors.Normalize(vmin=log_min, vmax=log_max)

# Create a colormap (green → yellow → red, reversed for high=red)
cmap = cm.get_cmap("RdYlGn_r")

def get_gradient_color(log_value):
    rgba_color = cmap(norm(log_value))
    return colors.rgb2hex(rgba_color[:3])


  cmap = cm.get_cmap("RdYlGn_r")


In [6]:
features = [
    {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": [row["longitude"], row["latitude"]],
        },
        "properties": {
            "time": row["date"].isoformat(),
            "icon": "circle",
            "iconstyle": {
                "fillColor": get_gradient_color(row["log_bike_count"]),
                "fillOpacity": 1,
                "stroke": "false",
                "radius": 8,
            },
            "style": {"weight": 0},
            "popup": f"{row['counter_name']} — Bike Count: {row['log_bike_count']}",
        },
    }
    for _, row in grouped_data.iterrows()
]

geojson_data = {
    "type": "FeatureCollection",
    "features": features,
}


In [7]:
m = folium.Map(location=data[["latitude", "longitude"]].mean(axis=0), zoom_start=13)

TimestampedGeoJson(
    data=geojson_data,
    period="P1M",              # Monthly steps
    add_last_point=False,
    auto_play=True,
    loop=True,
    max_speed=10,
    loop_button=True,
    date_options="YYYY-MM",
    time_slider_drag_update=True,
).add_to(m)

m


  coords = (location[0], location[1])
