#### Setup

In [1]:
import polars as pl
from analysis.characterisation.clustering import cluster_timeseries_usage, usage_probabilities
from analysis.characterisation.notebooks.notebook_config import (
    dl,
    FEATURES,
    N_CLUSTERS,
    DATASET_START,
    DATASET_END,
    TIME_SERIES_MODE,
    WINDOW_MONTHS,
)

usage = cluster_timeseries_usage(
    loader=dl,
    k=N_CLUSTERS,
    features=FEATURES,
    start=DATASET_START,
    end=DATASET_END,
    mode=TIME_SERIES_MODE,
    window_months=WINDOW_MONTHS
)

usage_probs = usage_probabilities(usage).sort(["station", "probability"], descending=True)


Perform Clustering in Interval 2016-01-01 until 2018-01-01
Perform Clustering in Interval 2016-02-01 until 2018-02-01
Perform Clustering in Interval 2016-03-01 until 2018-03-01
Perform Clustering in Interval 2016-04-01 until 2018-04-01
Perform Clustering in Interval 2016-05-01 until 2018-05-01
Perform Clustering in Interval 2016-06-01 until 2018-06-01
Perform Clustering in Interval 2016-07-01 until 2018-07-01
Perform Clustering in Interval 2016-08-01 until 2018-08-01
Perform Clustering in Interval 2016-09-01 until 2018-09-01
Perform Clustering in Interval 2016-10-01 until 2018-10-01
Perform Clustering in Interval 2016-11-01 until 2018-11-01
Perform Clustering in Interval 2016-12-01 until 2018-12-01
Perform Clustering in Interval 2017-01-01 until 2019-01-01
Perform Clustering in Interval 2017-02-01 until 2019-02-01
Perform Clustering in Interval 2017-03-01 until 2019-03-01
Perform Clustering in Interval 2017-04-01 until 2019-04-01
Perform Clustering in Interval 2017-05-01 until 2019-05-

## 7. Impact of Weather on Station Usage Patterns

### Event based effects

We will now investiage the effects of predefined weather effects on different usage types. 

Before we start we have to figure out how we should define the temperature, perticipation and wind classes. 

For this we will take a look at the weather data of Heidelberg.

#### Weather class definitions
Temperature classes were defined using empirical terciles (Q33, Q66).
For Precipitation zero values (dry days) were filtered out.

In [None]:
wd = dl.get_weather().df

wd_daily = (
    wd
    .with_columns(pl.col("datetime").dt.date().alias("date"))
    .group_by("date")
    .agg(
        pl.col("temperature_2m").max().alias("temp_max"),
        pl.col("precipitation").sum().alias("precip_sum"),
        pl.col("wind_speed_10m").max().alias("wind_max"),
    )
)

temp_quantiles = (
    wd_daily
    .select(
        pl.col("temp_max").quantile(0.33).alias("q33"),
        pl.col("temp_max").quantile(0.66).alias("q66"),
    )
)

precip_quantiles = (
    wd_daily
    .filter(pl.col("precip_sum") > 0)
    .select(
        pl.col("precip_sum").quantile(0.8).alias("q80"),
    )
)

wind_quantiles = (
    wd_daily
    .select(
        pl.col("wind_max").quantile(0.33).alias("q33"),
        pl.col("wind_max").quantile(0.66).alias("q66"),
    )
)


print("Temperature quantiles (daily max)")
display(temp_quantiles)

print("\nPrecipitation quantiles (daily sum)")
display(precip_quantiles)

print("Wind quantiles (daily max)")
display(wind_quantiles)

Temperature quantiles


q33,q66
f64,f64
10.7,19.8



Precipitation quantiles ()


q80
f64
7.1


Wind quantiles (daily max)


q33,q66
f64,f64
12.0,16.8


**Temperature**

We define three different temperature ranges:
$$
\begin{aligned}
\text{L (Low):} \quad & \max(T_{\text{day}}) < 10.7^\circ \mathrm{C} \\
\text{M (Medium):} \quad & 10.7 \leq \max(T_{\text{day}}) < 19.8^\circ \mathrm{C} \\
\text{H (High):} \quad & \max(T_{\text{day}}) \geq 19.8^\circ \mathrm{C}
\end{aligned}
$$

implemented in `temp_ranges`.

In [3]:
def temp_ranges(col):
    return (
        pl.when(col < 7.2).then(pl.lit("L"))
        .when(col < 15.1).then(pl.lit("M"))
        .otherwise(pl.lit("H"))
    )

We will use low temperatures (*L*) as baseline and calculate the relative change to range *M* and *H* per station. 

$$
\text{rel\_diff}_X = \frac{\bar{C}_X - \bar{C}_L}{\bar{C}_L} \times 100 
\quad \text{for } X \in \{M, H\}
$$

Furthermore we will look seperately at weekdays and weekends.

In [4]:
from analysis.characterisation.weather import weather_response_df
from analysis.characterisation.helpers import dominant_usage_per_station

def base_df(loader, usage_probs, sample_rate="1h"):
  df = weather_response_df(loader=loader, sample_rate=sample_rate)

  # add dominant usage type 
  df = df.join(dominant_usage_per_station(usage_probs), on="station", how="left")

  # add weekday / weekend classification
  df = df.with_columns([ 
    pl.when(pl.col("datetime").dt.weekday() <= 4)
      .then(pl.lit("weekday")) 
      .otherwise(pl.lit("weekend")) 
      .alias("day_type") 
  ])

  return df

In [5]:
from analysis.characterisation.event import event_effect_table

df = base_df(loader=dl, usage_probs=usage_probs).drop(["precip_sum", "wind_max"])
df = df.with_columns(
    temp_ranges(pl.col("temp_max")).alias("temp_range")
)

temp_table = event_effect_table(
    df,
    range_col="temp_range",
    group_cols=("station", "usage_type", "day_type"),
    agg_cols=("usage_type", "day_type"),
)

temp_table

usage_type,day_type,mean_count_H,mean_count_M,mean_count_L,rel_diff_H,rel_diff_M,rel_diff_L
str,str,f64,f64,f64,f64,f64,f64
"""mixed""","""weekday""",2173.38191,1599.092437,1383.361702,69.64,21.12,0.0
"""mixed""","""weekend""",1708.458472,1096.881356,886.703704,115.63,26.78,0.0
"""recreational""","""weekday""",1600.078153,1150.512535,1009.100503,35.89,6.88,0.0
"""recreational""","""weekend""",1375.209906,971.041667,754.486842,63.86,16.24,0.0
"""utilitarian""","""weekday""",5020.918736,3895.139706,3424.304348,38.65,10.55,0.0
"""utilitarian""","""weekend""",4061.065476,2811.023364,2367.888889,61.39,17.39,0.0


We can see mixed stations react the strongest to temperature change. 
Furthermore weekends are more sensitive to temperature change then weekdays.

**Precipitation**

For precipitation, daily exposure was quantified using the summed daily precipitation.

Again we defined three classes:

$$
\begin{aligned}
\text{L (Low):} \quad & 0~\text{mm} \\
\text{M (Medium):} \quad & 0 < P_{\text{day}} \leq 7.1~\text{mm} \\
\text{H (High):} \quad & P_{\text{day}} > 7.1~\text{mm}
\end{aligned}
$$

implemented in `precip_range`.

In [6]:
def precip_ranges(col):
  return (
    pl.when(col == 0)
    .then(pl.lit("L"))
    .when((col > 0) & (col <= 5))
    .then(pl.lit("M"))
    .otherwise(pl.lit("H"))
  )


As before we will calculate

$$
\text{rel\_diff}_X = \frac{\bar{C}_X - \bar{C}_L}{\bar{C}_L} \times 100 
\quad \text{for } X \in \{M, H\}
$$

In [7]:
df = base_df(loader=dl, usage_probs=usage_probs).drop(["temp_max", "wind_max"])
df = df.with_columns(
    precip_ranges(pl.col("precip_sum")).alias("precip_range")
)

precip_table = event_effect_table(
    df,
    range_col="precip_range",
    group_cols=("station", "usage_type", "day_type"),
    agg_cols=("usage_type", "day_type"),
)

precip_table

usage_type,day_type,mean_count_M,mean_count_L,mean_count_H,rel_diff_M,rel_diff_L,rel_diff_H
str,str,f64,f64,f64,f64,f64,f64
"""mixed""","""weekday""",1793.623907,2099.746753,1418.325397,-17.2,0.0,-37.09
"""mixed""","""weekend""",1316.484,1586.577869,955.565217,-19.41,0.0,-46.03
"""recreational""","""weekday""",1318.321429,1509.688596,1051.587302,-9.22,0.0,-22.38
"""recreational""","""weekend""",1129.962428,1274.750725,827.986577,-11.83,0.0,-28.94
"""utilitarian""","""weekday""",4252.88601,4810.272,3451.756944,-8.49,0.0,-23.46
"""utilitarian""","""weekend""",3281.972125,3822.520147,2404.801724,-13.27,0.0,-28.83


**Wind Speed**

Daily wind conditions were represented by the mean daily wind speed, as wind affects cycling continuously throughout the day.

$$
\begin{aligned}
\text{L (low):} \quad & \max(W_{\text{day}}) < 12~\text{m/s} \\
\text{M (medium):} \quad & 12 \leq \max(W_{\text{day}}) < 16.8~\text{m/s} \\
\text{H (high):} \quad & \max(W_{\text{day}}) \geq 16.8~\text{m/s}
\end{aligned}
$$

implemented in `wind_ranges`.

In [8]:
def wind_ranges(col):
  return (
    pl.when(col < 12).then(pl.lit("L"))
    .when(col < 16.8).then(pl.lit("M"))
    .otherwise(pl.lit("H"))
  )

In [9]:
df = base_df(loader=dl, usage_probs=usage_probs).drop(["precip_sum", "temp_max"])
df = df.with_columns(
    wind_ranges(pl.col("wind_max")).alias("wind_range")
)

wind_table = event_effect_table(
    df,
    range_col="wind_range",
    group_cols=("station", "usage_type", "day_type"),
    agg_cols=("usage_type", "day_type"),
)

wind_table

usage_type,day_type,mean_count_H,mean_count_M,mean_count_L,rel_diff_H,rel_diff_M,rel_diff_L
str,str,f64,f64,f64,f64,f64,f64
"""mixed""","""weekday""",1700.830986,1900.698413,1986.024896,-15.48,-0.44,0.0
"""mixed""","""weekend""",1228.293839,1482.79703,1418.861272,-20.22,0.91,0.0
"""recreational""","""weekday""",1219.450602,1433.997175,1423.244318,-11.56,-0.39,0.0
"""recreational""","""weekend""",1045.107937,1206.474048,1170.533898,-10.82,3.07,0.0
"""utilitarian""","""weekday""",3983.821317,4497.608247,4616.519031,-13.48,-1.73,0.0
"""utilitarian""","""weekend""",2990.891566,3515.107884,3615.903226,-17.16,1.04,0.0
