In [70]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
from tabulate import tabulate

In [71]:
# Load the data for each station.
# All counter CSVs of one city live in a single folder; the file stem of each
# CSV is used as the station identifier by the loaders below.
city = "Stadt_Heidelberg"
folder = f'../../data/processed/cycle_counter/{city}'

# List the folder derived from `city` so that changing `city` also changes
# which directory is scanned. os.listdir returns entries in arbitrary order,
# so sort them to keep the station order reproducible between runs.
files = sorted(os.listdir(folder))
stations = [os.path.splitext(f)[0] for f in files if f.endswith('.csv')]

## Data Loaders

In [72]:
import pandas as pd

def import_data(station, data_folder=None):
    """Load one station's counter CSV and return ``(station_name, df)``.

    Parameters
    ----------
    station : str
        File stem of the station CSV; the file read is
        ``<data_folder>/<station>.csv``.
    data_folder : str, optional
        Directory containing the CSV. Defaults to the notebook-level
        ``folder`` variable.

    Returns
    -------
    station_name : str
        ``counter_site`` value of the first row with a parseable timestamp,
        or ``station`` itself when no such row exists.
    df : pandas.DataFrame
        The CSV rows whose ``iso_timestamp`` could be parsed, with that column
        converted to timezone-aware ``Europe/Berlin`` timestamps.
    """
    # `folder` is only looked up when no explicit directory was passed.
    base = folder if data_folder is None else data_folder
    df = pd.read_csv(f'{base}/{station}.csv')

    # Parse as UTC first (unparseable values become NaT), then shift to local time.
    timestamps = pd.to_datetime(df['iso_timestamp'], utc=True, errors='coerce')
    df['iso_timestamp'] = timestamps.dt.tz_convert('Europe/Berlin')

    # dropna() returns a new object, so its result has to be assigned back for
    # the rows to actually disappear. Renumber the index so row 0 is a valid row.
    df = df.dropna(subset=['iso_timestamp']).reset_index(drop=True)

    # Positional access: does not depend on which index labels survived.
    station_name = df['counter_site'].iloc[0] if len(df) else station

    return station_name, df

def get_daily_data(station, year):
    """Return ``(station_name, daily_sum)`` with one row per calendar day of ``year``.

    ``daily_sum`` has the columns ``iso_timestamp`` (the day) and ``total``
    (the sum of ``channels_all`` over that day).
    """
    station_name, records = import_data(station)

    in_year = records[records['iso_timestamp'].dt.year == year]
    day_of_record = in_year['iso_timestamp'].dt.date

    daily_sum = (
        in_year
        .groupby(day_of_record)['channels_all']
        .sum()
        .reset_index()
        .rename(columns={'channels_all': 'total'})
    )

    # Grouping by `.dt.date` leaves plain date objects in the column, so it is
    # converted back to pandas timestamps here.
    daily_sum['iso_timestamp'] = pd.to_datetime(daily_sum['iso_timestamp'])
    return station_name, daily_sum

## FFT Handling

In [73]:
def get_fft_components(y, n_components=4):
    """Extract the strongest periodic components of an evenly sampled series.

    The mean is removed, the FFT is taken, and the ``n_components``
    positive-frequency bins with the largest magnitude are each turned back
    into a time series of their own.

    Parameters
    ----------
    y : array-like
        1-D series of samples. Frequencies are computed with a sample spacing
        of 1.0, so periods are expressed in samples (days for daily data).
    n_components : int, default 4
        Number of components to extract. Values <= 0 yield no components;
        fewer components are returned if the series has fewer positive
        frequency bins than requested.

    Returns
    -------
    dict
        ``'comp_1'`` ... ``'comp_k'`` (strongest first), each a dict with
        ``'ts'`` (component plus series mean), ``'period_days'`` and
        ``'color'``; plus ``'reconstruction'`` (sum of all selected components
        plus mean), ``'freqs'``, ``'index'``, ``'mean'`` and ``'top_freq'``.
        Note that ``'top_freq'`` holds the *periods* (``1 / frequency``) of the
        selected components, in the same order as ``comp_1`` ... ``comp_k``.

    Raises
    ------
    ValueError
        If ``y`` is empty.
    """
    y = np.asarray(y, dtype=float)
    n = len(y)
    if n == 0:
        raise ValueError("get_fft_components() needs at least one sample")

    y_mean = y.mean()
    y_centered = y - y_mean

    fft = np.fft.fft(y_centered)
    freqs = np.fft.fftfreq(n, d=1.0)

    pos_idx = np.where(freqs > 0)[0]
    mags = np.abs(fft[pos_idx])

    # Sort descending and cut from the front. A negative slice such as
    # `[-n_components:]` would select *everything* for n_components == 0.
    n_keep = max(int(n_components), 0)
    top = pos_idx[np.argsort(mags)[::-1][:n_keep]]

    palette = ['red', 'green', 'orange', 'blue', 'purple']
    components = {}
    reconstruction = np.zeros(n)
    top_freq = []
    for i, k in enumerate(top, start=1):
        spec = np.zeros_like(fft, dtype=complex)

        # Keep the bin and its mirror on the negative side of the spectrum;
        # both are needed for the inverse FFT to give a real signal.
        spec[k] = fft[k]
        spec[-k] = fft[-k]

        periodic_signal = np.fft.ifft(spec).real
        period = 1.0 / freqs[k]
        components[f'comp_{i}'] = {
            'ts': periodic_signal + y_mean,
            'period_days': period,
            'color': palette[(i - 1) % len(palette)]
        }
        reconstruction += periodic_signal
        top_freq.append(period)

    reconstruction += y_mean

    components['reconstruction'] = reconstruction
    components['freqs'] = freqs
    components['index'] = np.arange(n)
    components['mean'] = y_mean
    components['top_freq'] = top_freq
    return components

## Plot One Station

In [74]:
def plot_yearly_trend_traces(daily_sum):
    """Build the Plotly traces for the daily-count trend plot.

    Parameters
    ----------
    daily_sum : pandas.DataFrame
        Frame with an ``iso_timestamp`` column and a ``total`` column.
        The input is not modified; a copy is used internally.

    Returns
    -------
    list of plotly.graph_objects.Scatter
        One line trace per month group, coloured by season, followed by a
        dashed grey trace of the exponential moving average of ``total``.
    """
    daily_sum = daily_sum.copy()
    daily_sum['iso_timestamp'] = pd.to_datetime(daily_sum['iso_timestamp'])
    ema = daily_sum['total'].ewm(span=2, adjust=False).mean()

    # Winter appears twice, which produces two separate traces, so no line
    # segment is drawn from February straight across to December.
    month_groups = {
        (1, 2): ('Winter', 'black'),
        (12,): ('Winter', 'black'),
        (3, 4, 5): ('Spring', 'green'),
        (6, 7, 8): ('Summer', 'red'),
        (9, 10, 11): ('Autumn', 'orange'),
    }

    traces = []
    shown = set()
    for months, (label, color) in month_groups.items():
        mask = daily_sum['iso_timestamp'].dt.month.isin(list(months))
        traces.append(go.Scatter(
            x=daily_sum.loc[mask, 'iso_timestamp'],
            y=daily_sum.loc[mask, 'total'],
            mode='lines',
            name=label,
            line=dict(color=color),
            # Traces of the same season share one legend entry; grouping them
            # makes a click on that entry toggle all of them, not only the
            # first one.
            legendgroup=label,
            showlegend=label not in shown,
            hovertemplate='Date: %{x}<br>Cyclists: %{y}<extra></extra>'
        ))
        shown.add(label)

    traces.append(go.Scatter(
        x=daily_sum['iso_timestamp'],
        y=ema,
        mode='lines',
        name='EMA',
        line=dict(color='grey', dash='dash'),
        opacity=0.5,
        hoverinfo='skip'
    ))
    return traces

def fft_components_traces(daily_sum, n_components=4):
    """Build the Plotly traces for the FFT-component plot.

    The daily totals are put on a gap-free daily grid (missing days are
    interpolated, leading/trailing gaps are back-/forward-filled) before the
    FFT components are computed.

    Parameters
    ----------
    daily_sum : pandas.DataFrame
        Frame with an ``iso_timestamp`` column and a ``total`` column.
    n_components : int, default 4
        Number of FFT components requested from ``get_fft_components``.

    Returns
    -------
    list of plotly.graph_objects.Scatter
        The original series, one trace per available component, and the
        reconstruction. An empty list when ``daily_sum`` has no rows.
    """
    if daily_sum.empty:
        # Nothing to resample or transform; let the caller draw an empty figure.
        return []

    ds = daily_sum.sort_values('iso_timestamp').copy()
    ds = ds.set_index(pd.DatetimeIndex(ds['iso_timestamp'])).asfreq('D')
    ds['total'] = ds['total'].interpolate().bfill().ffill()
    y = ds['total'].to_numpy()

    components = get_fft_components(y, n_components=n_components)
    traces = []

    # original
    traces.append(go.Scatter(
        x=ds.index, y=y, mode='lines', name='Original',
        line=dict(color='black'),
        hovertemplate='Date: %{x}<br>Total: %{y}<extra></extra>'
    ))

    # components
    for i in range(1, n_components + 1):
        comp = components.get(f'comp_{i}')
        if comp is None:
            # A short series can yield fewer components than requested.
            break
        traces.append(go.Scatter(
            x=ds.index, y=comp['ts'], mode='lines',
            name=f"{comp['period_days']:.1f} Days",
            line=dict(color=comp['color']),
            opacity=0.7,
            hoverinfo='skip'
        ))

    # reconstruction trace
    traces.append(go.Scatter(
        x=ds.index, y=components['reconstruction'], mode='lines',
        name='Reconstruction',
        line=dict(color='grey'),
        hoverinfo='skip'
    ))
    return traces

def plot_year_and_fft(station, year, n_components=4):
    """Show two figures for one station and year: daily trend, then FFT components."""
    name, daily_sum = get_daily_data(station, year)

    # (trace builder, figure title, y-axis label) per figure, in display order.
    # Builders are called lazily so the first figure is shown before the
    # traces of the second one are computed.
    panels = (
        (
            lambda: plot_yearly_trend_traces(daily_sum),
            f"Daily Amount of Cyclists - {name} ({year})",
            "Number of Cyclists",
        ),
        (
            lambda: fft_components_traces(daily_sum, n_components=n_components),
            f"FFT Components - {name} ({year})",
            "Value",
        ),
    )

    for build_traces, title, y_label in panels:
        figure = go.Figure(data=build_traces())
        figure.update_layout(
            title=title,
            xaxis_title="Date",
            yaxis_title=y_label,
            template="plotly_white"
        )
        figure.show()

# Example: trend and FFT figures for a single counter in 2024.
plot_year_and_fft(station='station_100012161', year=2024)

## Are Frequencies a Recurring Pattern?

In [75]:
def frequencies_table_tabulate(year, n_components=6):
    """Print a GitHub-style table of the dominant FFT periods of every station.

    Each row holds the station name followed by the periods of its strongest
    FFT components for ``year``, sorted from longest to shortest. Stations
    without data in ``year`` are left out.
    """
    header_row = ['Station', *(f'Comp {idx}' for idx in range(1, n_components + 1))]

    table = []
    for station_id in stations:
        site_name, daily = get_daily_data(station_id, year)
        if daily.empty:
            continue

        # Put the totals on a gap-free daily grid before transforming.
        series = daily.sort_values('iso_timestamp').copy()
        series = series.set_index(pd.DatetimeIndex(series['iso_timestamp'])).asfreq('D')
        series['total'] = series['total'].interpolate().bfill().ffill()

        fft_result = get_fft_components(series['total'].to_numpy(), n_components=n_components)
        periods = list(fft_result['top_freq'])
        periods.sort(reverse=True)

        table.append([site_name] + [f"{period:.3f}" for period in periods])

    print(tabulate(table, headers=header_row, tablefmt="github"))

# Compare the dominant periods across all stations for 2024.
frequencies_table_tabulate(year=2024)

| Station                         |   Comp 1 |   Comp 2 |   Comp 3 |   Comp 4 |   Comp 5 |   Comp 6 |
|---------------------------------|----------|----------|----------|----------|----------|----------|
| Mannheimer Straße               |  366     |  183     |   91.5   |   52.286 |    7.038 |    6.906 |
| Thedor-Heuss-Brücke Querschnitt |  366     |  122     |   73.2   |   45.75  |    7.038 |    3.486 |
| Schlierbacher Landstraße        |    2.194 |    2.159 |    2.125 |    2.092 |    2.061 |    2.03  |
| Rohrbacher Straße Querschnitt   |  366     |  183     |   73.2   |   45.75  |   21.529 |    7.038 |
| Ernst-Walz-Brücke Querschnitt   |  366     |   73.2   |    7.038 |    6.906 |    3.519 |    3.486 |
| Liebermannstraße                |    2.068 |    2.056 |    2.045 |    2.033 |    2.022 |    2.011 |
| Ziegelhäuser Landstraße         |    2.092 |    2.076 |    2.06  |    2.045 |    2.03  |    2.015 |
| Bahnstadtpromenade              |  365     |  182.5   |  121.667 |   91.