In [1]:
from data_io.loader.data_loader import DataLoader
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from tabulate import tabulate

from sklearn.manifold import TSNE
import plotly.express as px
from collections import defaultdict

## Data Loaders

In [None]:
data_loader = DataLoader()

## FFT Handling

In [3]:
def get_fft_components(y, n_components=4):
    n = len(y)
    y_mean = y.mean()
    y_centered = y - y_mean

    fft = np.fft.fft(y_centered)
    freqs = np.fft.fftfreq(n, d=1.0)

    pos_idx = np.where(freqs > 0)[0]
    mags = np.abs(fft[pos_idx])
    top = pos_idx[np.argsort(mags)[-n_components:]][::-1]

    palette = ['red','green','orange','blue','purple']
    components = {}
    reconstruction = np.zeros(n)
    top_freq = []
    for i, k in enumerate(top, start=1):
        spec = np.zeros_like(fft, dtype=complex)

        # no this is not a bug, we need both sides of the spectrum for proper IFFT
        spec[k] = fft[k]
        spec[-k] = fft[-k]

        periodic_signal = np.fft.ifft(spec).real
        comp_ts = periodic_signal + y_mean
        components[f'comp_{i}'] = {
            'ts': comp_ts,
            'period_days': 1.0 / freqs[k],
            'color': palette[(i-1) % len(palette)]
        }
        reconstruction += periodic_signal
        top_freq.append(1.0 / freqs[k])
    
    reconstruction += y_mean

    components['reconstruction'] = reconstruction
    components['freqs'] = freqs
    components['index'] = np.arange(n)
    components['mean'] = y_mean
    components['top_freq'] = sorted(top_freq, reverse=True)
    return components

## Plot One Station

In [4]:
def plot_yearly_trend_traces(daily_sum):
    ema = daily_sum['channels_all'].ewm(span=2, adjust=False).mean()

    traces = []
    month_groups = {
        (1, 2): ('Winter', 'black'),
        (12,): ('Winter', 'black'),
        (3, 4, 5): ('Spring', 'green'),
        (6, 7, 8): ('Summer', 'red'),
        (9, 10, 11): ('Autumn', 'orange'),
    }

    shown = set()
    for months, (label, color) in month_groups.items():
        mask = daily_sum['datetime'].dt.month.isin(list(months))
        showleg = label not in shown
        traces.append(go.Scatter(
            x=daily_sum.loc[mask, 'datetime'],
            y=daily_sum.loc[mask, 'channels_all'],
            mode='lines',
            name=label,
            line=dict(color=color),
            showlegend=showleg,
            hovertemplate='Date: %{x}<br>Cyclists: %{y}<extra></extra>'
        ))
        shown.add(label)

    traces.append(go.Scatter(
        x=daily_sum['datetime'],
        y=ema,
        mode='lines',
        name=f'EMA',
        line=dict(color='grey', dash='dash'),
        opacity=0.5,
        hoverinfo='skip'
    ))
    return traces

def fft_components_traces(daily_sum, n_components=4):
    daily_sum.set_index(pd.DatetimeIndex(daily_sum['datetime']), inplace=True)
    daily_sum = daily_sum.asfreq('D')
    daily_sum['channels_all'] = daily_sum['channels_all'].interpolate().bfill().ffill()
    y = daily_sum['channels_all'].to_numpy()

    components = get_fft_components(y, n_components=n_components)
    traces = []

    # original
    traces.append(go.Scatter(
        x=daily_sum.index, y=y, mode='lines', name='Original',
        line=dict(color='black'),
        hovertemplate='Date: %{x}<br>total: %{y}<extra></extra>'
    ))

    # components
    for i in range(1, n_components+1):
        comp = components[f'comp_{i}']
        traces.append(go.Scatter(
            x=daily_sum.index, y=comp['ts'], mode='lines',
            name=f"{comp['period_days']:.1f} Days",
            line=dict(color=comp['color']),
            opacity=0.7,
            hoverinfo='skip'
        ))

    # reconstruction trace
    traces.append(go.Scatter(
        x=daily_sum.index, y=components['reconstruction'], mode='lines',
        name='Reconstruction',
        line=dict(color='grey'),
        hoverinfo='skip'
    ))
    return traces

def plot_year_and_fft(station, year, n_components=4):
    daily_sum = data_loader.get_bicycle_pandas(station, interval=('2024-01-01', '2025-01-01'), sample_rate='1d')

    # Plot 1: Trend
    fig1 = go.Figure()
    for tr in plot_yearly_trend_traces(daily_sum):
        fig1.add_trace(tr)
    fig1.update_layout(
        title=f"Daily Amount of Cyclists - {station} ({year})",
        xaxis_title="Date",
        yaxis_title="Number of Cyclists",
        template="plotly_white"
    )
    fig1.show()

    # Plot 2: FFT
    fig2 = go.Figure()
    for tr in fft_components_traces(daily_sum, n_components=n_components):
        fig2.add_trace(tr)
    fig2.update_layout(
        title=f"FFT Components - {station} ({year})",
        xaxis_title="Date",
        yaxis_title="Value",
        template="plotly_white"
    )
    fig2.show()

plot_year_and_fft('Pl√∂ck', 2024)

## Are Frequencies a Recurring Pattern?

In [89]:
def frequencies_table_tabulate(year, n_components=6):
    rows = []
    
    for station in data_loader.get_bicyle_stations():
        daily_sum = data_loader.get_bicycle_pandas(station, interval=('2022-01-01', '2023-01-01'), sample_rate='1d')
        if daily_sum.empty:
            continue
            
        ds = daily_sum.sort_values('datetime').copy()
        ds.set_index(pd.DatetimeIndex(ds['datetime']), inplace=True)
        ds = ds.asfreq('D')
        ds['channels_all'] = ds['channels_all'].interpolate().bfill().ffill()
        y = ds['channels_all'].to_numpy()
        top_freq = get_fft_components(y, n_components=n_components)['top_freq']

        row = [station] + [f"{freq:.3f}" for freq in top_freq]
        rows.append(row)
    headers = ['Station'] + [f'Comp {i+1}' for i in range(n_components)]
    print(tabulate(rows, headers=headers, tablefmt="github"))

    return np.array(rows)

top_freq = frequencies_table_tabulate(2022)

| Station                         |   Comp 1 |   Comp 2 |   Comp 3 |   Comp 4 |   Comp 5 |   Comp 6 |
|---------------------------------|----------|----------|----------|----------|----------|----------|
| Ziegelh√§user Landstra√üe         |      337 |  168.5   |   84.25  |   56.167 |   48.143 |   33.7   |
| Bahnstadtpromenade              |      334 |  167     |  111.333 |   55.667 |    7.106 |    6.958 |
| Berliner Stra√üe Querschnitt     |      365 |  182.5   |   91.25  |   52.143 |    7.019 |    3.51  |
| Gaisbergstra√üe                  |      365 |  182.5   |  121.667 |   52.143 |    7.019 |    3.51  |
| Eppelheimer Str. Querschnitt    |      365 |  182.5   |  121.667 |   91.25  |    7.019 |    3.51  |
| Kurf√ºrstenanlage Querschnitt    |      365 |  121.667 |   91.25  |    7.019 |    3.51  |    2.325 |
| Pl√∂ck                           |      365 |  182.5   |   91.25  |   73     |    7.019 |    6.887 |
| Liebermannstra√üe                |      365 |  121.667 |   60.833 |   52.1

In [90]:
names = top_freq[:, 0]
freq = top_freq[:, 1:5].astype(float)

In [91]:
tsne = TSNE(n_components=1, perplexity=freq.shape[1])
emb = tsne.fit_transform(freq)

grouped = defaultdict(list)
for value, name in zip(emb[:, 0], names):
    grouped[round(value, 6)].append(name)

print(len(grouped), "unique t-SNE values found.")

fig = px.scatter(
    x=emb[:, 0],
    hover_name=names,
    title="1D t-SNE of the first three frequency components",
    labels={'x': 't-SNE 1'}
)
fig.update_yaxes(visible=False, showticklabels=False)
fig.show()

12 unique t-SNE values found.


In [92]:
yearly_cyclists = {}

for station in data_loader.get_bicyle_stations():
    daily_sum = data_loader.get_bicycle_pandas(station, interval=('2022-01-01', '2023-01-01'), sample_rate='1d')

    if daily_sum.empty:
        continue
    
    yearly_cyclists[station] = daily_sum['channels_all'].values[0]

values = np.array(list(yearly_cyclists.values()), dtype=float)

min_val = np.min(values)
max_val = np.max(values)
norm_values = (values - min_val) / (max_val - min_val + 1e-9)

for i, key in enumerate(yearly_cyclists.keys()):
    yearly_cyclists[key] = norm_values[i]

yearly_cyclists

{'Ziegelh√§user Landstra√üe': np.float64(0.0529247910863264),
 'Bahnstadtpromenade': np.float64(0.5441039925717065),
 'Berliner Stra√üe Querschnitt': np.float64(0.0),
 'Gaisbergstra√üe': np.float64(0.4981429897862126),
 'Eppelheimer Str. Querschnitt': np.float64(0.03249767873721797),
 'Kurf√ºrstenanlage Querschnitt': np.float64(0.035747446610939766),
 'Pl√∂ck': np.float64(0.5041782729802673),
 'Liebermannstra√üe': np.float64(0.6039925719588654),
 'Schlierbacher Landstra√üe': np.float64(0.02506963788299672),
 'Thedor-Heuss-Br√ºcke Querschnitt': np.float64(0.9999999999995357),
 'Hardtstra√üe': np.float64(0.2130919220054721),
 'Rohrbacher Stra√üe Querschnitt': np.float64(0.3505106778085652),
 'Ernst-Walz-Br√ºcke Querschnitt': np.float64(0.8198700092846705),
 'Mannheimer Stra√üe': np.float64(0.22005571030630453)}

In [93]:
import folium

locations = []

colors = [
    'red', 'blue', 'green', 'purple', 'orange', 'pink', 'gray', 'black', 
    'lightblue', 'lightgreen', 'darkred', 'darkblue', 'darkgreen', 'cadetblue', 
    'darkpurple', 'beige', 'lightgray', 'lightred'
]

print(len(grouped))

for idx, cluster in enumerate(grouped.values()):
    for station_name in cluster:
        lat, lon = data_loader.get_bicycle_location(station_name)
        if lat is not None and lon is not None:
            locations.append((lat, lon, station_name, colors[idx]))

avg_lat = sum([loc[0] for loc in locations]) / len(locations)
avg_lon = sum([loc[1] for loc in locations]) / len(locations)

m = folium.Map(location=[avg_lat, avg_lon], zoom_start=13)

for lat, lon, name, color in locations:
    folium.Marker([lat, lon], popup=name, icon=folium.Icon(color=color)).add_to(m)
m

12


In [94]:
import folium

locations = []

m = folium.Map(location=[avg_lat, avg_lon], zoom_start=13)

for station, norm_value in yearly_cyclists.items():
    lat, lon = data_loader.get_bicycle_location(station)

    if lat is not None and lon is not None:
        intensity = int(norm_value * 255)
        color = f'#{intensity:02x}00{255 - intensity:02x}' 
        
        folium.CircleMarker(
        location=[lat, lon],
        radius=20 * norm_value,
        popup=f"{name}, Norm: {norm_value:.4f}",
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=1.0
    ).add_to(m)

m

# FFT Temperatures (one year)

In [95]:
def plot_weather_fft(year):
    df_weather = data_loader.get_weather(
        interval=(f"{year}-01-01", f"{year+1}-01-01"),
        sample_rate="1d"
    ).to_pandas()

    y = df_weather["temperature_2m"].interpolate().to_numpy()
    fft = get_fft_components(y)

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=df_weather["datetime"], y=y, name="Temperature"
    ))

    for i in range(1,4):
        comp = fft[f"comp_{i}"]
        fig.add_trace(go.Scatter(
            x=df_weather["datetime"], y=comp["ts"],
            name=f"{comp['period_days']:.1f} Days",
            opacity=0.5
        ))

    fig.update_layout(
        title=f"FFT Temperature {year}", template="plotly_white"
    )
    fig.show()
plot_weather_fft(2024)

calculate fft components for weather and counts

In [143]:
hourly_counts = data_loader.get_bicycle_pandas(
    station,
    interval=('2024-01-01', '2025-01-01'),
    sample_rate='1d'
)

hourly_weather = data_loader.get_weather(
    interval=('2024-01-01', '2025-01-01'),
    sample_rate='1d'
).to_pandas()

y_counts = hourly_counts['channels_all'].interpolate().to_numpy()
y_temp   = hourly_weather['temperature_2m'].interpolate().to_numpy()


In [151]:
fft_counts = get_fft_components(y_counts)
fft_temp   = get_fft_components(y_temp)