In [162]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
from tabulate import tabulate

from sklearn.manifold import TSNE
import plotly.express as px
from collections import defaultdict

In [163]:
# Discover the available stations: every CSV in the city's folder is one
# counter station, identified by its file name without the extension.
city = "Stadt_Heidelberg"
folder = f'../../data/processed/cycle_counter/{city}'

# List `folder` itself (instead of repeating a hard-coded path) so that
# changing `city` actually changes which stations are analysed.
# os.listdir returns entries in arbitrary order; sorting keeps the station
# order - and everything derived from it - stable between runs.
files = sorted(os.listdir(folder))
stations = [os.path.splitext(f)[0] for f in files if f.endswith('.csv')]

## Data Loaders

In [164]:
import pandas as pd

# Module-level caches, populated as a side effect of import_data():
#   coordinates:       station name -> (latitude, longitude) of the first CSV row
#   file_name_mapping: station name -> station file name (without '.csv')
# Both stay empty until import_data() has been called for a station, so the
# lookup helpers below only know stations that were already loaded.
coordinates = {}
file_name_mapping = {}

def import_data(station):
    """Load one station's CSV and register its metadata in the module caches.

    Parameters
    ----------
    station : str
        File name of the station without the ``.csv`` extension; the file is
        read from ``{folder}/{station}.csv``.

    Returns
    -------
    tuple
        ``(station_name, df)`` where ``station_name`` is the ``counter_site``
        value of the first row and ``df`` is the loaded frame with
        ``iso_timestamp`` converted to timezone-aware ``Europe/Berlin``
        timestamps. Rows whose timestamp could not be parsed are removed.

    Side effects
    ------------
    Stores ``(latitude, longitude)`` of the first row in ``coordinates`` and
    the file name in ``file_name_mapping``, both keyed by ``station_name``.
    """
    df = pd.read_csv(f'{folder}/{station}.csv')

    # Unparseable timestamps become NaT (errors='coerce'); NaT survives the
    # timezone conversion and is dropped afterwards.
    timestamps = pd.to_datetime(df['iso_timestamp'], utc=True, errors='coerce')
    df['iso_timestamp'] = timestamps.dt.tz_convert('Europe/Berlin')

    # Read the station metadata positionally from the first row of the file,
    # before any rows are removed.
    station_name = df['counter_site'].iloc[0]
    coordinates[station_name] = (df['latitude'].iloc[0], df['longitude'].iloc[0])
    file_name_mapping[station_name] = station

    # The previous bare `df['iso_timestamp'].dropna()` discarded its result,
    # so invalid rows were never removed. Drop them for real and restore a
    # contiguous index.
    df = df.dropna(subset=['iso_timestamp']).reset_index(drop=True)

    return station_name, df

def coordinates_by_station_name(station_name):
    """Return the cached ``(latitude, longitude)`` for a station name.

    Falls back to ``(None, None)`` when the station is not in ``coordinates``
    (i.e. it has not been loaded through ``import_data`` yet).
    """
    if station_name in coordinates:
        return coordinates[station_name]
    return (None, None)

def coordinates_by_file_name(file_name):
    """Return the cached ``(latitude, longitude)`` for a station file name.

    The first station name in ``file_name_mapping`` that maps to ``file_name``
    is used for the lookup in ``coordinates``. ``(None, None)`` is returned
    when no station maps to the file name or no coordinates are cached for it.
    """
    not_found = object()
    station_name = next(
        (name for name, fname in file_name_mapping.items() if fname == file_name),
        not_found,
    )
    if station_name is not_found:
        return (None, None)
    return coordinates.get(station_name, (None, None))

def get_file_name_by_station_name(station_name):
    """Return the file name registered for ``station_name``, or ``None`` if unknown."""
    try:
        return file_name_mapping[station_name]
    except KeyError:
        return None

def get_station_name_by_file_name(file_name):
    """Return the first station name registered for ``file_name``, or ``None``.

    This is the reverse lookup of ``file_name_mapping`` (station name -> file
    name), so it scans the mapping's items in insertion order.
    """
    matching_names = (
        name for name, fname in file_name_mapping.items() if fname == file_name
    )
    return next(matching_names, None)

def get_daily_data(station, year):
    """Return the station name and its per-day ``channels_all`` totals for one year.

    Parameters
    ----------
    station : str
        Station file name (without extension), forwarded to ``import_data``.
    year : int
        Calendar year to keep, compared against the year of ``iso_timestamp``.

    Returns
    -------
    tuple
        ``(name, daily_sum)`` where ``daily_sum`` has the columns
        ``iso_timestamp`` (one entry per day) and ``total``.
    """
    name, df = import_data(station)

    in_year = df['iso_timestamp'].dt.year == year
    by_year = df[in_year]

    # Grouping by the calendar date collapses all readings of a day into one row.
    day = by_year['iso_timestamp'].dt.date
    daily_sum = (
        by_year.groupby(day)['channels_all']
        .sum()
        .reset_index()
        .rename(columns={'channels_all': 'total'})
    )

    # `.dt.date` yields plain Python date objects, so the grouped column has to
    # be converted back to datetimes before it can be used with `.dt` again.
    daily_sum['iso_timestamp'] = pd.to_datetime(daily_sum['iso_timestamp'])
    return name, daily_sum

def get_yearly_data(station, year):
    """Return the station name and its ``channels_all`` total for one year.

    Parameters
    ----------
    station : str
        Station file name (without extension), forwarded to ``import_data``.
    year : int
        Calendar year to keep, compared against the year of ``iso_timestamp``.

    Returns
    -------
    tuple
        ``(name, yearly_sum)`` where ``yearly_sum`` has the columns
        ``iso_timestamp`` (the year) and ``total``; it is empty when the
        station has no rows for ``year``.
    """
    name, df = import_data(station)

    row_year = df['iso_timestamp'].dt.year
    by_year = df[row_year == year]

    yearly_sum = (
        by_year.groupby(by_year['iso_timestamp'].dt.year)['channels_all']
        .sum()
        .reset_index()
        .rename(columns={'channels_all': 'total'})
    )

    return name, yearly_sum

## FFT Handling

In [165]:
def get_fft_components(y, n_components=4):
    """Extract the strongest periodic components of a signal via the FFT.

    The signal is mean-centred, transformed, and the ``n_components`` positive
    frequency bins with the largest magnitude are each turned back into a time
    series (one sinusoid per bin).

    Parameters
    ----------
    y : array-like
        One-dimensional, evenly sampled signal. The reported periods are in
        units of the sample spacing (days for daily data).
    n_components : int, default 4
        Number of strongest frequency bins to extract. If the signal has fewer
        positive frequency bins, only the available ones are returned.

    Returns
    -------
    dict
        ``'comp_1'`` ... ``'comp_k'`` (strongest first), each a dict with
        ``'ts'`` (the component plus the signal mean), ``'period_days'`` and
        ``'color'``; plus ``'reconstruction'`` (sum of all components plus the
        mean), ``'freqs'``, ``'index'``, ``'mean'`` and ``'top_freq'``.
        Despite its name, ``'top_freq'`` holds the component *periods*, sorted
        from longest to shortest.

    Raises
    ------
    ValueError
        If ``n_components`` is negative or ``y`` is empty.
    """
    if n_components < 0:
        raise ValueError('n_components must be non-negative')

    # Accept lists / Series as well as arrays.
    y = np.asarray(y, dtype=float)
    n = len(y)
    if n == 0:
        raise ValueError('y must contain at least one sample')

    y_mean = y.mean()
    y_centered = y - y_mean

    fft = np.fft.fft(y_centered)
    freqs = np.fft.fftfreq(n, d=1.0)

    pos_idx = np.where(freqs > 0)[0]
    mags = np.abs(fft[pos_idx])
    # Strongest bins first. Slicing the reversed order with [:n_components]
    # (rather than [-n_components:]) also behaves for n_components == 0, where
    # the negative slice would have selected every bin.
    strongest = np.argsort(mags)[::-1][:n_components]
    top = pos_idx[strongest]

    palette = ['red', 'green', 'orange', 'blue', 'purple']
    components = {}
    reconstruction = np.zeros(n)
    top_freq = []
    for i, k in enumerate(top, start=1):
        spec = np.zeros_like(fft, dtype=complex)

        # Keep the bin and its mirrored negative-frequency partner: both are
        # needed for the inverse FFT to produce the full real sinusoid.
        spec[k] = fft[k]
        spec[-k] = fft[-k]

        periodic_signal = np.fft.ifft(spec).real
        period = 1.0 / freqs[k]
        components[f'comp_{i}'] = {
            'ts': periodic_signal + y_mean,
            'period_days': period,
            'color': palette[(i - 1) % len(palette)]
        }
        reconstruction += periodic_signal
        top_freq.append(period)

    reconstruction += y_mean

    components['reconstruction'] = reconstruction
    components['freqs'] = freqs
    components['index'] = np.arange(n)
    components['mean'] = y_mean
    components['top_freq'] = sorted(top_freq, reverse=True)
    return components

## Plot One Station

In [166]:
def plot_yearly_trend_traces(daily_sum):
    """Build the Plotly traces for the daily counts, coloured by season.

    Parameters
    ----------
    daily_sum : pandas.DataFrame
        Frame with the columns ``iso_timestamp`` and ``total``; it is copied,
        not modified.

    Returns
    -------
    list
        One ``go.Scatter`` per month group (January/February and December are
        separate traces that share the ``Winter`` legend entry), followed by a
        dashed grey trace of the exponential moving average of ``total``.
    """
    frame = daily_sum.copy()
    frame['iso_timestamp'] = pd.to_datetime(frame['iso_timestamp'])
    smoothed = frame['total'].ewm(span=2, adjust=False).mean()

    # (months, legend label, line colour) in drawing order.
    seasons = [
        ((1, 2), 'Winter', 'black'),
        ((12,), 'Winter', 'black'),
        ((3, 4, 5), 'Spring', 'green'),
        ((6, 7, 8), 'Summer', 'red'),
        ((9, 10, 11), 'Autumn', 'orange'),
    ]

    traces = []
    labels_in_legend = set()
    for months, label, color in seasons:
        in_season = frame['iso_timestamp'].dt.month.isin(list(months))
        season_rows = frame.loc[in_season]
        # Only the first trace of a label gets a legend entry.
        first_of_label = label not in labels_in_legend
        labels_in_legend.add(label)

        traces.append(go.Scatter(
            x=season_rows['iso_timestamp'],
            y=season_rows['total'],
            mode='lines',
            name=label,
            line=dict(color=color),
            showlegend=first_of_label,
            hovertemplate='Date: %{x}<br>Cyclists: %{y}<extra></extra>'
        ))

    ema_trace = go.Scatter(
        x=frame['iso_timestamp'],
        y=smoothed,
        mode='lines',
        name='EMA',
        line=dict(color='grey', dash='dash'),
        opacity=0.5,
        hoverinfo='skip'
    )
    traces.append(ema_trace)
    return traces

def fft_components_traces(daily_sum, n_components=4):
    """Build the Plotly traces for a daily series and its strongest FFT components.

    Parameters
    ----------
    daily_sum : pandas.DataFrame
        Frame with the columns ``iso_timestamp`` and ``total``.
    n_components : int, default 4
        Number of FFT components to plot. Fewer traces are produced when the
        series is too short to provide that many components.

    Returns
    -------
    list
        ``go.Scatter`` traces: the gap-filled original series, one trace per
        available component (named after its period), and the reconstruction.
        An empty list is returned for an empty ``daily_sum``.
    """
    # Nothing to transform - an empty series would fail inside the FFT.
    if daily_sum.empty:
        return []

    # Put the series on a gap-free daily grid; missing days are interpolated
    # (and edge gaps back-/forward-filled) so the FFT sees evenly spaced data.
    ds = daily_sum.sort_values('iso_timestamp').copy()
    ds = ds.set_index(pd.DatetimeIndex(ds['iso_timestamp'])).asfreq('D')
    ds['total'] = ds['total'].interpolate().bfill().ffill()
    y = ds['total'].to_numpy()

    components = get_fft_components(y, n_components=n_components)
    traces = []

    # original
    traces.append(go.Scatter(
        x=ds.index, y=y, mode='lines', name='Original',
        line=dict(color='black'),
        hovertemplate='Date: %{x}<br>Total: %{y}<extra></extra>'
    ))

    # components
    for i in range(1, n_components + 1):
        comp = components.get(f'comp_{i}')
        if comp is None:
            # Short series yield fewer components than requested; stop
            # instead of raising KeyError.
            break
        traces.append(go.Scatter(
            x=ds.index, y=comp['ts'], mode='lines',
            name=f"{comp['period_days']:.1f} Days",
            line=dict(color=comp['color']),
            opacity=0.7,
            hoverinfo='skip'
        ))

    # reconstruction trace
    traces.append(go.Scatter(
        x=ds.index, y=components['reconstruction'], mode='lines',
        name='Reconstruction',
        line=dict(color='grey'),
        hoverinfo='skip'
    ))
    return traces

def plot_year_and_fft(station, year, n_components=4):
    """Show two figures for one station and year: the daily trend and its FFT view.

    Parameters
    ----------
    station : str
        Station file name (without extension), forwarded to ``get_daily_data``.
    year : int
        Calendar year to plot.
    n_components : int, default 4
        Number of FFT components drawn in the second figure.
    """
    name, daily_sum = get_daily_data(station, year)

    def show_figure(traces, title, yaxis_title):
        # Both figures share the same layout apart from title and y-axis label.
        figure = go.Figure()
        for trace in traces:
            figure.add_trace(trace)
        figure.update_layout(
            title=title,
            xaxis_title="Date",
            yaxis_title=yaxis_title,
            template="plotly_white"
        )
        figure.show()

    # Plot 1: Trend
    show_figure(
        plot_yearly_trend_traces(daily_sum),
        title=f"Daily Amount of Cyclists - {name} ({year})",
        yaxis_title="Number of Cyclists",
    )

    # Plot 2: FFT
    show_figure(
        fft_components_traces(daily_sum, n_components=n_components),
        title=f"FFT Components - {name} ({year})",
        yaxis_title="Value",
    )

# Example: trend and FFT figures for a single station and year.
plot_year_and_fft(station='station_100012161', year=2024)

## Are Frequencies a Recurring Pattern?

In [167]:
def frequencies_table_tabulate(year, n_components=6):
    """Print and return the dominant FFT periods of every station for one year.

    Stations without data for ``year`` are skipped.

    Parameters
    ----------
    year : int
        Calendar year to analyse.
    n_components : int, default 6
        Number of FFT components (table columns) per station.

    Returns
    -------
    numpy.ndarray
        String array with one row per station: the station name followed by
        the component periods (formatted with three decimals, longest period
        first). Stations whose series is too short to provide
        ``n_components`` components are padded with ``'nan'`` so the array
        stays rectangular. With no stations at all the array has shape
        ``(0, n_components + 1)``.
    """
    rows = []
    for station in stations:
        name, daily_sum = get_daily_data(station, year)
        if daily_sum.empty:
            continue

        # Put the series on a gap-free daily grid so the FFT sees evenly
        # spaced samples; missing days are interpolated.
        ds = daily_sum.sort_values('iso_timestamp').copy()
        ds = ds.set_index(pd.DatetimeIndex(ds['iso_timestamp'])).asfreq('D')
        ds['total'] = ds['total'].interpolate().bfill().ffill()
        y = ds['total'].to_numpy()
        top_freq = get_fft_components(y, n_components=n_components)['top_freq']

        cells = [f"{freq:.3f}" for freq in top_freq]
        # Short series return fewer components; pad so every row has the same
        # width and the result can be sliced as a 2-D array.
        cells += ['nan'] * (n_components - len(cells))
        rows.append([name] + cells)

    headers = ['Station'] + [f'Comp {i+1}' for i in range(n_components)]
    print(tabulate(rows, headers=headers, tablefmt="github"))

    if not rows:
        # Keep the result 2-D even when every station was skipped.
        return np.empty((0, n_components + 1), dtype=str)
    return np.array(rows)

# Per-station table of dominant periods for 2022 (also printed below).
top_freq = frequencies_table_tabulate(year=2022)

| Station                         |   Comp 1 |   Comp 2 |   Comp 3 |   Comp 4 |   Comp 5 |   Comp 6 |
|---------------------------------|----------|----------|----------|----------|----------|----------|
| Mannheimer Straße               |      365 |  182.5   |  121.667 |   91.25  |   60.833 |    7.019 |
| Thedor-Heuss-Brücke Querschnitt |      365 |  182.5   |  121.667 |   52.143 |    7.019 |    3.51  |
| Schlierbacher Landstraße        |      165 |   82.5   |   55     |   18.333 |    8.25  |    6.875 |
| Rohrbacher Straße Querschnitt   |      365 |  182.5   |  121.667 |   60.833 |    7.019 |    3.51  |
| Ernst-Walz-Brücke Querschnitt   |      365 |  182.5   |  121.667 |   52.143 |    7.019 |    3.51  |
| Liebermannstraße                |      365 |  121.667 |   60.833 |   52.143 |    7.019 |    3.51  |
| Ziegelhäuser Landstraße         |      337 |  168.5   |   84.25  |   56.167 |   48.143 |   37.444 |
| Bahnstadtpromenade              |      334 |  167     |  111.333 |   55.

In [168]:
# Column 0 of the table holds the station names; columns 1-4 hold the first
# four component values as formatted strings, converted back to floats here.
names = top_freq[:, 0]
freq = top_freq[:, 1:5].astype(float)

In [169]:
# Embed each station's component values into one dimension with t-SNE.
# The perplexity keeps its previous value (the number of feature columns) but
# is capped below the number of stations, because t-SNE requires
# perplexity < n_samples.
perplexity = min(freq.shape[1], max(1, len(freq) - 1))

# t-SNE is stochastic: a fixed random_state makes the embedding - and the
# grouping derived from it - reproducible across runs.
tsne = TSNE(n_components=1, perplexity=perplexity, random_state=42)
emb = tsne.fit_transform(freq)

# Stations whose embedding coincides (after rounding) end up in one group.
grouped = defaultdict(list)
for value, name in zip(emb[:, 0], names):
    grouped[round(value, 6)].append(name)

print(len(grouped), "unique t-SNE values found.")

fig = px.scatter(
    x=emb[:, 0],
    hover_name=names,
    # `freq` holds four component columns, not three.
    title="1D t-SNE of the first four frequency components",
    labels={'x': 't-SNE 1'}
)
fig.update_yaxes(visible=False, showticklabels=False)
fig.show()

10 unique t-SNE values found.


In [170]:
# Yearly cyclist totals for 2022, keyed by station file name; stations
# without data for that year are left out.
yearly_cyclists = {}
for station in stations:
    _station_name, yearly_df = get_yearly_data(station, 2022)
    if not yearly_df.empty:
        yearly_cyclists[station] = yearly_df['total'].values[0]

# Min-max scale the totals into [0, 1); the small epsilon avoids a division
# by zero when all stations have the same total.
values = np.asarray([*yearly_cyclists.values()], dtype=float)
min_val = values.min()
max_val = values.max()
norm_values = (values - min_val) / (max_val - min_val + 1e-9)

# Replace the raw totals by their normalised values (same keys, same order).
yearly_cyclists = dict(zip(yearly_cyclists, norm_values))

yearly_cyclists

{'station_100013034': np.float64(0.23037654887712505),
 'station_100048812': np.float64(0.9999999999999997),
 'station_100049883': np.float64(0.0),
 'station_100048813': np.float64(0.36626752355465253),
 'station_100048811': np.float64(0.9366343603035089),
 'station_100048814': np.float64(0.520447731169386),
 'station_100049901': np.float64(0.002679231432751393),
 'station_100056770': np.float64(0.2660883255351865),
 'station_100012608': np.float64(0.5754641619382582),
 'station_100059184': np.float64(0.05642218765517899),
 'station_100012161': np.float64(0.38408588619400735),
 'station_100059187': np.float64(0.08190093149385484),
 'station_100056769': np.float64(0.1724690978515771),
 'station_100050750': np.float64(0.14326280217176068)}

In [174]:
import folium

# Marker colours, one per t-SNE group. The first ten entries are unchanged;
# the rest are further named colours so more groups get a distinct colour.
# NOTE(review): folium.Icon only supports a fixed set of colour names -
# confirm the added names against the folium documentation.
colors = [
    'red', 'blue', 'green', 'purple', 'orange', 'pink', 'gray', 'black',
    'lightblue', 'lightgreen', 'darkred', 'darkblue', 'darkgreen',
    'cadetblue', 'darkpurple', 'beige', 'lightred', 'lightgray',
]

# (lat, lon, station name, colour) for every station with known coordinates.
locations = []
for idx, cluster in enumerate(grouped.values()):
    # Cycle through the palette instead of raising IndexError when there are
    # more groups than colours.
    cluster_color = colors[idx % len(colors)]
    for station_name in cluster:
        lat, lon = coordinates_by_station_name(station_name)
        if lat is not None and lon is not None:
            locations.append((lat, lon, station_name, cluster_color))

if not locations:
    # Coordinates are only cached once import_data() has run for a station.
    raise ValueError('No station coordinates available - run the data loading cells first.')

# Centre the map on the mean position of all plotted stations.
avg_lat = sum(loc[0] for loc in locations) / len(locations)
avg_lon = sum(loc[1] for loc in locations) / len(locations)

m = folium.Map(location=[avg_lat, avg_lon], zoom_start=13)

for lat, lon, name, color in locations:
    folium.Marker([lat, lon], popup=name, icon=folium.Icon(color=color)).add_to(m)
m

In [184]:
import folium

# Smallest marker radius in pixels, so the station with the lowest count
# (normalised value 0) is still visible on the map.
MIN_RADIUS = 3

# Reuses the map centre (avg_lat, avg_lon) computed for the cluster map.
m = folium.Map(location=[avg_lat, avg_lon], zoom_start=13)

for station, norm_value in yearly_cyclists.items():
    name = get_station_name_by_file_name(station)
    lat, lon = coordinates_by_file_name(station)
    if lat is None or lon is None:
        continue

    # Blue (few cyclists) to red (many cyclists) gradient as a hex colour.
    intensity = int(norm_value * 255)
    color = f'#{intensity:02x}00{255 - intensity:02x}'

    folium.CircleMarker(
        location=[lat, lon],
        radius=max(MIN_RADIUS, float(20 * norm_value)),
        popup=f"{name}, Norm: {norm_value:.4f}",
        # Use the computed gradient colour; it was previously calculated but
        # every marker was drawn in plain red.
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=1.0
    ).add_to(m)

m