In [1]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
from tabulate import tabulate

from sklearn.manifold import TSNE
import plotly.express as px
from collections import defaultdict

In [2]:
# Discover the available stations for the selected city.
city = "Stadt_Heidelberg"
folder = f'../../data/processed/cycle_counter/{city}'

# List the directory through `folder` so that changing `city` also changes the
# station list; sort because os.listdir() returns entries in arbitrary order.
files = sorted(os.listdir(folder))
# Station identifiers are the CSV file names without their extension.
stations = [os.path.splitext(f)[0] for f in files if f.endswith('.csv')]

In [3]:
import pandas as pd

# Module-level lookup tables, populated as a side effect of import_data():
#   coordinates       -> station name : (latitude, longitude)
#   file_name_mapping -> station name : file name passed to import_data()
coordinates, file_name_mapping = {}, {}

def import_data(station):
    """Load one station's CSV and return its site name with the cleaned frame.

    Reads ``{folder}/{station}.csv``, parses ``iso_timestamp`` as UTC (values
    that cannot be parsed become NaT), removes the rows that have no valid
    timestamp and converts the remaining timestamps to Europe/Berlin time.

    Side effects: stores the station's coordinates and file name in the
    module-level ``coordinates`` and ``file_name_mapping`` dicts, keyed by the
    site name.

    Args:
        station: CSV file name without the ``.csv`` extension.

    Returns:
        ``(station_name, df)`` where ``station_name`` is the ``counter_site``
        value of the first remaining row and ``df`` is the cleaned DataFrame.

    Raises:
        IndexError: if no row with a valid timestamp remains.
    """
    df = pd.read_csv(f'{folder}/{station}.csv')

    df['iso_timestamp'] = pd.to_datetime(df['iso_timestamp'], utc=True, errors='coerce')
    # dropna() is not in-place: reassign so rows with an invalid timestamp are
    # really removed, and renumber the index so row 0 is the first kept row.
    df = df.dropna(subset=['iso_timestamp']).reset_index(drop=True)
    df['iso_timestamp'] = df['iso_timestamp'].dt.tz_convert('Europe/Berlin')

    # Positional access: the site metadata is taken from the first row.
    station_name = df['counter_site'].iloc[0]

    coordinates[station_name] = (df['latitude'].iloc[0], df['longitude'].iloc[0])
    file_name_mapping[station_name] = station

    return station_name, df

def coordinates_by_station_name(station_name):
    """Return the stored (latitude, longitude) of a station, or (None, None) if unknown."""
    if station_name in coordinates:
        return coordinates[station_name]
    return (None, None)

def coordinates_by_file_name(file_name):
    """Return the stored (latitude, longitude) for a file name.

    Looks up the first station registered under ``file_name`` and returns its
    coordinates; returns ``(None, None)`` when no station uses that file name
    or the station has no coordinates stored.
    """
    not_found = object()
    station_name = next(
        (name for name, fname in file_name_mapping.items() if fname == file_name),
        not_found,
    )
    if station_name is not_found:
        return (None, None)
    return coordinates.get(station_name, (None, None))

def get_file_name_by_station_name(station_name):
    """Return the file name registered for a station, or None if the station is unknown."""
    try:
        return file_name_mapping[station_name]
    except KeyError:
        return None

def get_station_name_by_file_name(file_name):
    """Return the first station name registered under ``file_name``, or None if there is none."""
    candidates = (
        name for name, fname in file_name_mapping.items() if fname == file_name
    )
    return next(candidates, None)

def get_daily_data(station, year):
    """Return the station name and per-day ``channels_all`` sums for one year.

    Args:
        station: file name (without extension) passed on to ``import_data``.
        year: calendar year used to filter ``iso_timestamp``.

    Returns:
        ``(name, daily_sum)`` where ``daily_sum`` has the columns
        ``iso_timestamp`` (one entry per day) and ``total``.
    """
    station_name, records = import_data(station)

    selected = records[records['iso_timestamp'].dt.year == year]
    day_of_record = selected['iso_timestamp'].dt.date

    daily_sum = (
        selected.groupby(day_of_record)['channels_all']
        .sum()
        .reset_index()
        .rename(columns={'channels_all': 'total'})
    )

    # Grouping by .dt.date leaves plain date objects in the column, so it has
    # to be converted back to a datetime column here.
    daily_sum['iso_timestamp'] = pd.to_datetime(daily_sum['iso_timestamp'])
    return station_name, daily_sum

def get_yearly_data(station, year):
    """Return the station name and the ``channels_all`` sum for one year.

    Args:
        station: file name (without extension) passed on to ``import_data``.
        year: calendar year used to filter ``iso_timestamp``.

    Returns:
        ``(name, yearly_sum)`` where ``yearly_sum`` has the columns
        ``iso_timestamp`` (holding the year) and ``total``.
    """
    station_name, records = import_data(station)

    selected = records[records['iso_timestamp'].dt.year == year]
    year_of_record = selected['iso_timestamp'].dt.year

    totals = selected.groupby(year_of_record)['channels_all'].sum()
    yearly_sum = totals.reset_index().rename(columns={'channels_all': 'total'})

    return station_name, yearly_sum