***
### Import of required libraries
***

In [1]:
import glob
from os.path import join as opj

import pandas as pd
import numpy as np

***
### Processing of MeteoSchweiz T/RH/P data
***

In [2]:
# Collect every csv file containing weather data in the source directory.
# glob.glob() gives no guarantee about the order of its results, so the list
# is sorted to make the row order of the combined dataframe reproducible.
weather_data_path = "/mnt/beegfs/store/MIAR/01_sources/meteo/T-RH_QFE-SMN_KLO"
list_csv = sorted(glob.glob(opj(weather_data_path, "*.csv")))

# Read each headerless, semicolon-separated file with explicit column names
# and stack all of them row-wise into a single pandas dataframe. The
# timestamp column is left unparsed here (parse_dates=False).
# ignore_index=True replaces the per-file 0..n-1 row labels, which would
# otherwise repeat once per file, with one continuous index.
df_weather = pd.concat(
    [
        pd.read_csv(
            f,
            sep=";",
            header=None,
            index_col=None,
            parse_dates=False,
            names=[
                "timestamp",
                "temperature_gnd",
                "humidity_gnd",
                "pressure_gnd",
            ],
        )
        for f in list_csv
    ],
    axis=0,
    ignore_index=True,
)

# Parse the raw "day.month.year hour:minute:second" strings into datetimes.
# utc=True yields a timezone-aware (UTC) column.
# NOTE(review): this treats the source timestamps as UTC - confirm.
timestamp_format = "%d.%m.%Y %H:%M:%S"
raw_timestamps = df_weather["timestamp"]
df_weather["timestamp"] = pd.to_datetime(
    raw_timestamps, format=timestamp_format, utc=True
)

# Write the combined T/RH/P dataframe to a parquet file
weather_parquet_path = "/mnt/beegfs/store/krum/MT/inputs/df_t_rh_p.parquet"
df_weather.to_parquet(weather_parquet_path)

***
### Processing of MeteoSchweiz wind data
***

In [3]:
# Find the zipped wind csv files at any depth below the source directory
wind_data_path = "/mnt/beegfs/store/MIAR/01_sources/meteo/Wind_LSZH_2018-2023"
wind_file_pattern = opj(wind_data_path, "**/*Kloten_Wind_C*.csv.zip")
list_csv_C = glob.glob(wind_file_pattern, recursive=True)

# Reader options shared by all files: headerless, semicolon-separated, only
# the first three columns are read, and the first one becomes a parsed date
# index named "date".
# NOTE(review): parse_dates=True lets pandas infer the date format; if these
# files use day-first dates an explicit format would be safer - confirm.
wind_read_options = dict(
    sep=";",
    header=None,
    index_col=0,
    parse_dates=True,
    names=["date", "wind_speed_gnd", "wind_direction_gnd"],
    usecols=[0, 1, 2],
)

# Stack all files row-wise, then sort the rows by the date index
wind_frames = [pd.read_csv(f, **wind_read_options) for f in list_csv_C]
df_wind = pd.concat(wind_frames, axis=0).sort_index()

# Turn the "date" index into a regular column named "timestamp" and attach
# the UTC timezone to its (timezone-naive) values
df_wind = (
    df_wind.reset_index()
    .rename(columns={"date": "timestamp"})
    .assign(timestamp=lambda frame: frame["timestamp"].dt.tz_localize("UTC"))
)

# Trailing window used for both moving averages: the last 40 rows, or fewer
# at the start of the series (min_periods=1).
# NOTE(review): 40 rows equal 2 minutes only if samples are 3 s apart and
# there are no gaps in the data - confirm.
rolling_options = {"window": 40, "min_periods": 1}

# 2min moving average of the wind speed
speed_rolling = df_wind["wind_speed_gnd"].rolling(**rolling_options)
df_wind["wind_speed_gnd_2min_avg"] = speed_rolling.mean()

# 2min moving average of the wind direction. Angles wrap around at 360
# degrees, so the sine and cosine are averaged separately and the mean angle
# is recovered with arctan2 instead of averaging the degrees directly.
direction_rad = np.radians(df_wind["wind_direction_gnd"])
mean_sin = np.sin(direction_rad).rolling(**rolling_options).mean()
mean_cos = np.cos(direction_rad).rolling(**rolling_options).mean()
mean_direction_deg = np.degrees(np.arctan2(mean_sin, mean_cos))

# arctan2 yields angles in (-180, 180]; the modulo maps them into [0, 360)
df_wind["wind_direction_gnd_2min_avg"] = mean_direction_deg % 360

# x and y components of the averaged wind, derived from the 2min mean speed
# and 2min mean direction.
# NOTE(review): the negative sign presumably converts the direction the wind
# comes from into the direction it blows towards - confirm the convention.
mean_speed = df_wind["wind_speed_gnd_2min_avg"]
mean_direction_rad = np.radians(df_wind["wind_direction_gnd_2min_avg"])
df_wind["wind_x_2min_avg"] = -mean_speed * np.sin(mean_direction_rad)
df_wind["wind_y_2min_avg"] = -mean_speed * np.cos(mean_direction_rad)

# Write the wind dataframe to a parquet file
wind_parquet_path = "/mnt/beegfs/store/krum/MT/inputs/df_wind.parquet"
df_wind.to_parquet(wind_parquet_path)