## Flüge mit Ferien mergen

In [1]:
import pandas as pd
from pathlib import Path
import requests
import numpy as np
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)
# ===============================
# PARAMETERS
# ===============================
# Input/output locations used by the merge script below.
PATH_FLIGHTS = "flights_with_weather_HALBHOUR_merged.csv"   # may also be .parquet, see below
PATH_VAC     = "ch_school_vacations_30min.csv"              # school-vacation grid (30 min)
OUT_FILE     = "flights_with_weather_vacations_HALBHOUR_merged.csv"

# If the input files are stored as Parquet, use these paths instead:
# PATH_FLIGHTS = "flights_with_weather_HALBHOUR_merged.parquet"
# PATH_VAC     = "ch_school_vacations_30min.parquet"

# ===============================
# HELFER
# ===============================
def read_any(path: str) -> pd.DataFrame:
    """Load a table from *path*, picking the reader from the file extension.

    Files ending in ``.parquet`` or ``.pq`` (case-insensitive) are read with
    ``pd.read_parquet``; anything else is treated as CSV.

    Args:
        path: Location of the file to load.

    Returns:
        The file contents as a DataFrame.

    Raises:
        FileNotFoundError: If *path* does not exist.
    """
    source = Path(path)
    if not source.exists():
        raise FileNotFoundError(f"Datei nicht gefunden: {source}")

    is_parquet = source.suffix.lower() in (".parquet", ".pq")
    # CSV is the default for every non-Parquet extension.
    reader = pd.read_parquet if is_parquet else pd.read_csv
    return reader(source)

def detect_time_col(df: pd.DataFrame) -> str:
    """Return the label of the column that most likely holds timestamps.

    Detection runs in two stages:

    1. Well-known names ("time", "timestamp", "datetime", "date_time",
       "Zeit", "ts") are matched case-insensitively, in that order of
       preference.
    2. Otherwise the first column that already has a datetime dtype, or whose
       non-null values all parse with ``pd.to_datetime``, is returned.

    Numeric columns are skipped in stage 2: ``pd.to_datetime`` happily
    interprets plain numbers as epoch offsets, so an index-like column such
    as "Unnamed: 0" would otherwise be mistaken for the time column.
    Columns without any non-null value are skipped as well.

    Args:
        df: Table to inspect.

    Returns:
        The matching column label exactly as it appears in ``df.columns``.

    Raises:
        ValueError: If no column qualifies.
    """
    # Common names, in order of preference.
    preferred = ["time", "timestamp", "datetime", "date_time", "DateTime", "Zeit", "ts"]

    # Lower-cased label -> first column carrying it. str() keeps non-string
    # labels (e.g. integer column names) from raising AttributeError.
    by_lower = {}
    for c in df.columns:
        by_lower.setdefault(str(c).lower(), c)
    for name in preferred:
        key = name.lower()
        if key in by_lower:
            return by_lower[key]

    # Fallback: first datetime-like column.
    for c in df.columns:
        series = df[c]
        if not isinstance(series, pd.Series):
            # Duplicate labels select a DataFrame; it cannot be judged as one column.
            continue
        if pd.api.types.is_datetime64_any_dtype(series):
            return c
        if pd.api.types.is_numeric_dtype(series):
            # Numbers (and booleans) always "parse" as epoch values; not evidence of a time column.
            continue
        values = series.dropna()
        if values.empty:
            continue
        try:
            pd.to_datetime(values)
        except (ValueError, TypeError, OverflowError):
            continue
        return c
    raise ValueError("Konnte keine Zeitspalte automatisch erkennen.")

def ensure_datetime(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Return a copy of *df* in which column *col* is parsed as UTC datetimes.

    The input frame is left untouched. Parsing uses ``errors="raise"``, so
    unparseable values raise an error instead of being silently coerced.

    Args:
        df: Source table.
        col: Label of the column to convert.

    Returns:
        A new DataFrame with *col* converted via ``pd.to_datetime(..., utc=True)``.
    """
    parsed = pd.to_datetime(df[col], utc=True, errors="raise")
    result = df.copy()
    result[col] = parsed
    return result

# ===============================
# LOAD
# ===============================
df_f = read_any(PATH_FLIGHTS)
df_v = read_any(PATH_VAC)

col_f_time = detect_time_col(df_f)
col_v_time = detect_time_col(df_v)

# Parse both time columns the same way (UTC) so the join keys are comparable.
df_f = ensure_datetime(df_f, col_f_time)
df_v = ensure_datetime(df_v, col_v_time)

# Common key name for the merge
df_f = df_f.rename(columns={col_f_time: "ts"})
df_v = df_v.rename(columns={col_v_time: "ts"})

# Safety net: drop duplicate timestamps in the vacation grid (last row wins after sorting)
df_v = df_v.sort_values("ts").drop_duplicates(subset=["ts"], keep="last")

# Every vacation column except the key gets appended
vac_cols = [c for c in df_v.columns if c != "ts"]

# Columns present in both inputs would be suffixed (_x/_y) by the merge and
# then break the column reordering below with a KeyError; fail early with a clear message.
overlap = [c for c in vac_cols if c in df_f.columns]
if overlap:
    raise ValueError(f"Spaltennamen kommen in beiden Dateien vor: {overlap}")

# ===============================
# MERGE (vacation columns at the END)
# ===============================
# Left join on the flights/weather grid so its row count is preserved exactly;
# validate="many_to_one" makes pandas itself reject duplicate keys on the vacation side.
merged = pd.merge(
    df_f,
    df_v[["ts"] + vac_cols],
    on="ts",
    how="left",
    validate="many_to_one",
)

# Explicitly move the vacation columns to the end
base_cols = [c for c in merged.columns if c not in vac_cols]
merged = merged[base_cols + vac_cols]

# ===============================
# QUALITY CHECKS
# ===============================
# Explicit raise instead of assert: assert statements are removed under `python -O`.
if len(merged) != len(df_f):
    raise RuntimeError("Zeilenanzahl hat sich unerwartet verändert.")
# Rows where every vacation column is NaN are counted as "no vacation match".
missing = merged[vac_cols].isna().all(axis=1).sum()
print(f"Info: {missing} Zeitschritte ohne Ferien-Match (ist normal, wenn ausserhalb Gültigkeit).")

# ===============================
# SAVE & QUICK LOOK
# ===============================
# CSV export
merged.to_csv(OUT_FILE, index=False)
print(f"✅ Fertig: {OUT_FILE}")

# Display the table (e.g. in Jupyter):
merged


Info: 0 Zeitschritte ohne Ferien-Match (ist normal, wenn ausserhalb Gültigkeit).
✅ Fertig: flights_with_weather_vacations_HALBHOUR_merged.csv


Unnamed: 0,ts,interval_end,n_arrivals,n_departures,n_total,grid_time,station,air_temperature_2_m_above_ground_current_value_[degc],air_temperature_at_5_cm_above_grass_current_value_[degc],air_temperature_at_surface_current_value_[degc],chill_temperature_current_value_[degc],relative_air_humidity_2_m_above_ground_current_value_[percent],dew_point_2_m_above_ground_current_value_[degc],vapour_pressure_2_m_above_ground_current_value_[hpa],atmospheric_pressure_at_barometric_altitude_(qfe)_current_value_[hpa],pressure_reduced_to_sea_level_according_to_standard_atmosphere_(qnh)_current_value_[hpa],pressure_reduced_to_sea_level_(qff)_current_value_[hpa],geopotential_height_of_the_850_hpa_level_current_value_[gpm],geopotential_height_of_the_700_hpa_level_current_value_[gpm],gust_peak_(one_second)_maximum_in_m_per_s_[m_per_s],wind_speed_vectoriel_ten_minutes_mean_in_m_per_s_[m_per_s],wind_speed_scalar_ten_minutes_mean_in_m_per_s_[m_per_s],wind_direction_ten_minutes_mean_[deg],foehn_index_[code],wind_speed_ten_minutes_mean_in_km_per_h_[km_per_h],gust_peak_(three_seconds)_maximum_in_m_per_s_[m_per_s],gust_peak_(one_second)_maximum_in_km_per_h_[km_per_h],gust_peak_(three_seconds)_maximum_in_km_per_h_[km_per_h],precipitation_ten_minutes_total_[mm],snow_depth_(automatic_measurement)_current_value_[cm],global_radiation_ten_minutes_mean_[w_per_m²],diffuse_radiation_ten_minutes_mean_[w_per_m²],longwave_incoming_radiation_ten_minutes_mean_[w_per_m²],longwave_outgoing_radiation_ten_minute_mean_[w_per_m²],shortwave_reflected_radiation_ten_minute_mean_[w_per_m²],sunshine_duration_ten_minutes_total_[min],code,type,correction,mod,station_id,cycle,wind_dir,wind_speed,wind_gust,wind_dir_from,wind_dir_to,vis,vis_dir,max_vis,max_vis_dir,temp,dewpt,press,runway,weather,recent,sky,windshear,wind_speed_peak,wind_dir_peak,peak_wind_time,wind_shift_time,max_temp_6hr,min_temp_6hr,max_temp_24hr,min_temp_24hr,press_sea_level,precip_1hr,precip_3hr,precip_6hr,precip_24hr,snowdepth,ice_accretion_1hr,ice_
accretion_3hr,ice_accretion_6hr,airport,AG,AI,AR,BE,BL,BS,FR,GE,GL,GR,JU,LU,NE,NW,OW,SG,SH,SO,SZ,TG,TI,UR,VD,VS,ZG,ZH,COUNT_ANY,ANY,SHARE_ANY
0,2023-01-01 05:50:00+00:00,2023-01-01 06:20:00,5,0,5,2023-01-01 05:50:00,KLO,13.0,9.9,9.1,13.0,66.3,6.9,9.9,973.0,1023.8,1023.5,,,7.3,4.0,4.1,224.0,,14.8,6.9,26.3,24.8,0.0,0.0,2,1.0,302,,,0,LSZH 010550Z 22007KT CAVOK 13/07 Q1023 NOSIG,METAR,,AUTO,LSZH,6.0,220 degrees,7 knots,,,,10000 meters,,,,13.0 C,7.0 C,1023.0 mb,[],[],[],[],[],,,,,,,,,,,,,,,,,,LSZH,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,26,True,1.0
1,2023-01-01 06:20:00+00:00,2023-01-01 06:50:00,1,3,4,2023-01-01 06:20:00,KLO,12.5,9.7,9.0,12.5,68.4,6.8,9.9,973.2,1024.0,1023.8,,,6.1,4.1,4.1,233.0,,14.8,5.7,22.0,20.5,0.0,0.0,2,1.0,300,,,0,LSZH 010620Z 24007KT CAVOK 13/07 Q1024 NOSIG,METAR,,AUTO,LSZH,6.0,240 degrees,7 knots,,,,10000 meters,,,,13.0 C,7.0 C,1024.0 mb,[],[],[],[],[],,,,,,,,,,,,,,,,,,LSZH,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,26,True,1.0
2,2023-01-01 06:50:00+00:00,2023-01-01 07:20:00,4,9,13,2023-01-01 06:50:00,KLO,13.1,10.8,10.1,13.1,65.4,6.8,9.9,973.2,1024.0,1023.7,,,9.3,6.2,6.3,231.0,,22.7,9.1,33.5,32.8,0.0,0.0,1,0.0,300,,,0,LSZH 010650Z 23010KT CAVOK 13/07 Q1024 NOSIG,METAR,,AUTO,LSZH,7.0,230 degrees,10 knots,,,,10000 meters,,,,13.0 C,7.0 C,1024.0 mb,[],[],[],[],[],,,,,,,,,,,,,,,,,,LSZH,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,26,True,1.0
3,2023-01-01 07:20:00+00:00,2023-01-01 07:50:00,1,18,19,2023-01-01 07:20:00,KLO,12.8,10.7,10.0,12.8,66.3,6.7,9.8,973.4,1024.2,1024.0,,,9.0,5.3,5.4,235.0,,19.4,8.8,32.4,31.7,0.0,0.0,10,8.0,299,,,0,LSZH 010720Z 24007KT CAVOK 13/06 Q1024 NOSIG,METAR,,AUTO,LSZH,7.0,240 degrees,7 knots,,,,10000 meters,,,,13.0 C,6.0 C,1024.0 mb,[],[],[],[],[],,,,,,,,,,,,,,,,,,LSZH,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,26,True,1.0
4,2023-01-01 07:50:00+00:00,2023-01-01 08:20:00,5,8,13,2023-01-01 07:50:00,KLO,13.4,12.3,11.7,13.4,64.0,6.7,9.8,973.7,1024.5,1024.2,,,12.7,7.2,7.3,228.0,,26.3,12.2,45.7,43.9,0.0,0.0,40,25.0,300,,,8,LSZH 010750Z 24011KT CAVOK 14/06 Q1024 NOSIG,METAR,,AUTO,LSZH,8.0,240 degrees,11 knots,,,,10000 meters,,,,14.0 C,6.0 C,1024.0 mb,[],[],[],[],[],,,,,,,,,,,,,,,,,,LSZH,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,26,True,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47311,2025-09-12 21:20:00+00:00,2025-09-12 21:50:00,15,9,24,2025-09-12 21:20:00,KLO,11.3,7.3,7.7,11.3,94.8,10.5,12.7,971.6,1022.3,1022.4,,,1.9,0.9,1.1,127.0,,4.0,1.8,6.8,6.5,0.0,0.0,1,0.0,298,,,0,LSZH 122120Z VRB01KT CAVOK 12/11 Q1022 NOSIG,METAR,,AUTO,LSZH,21.0,,1 knots,,,,10000 meters,,,,12.0 C,11.0 C,1022.0 mb,[],[],[],[],[],,,,,,,,,,,,,,,,,,LSZH,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0,False,0.0
47312,2025-09-12 21:50:00+00:00,2025-09-12 22:20:00,12,12,24,2025-09-12 21:50:00,KLO,10.5,6.2,7.2,10.5,96.9,10.0,12.3,971.5,1022.2,1022.4,,,1.0,0.5,0.5,146.0,,1.8,0.9,3.6,3.2,0.0,0.0,2,1.0,297,,,0,LSZH 122150Z AUTO VRB01KT CAVOK 10/10 Q1022 NOSIG,METAR,,AUTO,LSZH,22.0,,1 knots,,,,10000 meters,,,,10.0 C,10.0 C,1022.0 mb,[],[],[],[],[],,,,,,,,,,,,,,,,,,LSZH,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0,False,0.0
47313,2025-09-12 22:20:00+00:00,2025-09-12 22:50:00,13,5,18,2025-09-12 22:20:00,KLO,9.6,6.2,7.0,8.7,95.2,8.9,11.4,971.6,1022.3,1022.7,,,2.6,2.0,2.0,304.0,,7.2,2.5,9.4,9.0,0.0,0.0,3,2.0,294,,,0,LSZH 122220Z AUTO 32003KT 9999 FEW065 09/09 Q1...,METAR,,AUTO,LSZH,22.0,320 degrees,3 knots,,,,greater than 10000 meters,,,,9.0 C,9.0 C,1022.0 mb,[],[],[],"[('FEW', <metar.Datatypes.distance object at 0...",[],,,,,,,,,,,,,,,,,,LSZH,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0,False,0.0
47314,2025-09-12 22:50:00+00:00,2025-09-12 23:20:00,4,6,10,2025-09-12 22:50:00,KLO,9.3,6.0,7.0,8.4,97.2,8.9,11.4,971.7,1022.4,1022.8,,,2.5,1.9,1.9,296.0,,6.8,2.4,9.0,8.6,0.0,0.0,1,0.0,297,,,0,LSZH 122250Z AUTO 31004KT 9999 FEW050 09/09 Q1...,METAR,,AUTO,LSZH,23.0,310 degrees,4 knots,,,,greater than 10000 meters,,,,9.0 C,9.0 C,1022.0 mb,[],[],[],"[('FEW', <metar.Datatypes.distance object at 0...",[],,,,,,,,,,,,,,,,,,LSZH,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0,False,0.0
