In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import re
pd.set_option('display.max_columns', None)  # Show every column when a DataFrame is displayed (no column truncation)

## Merge Daten und METAR

In [2]:
from pathlib import Path
import pandas as pd

# ==========================================
# 1) Settings & paths
# ==========================================
# Input and output CSV files; the paths are relative, so they resolve against
# the working directory the notebook is run from.
P_METAR   = Path("metar_official_HALBHOUR_MATCHED.csv")
P_FLIGHTS = Path("flights_with_meteosuisse_HALBHOUR_merged.csv")
P_OUTPUT  = Path("flights_with_weather_HALBHOUR_merged.csv")

# Origin and step width (in minutes) of the time grid. Both are handed to
# snap_to_grid() when the flights file has no ready-made 'grid_time' column.
GRID_START = pd.Timestamp("2023-01-01 05:50", tz="UTC")
STEP_MIN   = 30

# ==========================================
# 2) Helper functions
# ==========================================
def to_utc_naive(s: pd.Series) -> pd.Series:
    """Parse ``s`` as timestamps and return them as tz-naive UTC values.

    Values are parsed with ``utc=True``: timezone-aware inputs are converted
    to UTC, inputs without an offset are taken to be UTC already. Entries that
    cannot be parsed become ``NaT`` (``errors="coerce"``).

    Args:
        s: Series of datetime-like values (e.g. strings read from a CSV).

    Returns:
        A Series of timezone-naive timestamps expressed in UTC.
    """
    parsed_utc = pd.to_datetime(s, errors="coerce", utc=True)
    # The values are already in UTC here, so stripping the tz info is enough.
    return parsed_utc.dt.tz_localize(None)

def snap_to_grid(ts: pd.Series, grid_start: pd.Timestamp, step_min: int = 30) -> pd.Series:
    """Floor timestamps onto a regular grid anchored at ``grid_start``.

    Each value is mapped to the latest grid point
    ``grid_start + k * step_min minutes`` (``k`` an integer, possibly
    negative) that is not later than the value itself.

    Args:
        ts: Series of datetime-like values. Unparseable entries are coerced
            to ``NaT``.
        grid_start: Anchor of the grid. If it is a timezone-aware
            ``pd.Timestamp``, ``ts`` is parsed with ``utc=True`` and both
            sides are compared as tz-naive UTC. Otherwise ``ts`` is parsed
            without timezone conversion and ``grid_start`` is passed through
            ``pd.Timestamp(...)`` unchanged.
        step_min: Grid spacing in minutes; must be positive.

    Returns:
        A Series of grid timestamps aligned with ``ts``.

    Raises:
        ValueError: If ``step_min`` is not positive. A zero or negative step
            does not define a floor grid and would otherwise yield
            meaningless results without any error.
    """
    if step_min <= 0:
        raise ValueError(f"step_min must be a positive number of minutes, got {step_min!r}")

    if isinstance(grid_start, pd.Timestamp) and grid_start.tz is not None:
        # Aware anchor: bring both sides to tz-naive UTC before subtracting.
        ts_aware = pd.to_datetime(ts, errors="coerce", utc=True)
        ts_naive = ts_aware.dt.tz_localize(None)
        gs_naive = grid_start.tz_convert("UTC").tz_localize(None)
    else:
        ts_naive = pd.to_datetime(ts, errors="coerce")
        gs_naive = pd.Timestamp(grid_start)

    step = pd.Timedelta(minutes=step_min)
    # Floor division counts the whole steps since the anchor; multiplying
    # back yields the offset of the grid point at or before each timestamp.
    whole_steps = (ts_naive - gs_naive) // step
    return gs_naive + whole_steps * step

# ==========================================
# 3) Load data
# ==========================================
# Read the METAR table first, then the flights table, both as UTF-8 CSV.
metar, flights = (
    pd.read_csv(csv_path, encoding="utf-8") for csv_path in (P_METAR, P_FLIGHTS)
)

# ==========================================
# 4) Harmonise the time columns
# ==========================================
# Both tables end up with a tz-naive UTC 'grid_time' column that serves as
# the merge key.
if "time" not in metar.columns:
    raise ValueError("Im METAR-File wird eine Spalte 'time' erwartet.")
metar["time"] = to_utc_naive(metar["time"])
metar = metar.rename(columns={"time": "grid_time"}).copy()

if "grid_time" in flights.columns:
    flights["grid_time"] = to_utc_naive(flights["grid_time"])
elif "interval_start" in flights.columns:
    # No ready-made key: derive it by flooring the interval start onto the grid.
    flights["interval_start"] = to_utc_naive(flights["interval_start"])
    flights["grid_time"] = snap_to_grid(flights["interval_start"], GRID_START, STEP_MIN)
else:
    raise ValueError("Im Flights-File wird entweder 'grid_time' oder 'interval_start' benÃ¶tigt.")

# Remove duplicate timestamps, keeping the last row per key.
# A stable sort is required so that "last" reliably means "last in the input
# order"; the default sort algorithm gives no such guarantee for equal keys.
flights = (
    flights
    .sort_values("grid_time", kind="mergesort")
    .drop_duplicates(subset=["grid_time"], keep="last")
)
# METAR rows whose time could not be parsed (NaT) are dropped: pandas merge
# matches null keys with each other, so such a row would otherwise be joined
# onto a flights row whose own timestamp is NaT.
metar = (
    metar
    .dropna(subset=["grid_time"])
    .sort_values("grid_time", kind="mergesort")
    .drop_duplicates(subset=["grid_time"], keep="last")
)

# ==========================================
# 5) Merge: flights are the reference (every flights row is kept)
#    METAR data is appended on the right
# ==========================================
# Left join on 'grid_time'; on a column-name clash the flights column keeps
# its name and the METAR column receives the '_metar' suffix.
merged = pd.merge(
    flights,
    metar,
    how="left",
    on="grid_time",
    suffixes=("", "_metar"),
)

# ==========================================
# 6) Optional: move the METAR columns to the end
# ==========================================
# (keeps grid_time and all flights columns at the front)
# Columns are classified by origin. The merge uses an empty left suffix, so
# every flights column keeps its original name in `merged`; whatever is not a
# flights column was contributed by METAR (suffixed with '_metar' on a clash).
# Testing membership in metar.columns instead would wrongly treat a flights
# column that shares its name with a METAR column as a METAR column.
flights_names = set(flights.columns)
flight_cols = [c for c in merged.columns if c in flights_names]
metar_cols = [c for c in merged.columns if c not in flights_names]
merged = merged[flight_cols + metar_cols]

# ==========================================
# 7) Diagnostics
# ==========================================
print("\nâœ… Merge abgeschlossen")
print(f"Flights-Zeilen: {len(flights):,}")
print(f"METAR-Zeilen:   {len(metar):,}")
print(f"Output-Zeilen:  {len(merged):,}")
# Rows in which every single column is NaN.
missing_rows = merged.isna().all(axis=1).sum()
print(f"VollstÃ¤ndig leere Zeilen: {missing_rows:,}")
# The check above cannot flag a flights row that merely found no METAR
# partner, because its flights columns are still populated. Therefore also
# count the rows in which all columns that did not come from `flights` are NaN.
weather_cols = [c for c in merged.columns if c not in flights.columns]
rows_without_metar = int(merged[weather_cols].isna().all(axis=1).sum())
print(f"Zeilen ohne METAR-Werte: {rows_without_metar:,}")

# ==========================================
# 8) Save
# ==========================================
# Write the merged table without the index column, then echo a short preview.
merged.to_csv(P_OUTPUT, encoding="utf-8", index=False)
print(f"\nðŸ’¾ Datei gespeichert als: {P_OUTPUT}")
preview = merged.head(8)
print("\nVorschau:", preview, sep="\n")



âœ… Merge abgeschlossen
Flights-Zeilen: 47,316
METAR-Zeilen:   47,316
Output-Zeilen:  47,316
VollstÃ¤ndig leere Zeilen: 0

ðŸ’¾ Datei gespeichert als: flights_with_weather_HALBHOUR_merged.csv

Vorschau:
        interval_start         interval_end  n_arrivals  n_departures  \
0  2023-01-01 05:50:00  2023-01-01 06:20:00           5             0   
1  2023-01-01 06:20:00  2023-01-01 06:50:00           1             3   
2  2023-01-01 06:50:00  2023-01-01 07:20:00           4             9   
3  2023-01-01 07:20:00  2023-01-01 07:50:00           1            18   
4  2023-01-01 07:50:00  2023-01-01 08:20:00           5             8   
5  2023-01-01 08:20:00  2023-01-01 08:50:00           8             1   
6  2023-01-01 08:50:00  2023-01-01 09:20:00           1             1   
7  2023-01-01 09:20:00  2023-01-01 09:50:00           4             3   

   n_total           grid_time station  \
0        5 2023-01-01 05:50:00     KLO   
1        4 2023-01-01 06:20:00     KLO   
2       13 2

In [3]:
# Read the merged CSV back from disk (same file name as P_OUTPUT in the merge
# cell) and display it as a sanity check. No parse_dates is given, so the time
# columns are loaded as plain strings rather than datetimes.
df = pd.read_csv("flights_with_weather_HALBHOUR_merged.csv")
df

Unnamed: 0,interval_start,interval_end,n_arrivals,n_departures,n_total,grid_time,station,air_temperature_2_m_above_ground_current_value_[degc],air_temperature_at_5_cm_above_grass_current_value_[degc],air_temperature_at_surface_current_value_[degc],chill_temperature_current_value_[degc],relative_air_humidity_2_m_above_ground_current_value_[percent],dew_point_2_m_above_ground_current_value_[degc],vapour_pressure_2_m_above_ground_current_value_[hpa],atmospheric_pressure_at_barometric_altitude_(qfe)_current_value_[hpa],pressure_reduced_to_sea_level_according_to_standard_atmosphere_(qnh)_current_value_[hpa],pressure_reduced_to_sea_level_(qff)_current_value_[hpa],geopotential_height_of_the_850_hpa_level_current_value_[gpm],geopotential_height_of_the_700_hpa_level_current_value_[gpm],gust_peak_(one_second)_maximum_in_m_per_s_[m_per_s],wind_speed_vectoriel_ten_minutes_mean_in_m_per_s_[m_per_s],wind_speed_scalar_ten_minutes_mean_in_m_per_s_[m_per_s],wind_direction_ten_minutes_mean_[deg],foehn_index_[code],wind_speed_ten_minutes_mean_in_km_per_h_[km_per_h],gust_peak_(three_seconds)_maximum_in_m_per_s_[m_per_s],gust_peak_(one_second)_maximum_in_km_per_h_[km_per_h],gust_peak_(three_seconds)_maximum_in_km_per_h_[km_per_h],precipitation_ten_minutes_total_[mm],snow_depth_(automatic_measurement)_current_value_[cm],global_radiation_ten_minutes_mean_[w_per_mÂ²],diffuse_radiation_ten_minutes_mean_[w_per_mÂ²],longwave_incoming_radiation_ten_minutes_mean_[w_per_mÂ²],longwave_outgoing_radiation_ten_minute_mean_[w_per_mÂ²],shortwave_reflected_radiation_ten_minute_mean_[w_per_mÂ²],sunshine_duration_ten_minutes_total_[min],code,type,correction,mod,station_id,cycle,wind_dir,wind_speed,wind_gust,wind_dir_from,wind_dir_to,vis,vis_dir,max_vis,max_vis_dir,temp,dewpt,press,runway,weather,recent,sky,windshear,wind_speed_peak,wind_dir_peak,peak_wind_time,wind_shift_time,max_temp_6hr,min_temp_6hr,max_temp_24hr,min_temp_24hr,press_sea_level,precip_1hr,precip_3hr,precip_6hr,precip_24hr,snowdepth,ice_a
ccretion_1hr,ice_accretion_3hr,ice_accretion_6hr,airport
0,2023-01-01 05:50:00,2023-01-01 06:20:00,5,0,5,2023-01-01 05:50:00,KLO,13.0,9.9,9.1,13.0,66.3,6.9,9.9,973.0,1023.8,1023.5,,,7.3,4.0,4.1,224.0,,14.8,6.9,26.3,24.8,0.0,0.0,2,1.0,302,,,0,LSZH 010550Z 22007KT CAVOK 13/07 Q1023 NOSIG,METAR,,AUTO,LSZH,6.0,220 degrees,7 knots,,,,10000 meters,,,,13.0 C,7.0 C,1023.0 mb,[],[],[],[],[],,,,,,,,,,,,,,,,,,LSZH
1,2023-01-01 06:20:00,2023-01-01 06:50:00,1,3,4,2023-01-01 06:20:00,KLO,12.5,9.7,9.0,12.5,68.4,6.8,9.9,973.2,1024.0,1023.8,,,6.1,4.1,4.1,233.0,,14.8,5.7,22.0,20.5,0.0,0.0,2,1.0,300,,,0,LSZH 010620Z 24007KT CAVOK 13/07 Q1024 NOSIG,METAR,,AUTO,LSZH,6.0,240 degrees,7 knots,,,,10000 meters,,,,13.0 C,7.0 C,1024.0 mb,[],[],[],[],[],,,,,,,,,,,,,,,,,,LSZH
2,2023-01-01 06:50:00,2023-01-01 07:20:00,4,9,13,2023-01-01 06:50:00,KLO,13.1,10.8,10.1,13.1,65.4,6.8,9.9,973.2,1024.0,1023.7,,,9.3,6.2,6.3,231.0,,22.7,9.1,33.5,32.8,0.0,0.0,1,0.0,300,,,0,LSZH 010650Z 23010KT CAVOK 13/07 Q1024 NOSIG,METAR,,AUTO,LSZH,7.0,230 degrees,10 knots,,,,10000 meters,,,,13.0 C,7.0 C,1024.0 mb,[],[],[],[],[],,,,,,,,,,,,,,,,,,LSZH
3,2023-01-01 07:20:00,2023-01-01 07:50:00,1,18,19,2023-01-01 07:20:00,KLO,12.8,10.7,10.0,12.8,66.3,6.7,9.8,973.4,1024.2,1024.0,,,9.0,5.3,5.4,235.0,,19.4,8.8,32.4,31.7,0.0,0.0,10,8.0,299,,,0,LSZH 010720Z 24007KT CAVOK 13/06 Q1024 NOSIG,METAR,,AUTO,LSZH,7.0,240 degrees,7 knots,,,,10000 meters,,,,13.0 C,6.0 C,1024.0 mb,[],[],[],[],[],,,,,,,,,,,,,,,,,,LSZH
4,2023-01-01 07:50:00,2023-01-01 08:20:00,5,8,13,2023-01-01 07:50:00,KLO,13.4,12.3,11.7,13.4,64.0,6.7,9.8,973.7,1024.5,1024.2,,,12.7,7.2,7.3,228.0,,26.3,12.2,45.7,43.9,0.0,0.0,40,25.0,300,,,8,LSZH 010750Z 24011KT CAVOK 14/06 Q1024 NOSIG,METAR,,AUTO,LSZH,8.0,240 degrees,11 knots,,,,10000 meters,,,,14.0 C,6.0 C,1024.0 mb,[],[],[],[],[],,,,,,,,,,,,,,,,,,LSZH
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47311,2025-09-12 21:20:00,2025-09-12 21:50:00,15,9,24,2025-09-12 21:20:00,KLO,11.3,7.3,7.7,11.3,94.8,10.5,12.7,971.6,1022.3,1022.4,,,1.9,0.9,1.1,127.0,,4.0,1.8,6.8,6.5,0.0,0.0,1,0.0,298,,,0,LSZH 122120Z VRB01KT CAVOK 12/11 Q1022 NOSIG,METAR,,AUTO,LSZH,21.0,,1 knots,,,,10000 meters,,,,12.0 C,11.0 C,1022.0 mb,[],[],[],[],[],,,,,,,,,,,,,,,,,,LSZH
47312,2025-09-12 21:50:00,2025-09-12 22:20:00,12,12,24,2025-09-12 21:50:00,KLO,10.5,6.2,7.2,10.5,96.9,10.0,12.3,971.5,1022.2,1022.4,,,1.0,0.5,0.5,146.0,,1.8,0.9,3.6,3.2,0.0,0.0,2,1.0,297,,,0,LSZH 122150Z AUTO VRB01KT CAVOK 10/10 Q1022 NOSIG,METAR,,AUTO,LSZH,22.0,,1 knots,,,,10000 meters,,,,10.0 C,10.0 C,1022.0 mb,[],[],[],[],[],,,,,,,,,,,,,,,,,,LSZH
47313,2025-09-12 22:20:00,2025-09-12 22:50:00,13,5,18,2025-09-12 22:20:00,KLO,9.6,6.2,7.0,8.7,95.2,8.9,11.4,971.6,1022.3,1022.7,,,2.6,2.0,2.0,304.0,,7.2,2.5,9.4,9.0,0.0,0.0,3,2.0,294,,,0,LSZH 122220Z AUTO 32003KT 9999 FEW065 09/09 Q1...,METAR,,AUTO,LSZH,22.0,320 degrees,3 knots,,,,greater than 10000 meters,,,,9.0 C,9.0 C,1022.0 mb,[],[],[],"[('FEW', <metar.Datatypes.distance object at 0...",[],,,,,,,,,,,,,,,,,,LSZH
47314,2025-09-12 22:50:00,2025-09-12 23:20:00,4,6,10,2025-09-12 22:50:00,KLO,9.3,6.0,7.0,8.4,97.2,8.9,11.4,971.7,1022.4,1022.8,,,2.5,1.9,1.9,296.0,,6.8,2.4,9.0,8.6,0.0,0.0,1,0.0,297,,,0,LSZH 122250Z AUTO 31004KT 9999 FEW050 09/09 Q1...,METAR,,AUTO,LSZH,23.0,310 degrees,4 knots,,,,greater than 10000 meters,,,,9.0 C,9.0 C,1022.0 mb,[],[],[],"[('FEW', <metar.Datatypes.distance object at 0...",[],,,,,,,,,,,,,,,,,,LSZH
