In [17]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Liste der URLs für die drei Jahre
urls = [
    "https://raw.githubusercontent.com/RFankhauser/MSc_WI_BINA/refs/heads/main/Semesterarbeit/Wetter_Z%C3%BCrich_st%C3%BCndlich/ugz_ogd_meteo_h1_2022.csv",
    "https://raw.githubusercontent.com/RFankhauser/MSc_WI_BINA/refs/heads/main/Semesterarbeit/Wetter_Z%C3%BCrich_st%C3%BCndlich/ugz_ogd_meteo_h1_2023.csv",
    "https://raw.githubusercontent.com/RFankhauser/MSc_WI_BINA/refs/heads/main/Semesterarbeit/Wetter_Z%C3%BCrich_st%C3%BCndlich/ugz_ogd_meteo_h1_2024.csv"
]

# Alle Dateien einlesen und in einer Liste speichern
dataframes = []
for url in urls:
    df = pd.read_csv(url, sep=",", encoding="utf-8")
    dataframes.append(df)

# Alle DataFrames vertikal zusammenfügen
df_all_wetter = pd.concat(dataframes, ignore_index=True)

# Datum in datetime konvertieren (korrekte Variable verwenden)
df_all_wetter["Datum"] = pd.to_datetime(df_all_wetter["Datum"], errors="coerce")

display(df_all_wetter.head(10))


Unnamed: 0,Datum,Standort,Parameter,Intervall,Einheit,Wert,Status
0,2022-01-01 00:00:00+01:00,Zch_Stampfenbachstrasse,T,h1,gradcelsius,7.22,bereinigt
1,2022-01-01 00:00:00+01:00,Zch_Stampfenbachstrasse,Hr,h1,relative_Luftfeuchtigkeit,82.93,bereinigt
2,2022-01-01 00:00:00+01:00,Zch_Stampfenbachstrasse,p,h1,hPa,977.2,bereinigt
3,2022-01-01 00:00:00+01:00,Zch_Stampfenbachstrasse,RainDur,h1,min,0.0,bereinigt
4,2022-01-01 00:00:00+01:00,Zch_Stampfenbachstrasse,StrGlo,h1,W/m2,0.02,bereinigt
5,2022-01-01 00:00:00+01:00,Zch_Stampfenbachstrasse,WD,h1,°,184.59,bereinigt
6,2022-01-01 00:00:00+01:00,Zch_Stampfenbachstrasse,WVv,h1,m/s,1.04,bereinigt
7,2022-01-01 00:00:00+01:00,Zch_Stampfenbachstrasse,WVs,h1,m/s,1.16,bereinigt
8,2022-01-01 00:00:00+01:00,Zch_Schimmelstrasse,T,h1,gradcelsius,5.96,bereinigt
9,2022-01-01 00:00:00+01:00,Zch_Schimmelstrasse,Hr,h1,relative_Luftfeuchtigkeit,,bereinigt


In [16]:

# NA-Werte in "Wert" mit NaN setzen, um korrekt zu mitteln
df_all_wetter["Wert"] = pd.to_numeric(df_all_wetter["Wert"], errors="coerce")

# Durchschnitt pro Stunde und Parameter berechnen
df_avg = df_all_wetter.groupby(["Datum", "Parameter"])["Wert"].mean().reset_index()

# Umstrukturieren der Tabelle: Spalten für die einzelnen Parameter
df_pivot_wetter = df_avg.pivot(index="Datum", columns="Parameter", values="Wert").reset_index()

# DataFrame anzeigen
display(df_pivot_wetter.head(10))

Parameter,Datum,Hr,RainDur,StrGlo,T,WD,WVs,WVv,p
0,2022-01-01 00:00:00+01:00,84.575,0.0,0.02,6.59,182.53,0.603333,0.39,978.233333
1,2022-01-01 01:00:00+01:00,85.035,0.0,0.02,6.313333,167.976667,0.503333,0.376667,978.106667
2,2022-01-01 02:00:00+01:00,86.815,0.0,0.03,6.023333,243.186667,0.553333,0.286667,978.163333
3,2022-01-01 03:00:00+01:00,87.505,0.0,0.02,5.553333,171.72,0.443333,0.21,978.45
4,2022-01-01 04:00:00+01:00,88.5,0.0,0.02,5.083333,214.436667,0.356667,0.23,978.446667
5,2022-01-01 05:00:00+01:00,88.9,0.0,0.02,4.833333,63.666667,0.346667,0.25,978.573333
6,2022-01-01 06:00:00+01:00,88.98,0.0,0.02,4.466667,143.23,0.576667,0.625,978.443333
7,2022-01-01 07:00:00+01:00,89.725,0.0,0.02,4.466667,73.11,0.416667,0.115,978.36
8,2022-01-01 08:00:00+01:00,89.19,0.0,20.21,3.843333,151.373333,0.716667,0.62,978.826667
9,2022-01-01 09:00:00+01:00,89.92,0.0,119.28,3.616667,236.896667,0.563333,0.503333,978.686667
