# Alle notwendigen Inputdaten einlesen

## Imports & Generische Konfig

In [4]:
# Standard library
import glob
import os
from urllib.parse import urljoin

# Third-party libraries
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Variables
import config

# Preprocessing
import preprocessing_functions as pf

# Establish the database connection through the preprocessing helper,
# passing the database name configured in config.db_name
pf.db_connect(config.db_name)


## Historische Feinstaub Daten (ZH) auslesen

In [5]:
# Dataset page that is scraped below for links to the daily-value CSV files
url = 'https://data.stadt-zuerich.ch/dataset/ugz_luftschadstoffmessung_tageswerte'

# Fetch the page. A timeout keeps the cell from hanging forever on a stalled
# connection, and raise_for_status() stops us from parsing an HTTP error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the page
soup = BeautifulSoup(response.text, 'html.parser')

# Collect all matching CSV links
csv_links = []
for a in soup.find_all('a', href=True):
    link = a['href']
    if 'ugz_ogd_air_d1_2' in link and link.endswith('.csv'):
        # urljoin leaves absolute URLs untouched and resolves relative hrefs
        # against the page URL, so requests.get() always gets a full URL.
        csv_links.append(urljoin(url, link))

# The same file may be linked more than once on the page; download it once
# (dict.fromkeys de-duplicates while keeping the original order).
csv_links = list(dict.fromkeys(csv_links))

# Download the CSV files; create the target directory on a fresh checkout
os.makedirs('csv-files', exist_ok=True)
for link in csv_links:
    response = requests.get(link, timeout=60)
    # Without this check an HTML error page would be saved as a ".csv" file
    response.raise_for_status()
    with open('csv-files/' + link.split('/')[-1], 'wb') as f:
        f.write(response.content)

# List of all ugz CSV files in the csv-files directory.
# glob returns files in arbitrary order; sorting makes the row order of the
# combined frame reproducible between runs.
csv_files = sorted(glob.glob('csv-files/ugz*.csv'))

# Iterate over all CSV files and keep only the PM10 rows of the
# Stampfenbachstrasse station
filtered_frames = []
for file in csv_files:
    df = pd.read_csv(file)
    is_wanted_row = (df['Parameter'] == 'PM10') & (df['Standort'] == 'Zch_Stampfenbachstrasse')
    # .copy() gives an independent frame, so the column assignment below does
    # not write into a slice of df (avoids SettingWithCopyWarning).
    df_filtered = df.loc[is_wanted_row].copy()
    df_filtered['Datum'] = df_filtered['Datum'].apply(pf.format_timestamp)
    filtered_frames.append(df_filtered)

# Concatenate once instead of growing the frame inside the loop; ignore_index
# yields a fresh 0..n-1 index. With no input files the result is an empty
# DataFrame, as before.
AQI_history_ZH = pd.concat(filtered_frames, ignore_index=True) if filtered_frames else pd.DataFrame()

# Write the DataFrame to the database.
# NOTE(review): if_exists='append' adds the full history again on every run of
# this cell, so re-running duplicates rows (the weather table below uses
# 'replace') - confirm whether appending is intended.
# config.db_login is passed as the connection argument of to_sql - presumably
# a connection string or engine; verify against config.
AQI_history_ZH.to_sql(config.db_AQI_history, config.db_login, if_exists='append', index=False)


171

## Historische Wetterdaten (ZH) aus csv einlesen

In [6]:
# Load the weather-history CSV (path configured in config.csv_weatherhistory)
weather_history_ZH = pd.read_csv(filepath_or_buffer=config.csv_weatherhistory)

# Run every 'datetime' entry through the shared timestamp formatter
formatted_datetimes = weather_history_ZH['datetime'].apply(pf.format_timestamp)
weather_history_ZH['datetime'] = formatted_datetimes

# Replace the index with a fresh 0..n-1 range, discarding the old one
weather_history_ZH = weather_history_ZH.reset_index(drop=True)




## Anreicherung der Daten

In [7]:
# Simple moving averages of the temperature over windows of 7, 14 and 30 rows
# (presumably one row per day - confirm against the CSV)
window_sizes_temp = [7, 14, 30]

for window_size_temp in window_sizes_temp:
    column_name = "SMA_temp{}".format(window_size_temp)
    temp_window = weather_history_ZH['temp'].rolling(window=window_size_temp)
    weather_history_ZH[column_name] = temp_window.mean()

# Simple moving averages of the humidity over the same window lengths
window_sizes_hum = [7, 14, 30]

for window_size_hum in window_sizes_hum:
    column_name = "SMA_humidity{}".format(window_size_hum)
    humidity_window = weather_history_ZH['humidity'].rolling(window=window_size_hum)
    weather_history_ZH[column_name] = humidity_window.mean()

# Temperature range: maximum minus minimum temperature per row
weather_history_ZH['Temp_Range'] = weather_history_ZH['tempmax'].sub(weather_history_ZH['tempmin'])

# Season: take the second '.'-separated field of the 'datetime' string as the
# month number and fold it into a 1..4 index (12, 1, 2 -> 1; 3-5 -> 2;
# 6-8 -> 3; 9-11 -> 4), then translate the index into the season name.
seasons = {1: 'Winter', 2: 'Frühling', 3: 'Sommer', 4: 'Herbst'}
month_numbers = weather_history_ZH['datetime'].apply(lambda stamp: int(stamp.split('.')[1]))
season_index = (month_numbers % 12 + 3) // 3
weather_history_ZH['Season'] = season_index.map(seasons)

# Weather conditions
def weather_type(condition):
    """Map a free-text weather description to a coarse weather category.

    The match is case-insensitive and the first keyword found decides the
    category, checked in this order: 'cloudy' -> 'Cloudy', 'rain' -> 'Rainy',
    'snow' -> 'Snowy', 'overcast' -> 'Cloudy'. Text containing none of these
    keywords is classified as 'Clear'.

    Args:
        condition: Description text such as 'Rain, Partially cloudy'. Values
            that are not strings (e.g. None or NaN for a missing entry) are
            accepted.

    Returns:
        One of 'Cloudy', 'Rainy', 'Snowy' or 'Clear', or None when
        ``condition`` is not a string.
    """
    # A missing value has no text to inspect; report it as missing instead of
    # failing on .lower() or mislabelling it as 'Clear'.
    if not isinstance(condition, str):
        return None

    text = condition.lower()  # lower-case once instead of once per branch

    # NOTE(review): 'cloudy' is tested before 'rain', so a description such as
    # 'Rain, Partially cloudy' is labelled 'Cloudy'. This ordering is kept
    # unchanged - confirm whether precipitation should take precedence.
    if 'cloudy' in text:
        return 'Cloudy'
    if 'rain' in text:
        return 'Rainy'
    if 'snow' in text:
        return 'Snowy'
    # An overcast sky is fully cloud-covered; it used to fall through to
    # 'Clear'. Checked last so every previously matched description keeps
    # its category.
    if 'overcast' in text:
        return 'Cloudy'
    return 'Clear'

# Classify every 'conditions' entry with weather_type and store the labels
weather_labels = weather_history_ZH['conditions'].apply(weather_type)
weather_history_ZH['Weather_Type'] = weather_labels

# Write the DataFrame to the database, replacing an existing table of that name
weather_history_ZH.to_sql(
    name=config.db_weather_history,
    con=config.db_login,
    if_exists='replace',
    index=False,
)

376