### Herunterladen der Wetterdaten für die zehn definierten Orte

- Downloadfunktion und Ortsliste definieren
- Daten für jeden Ort herunterladen und entpacken
- Die Pfade der einzelnen Wetterdateien in datei_liste.txt speichern

In [4]:
import requests
import zipfile
import io
import os

def search_for_link(url: str, search_text: str) -> str:
    """Return the double-quoted string on the page at *url* that contains *search_text*.

    The page is fetched with an HTTP GET. The first occurrence of
    *search_text* is located and the text between the nearest double quote
    before it and the next double quote after it is returned (for an HTML
    index page this is typically the value of an ``href`` attribute).

    Args:
        url: Address of the page to scan.
        search_text: Text that must occur inside the wanted quoted string.

    Returns:
        The quoted string without its quotes, or the sentinel
        ``"search_text not found"`` when the response status is not 200,
        the text does not occur, or the match has no closing quote.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    not_found = "search_text not found"

    # Without a timeout a stalled server would block the call forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return not_found

    content = response.text
    found_index = content.find(search_text)
    if found_index == -1:
        return not_found

    # rfind yields -1 when no quote precedes the match; +1 then starts the
    # slice at the beginning of the page, exactly as before.
    start_index = content.rfind('"', 0, found_index) + 1
    end_index = content.find('"', found_index)
    if end_index == -1:
        # No closing quote: slicing with -1 would silently drop the last
        # character of the page, so report a miss instead.
        return not_found

    return content[start_index:end_index]

# Index pages of the DWD open-data server (CDC, daily "kl" observations for
# Germany): the "historical" directory and the "recent" directory.
cdc_historical_index_url = "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/historical/"
# NOTE(review): not referenced by the cells visible here.
cdc_recent_index_url = "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/"

# Places to download, each as [id, place name]. The id is the text searched
# for in the index page to locate the place's archive
# (presumably the DWD station id - confirm).
orte = [
    ["00856", "Chieming"],
    ["00891", "Cuxhaven"],
    ["01346", "Feldberg/Schwarzwald"],
    ["02290", "Hohenpeißenberg"],
    ["02712", "Konstanz"],
    ["03631", "Norderney"],
    ["03730", "Oberstdorf"],
    ["03987", "Potsdam"],
    ["04271", "Rostock-Warnemünde"],
    ["04625", "Schwerin"]]

In [5]:
# Locate, download and unpack the historical archive of every place.
# ortsdaten_paths collects the folder each archive was extracted into.
ortsdaten_paths = []

for ort in orte:
    # Find the archive's file name in the index page
    file_name = search_for_link(cdc_historical_index_url, ort[0])
    if file_name == "search_text not found":
        # search_for_link signals a miss with this sentinel string; appending
        # it to the URL would only fetch an error page that fails later as an
        # unreadable zip archive.
        raise RuntimeError(f"No archive found for {ort[1]} ({ort[0]})")
    link = cdc_historical_index_url + file_name

    # Download; fail here with the HTTP error rather than on a bad zip below
    response = requests.get(link, timeout=60)
    response.raise_for_status()

    # Unpack into a folder named after the archive (file name up to the
    # first dot)
    foldername = link.split("/")[-1].split(".")[0]
    path = "datasets/orte/" + foldername
    # The context manager closes the archive even if extraction fails
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
        zip_file.extractall(path)

    ortsdaten_paths.append(path)

In [6]:
# Create the list file holding the path of each place's climate data file.

# Find the "produkt..." file below every extracted folder
found_paths = []
for path in ortsdaten_paths:
    for root, dirs, files in os.walk(path):
        for entry in files:
            # Require something after the prefix, so a file named exactly
            # "produkt" is skipped
            if entry.startswith("produkt") and len(entry) > len("produkt"):
                found_paths.append(os.path.join(root, entry))

# Write one path per line. Mode "w" already empties an existing file, so a
# single open replaces the former truncate-then-append-per-path sequence;
# the file is still created (empty) when nothing was found.
with open("datasets/orte/datei_liste.txt", "w", encoding="utf-8") as list_file:
    for path in found_paths:
        print(path)
        list_file.write(path + "\n")

datasets/orte/tageswerte_KL_00856_19980101_20231231_hist/produkt_klima_tag_19980101_20231231_00856.txt
datasets/orte/tageswerte_KL_00891_19460101_20231231_hist/produkt_klima_tag_19460101_20231231_00891.txt
datasets/orte/tageswerte_KL_01346_19450101_20231231_hist/produkt_klima_tag_19450101_20231231_01346.txt
datasets/orte/tageswerte_KL_02290_17810101_20231231_hist/produkt_klima_tag_17810101_20231231_02290.txt
datasets/orte/tageswerte_KL_02712_19590701_20231231_hist/produkt_klima_tag_19590701_20231231_02712.txt
datasets/orte/tageswerte_KL_03631_18580301_20231231_hist/produkt_klima_tag_18580301_20231231_03631.txt
datasets/orte/tageswerte_KL_03730_19100101_20231231_hist/produkt_klima_tag_19100101_20231231_03730.txt
datasets/orte/tageswerte_KL_03987_18930101_20231231_hist/produkt_klima_tag_18930101_20231231_03987.txt
datasets/orte/tageswerte_KL_04271_19470101_20231231_hist/produkt_klima_tag_19470101_20231231_04271.txt
datasets/orte/tageswerte_KL_04625_18900101_20231231_hist/produkt_klima_ta