In [None]:
import concurrent.futures
import io
import os
import tempfile
import zipfile
from pathlib import Path

import requests

In [None]:
# Project root directory, taken from the PROJECT_ROOT environment variable.
# os.getenv() returns None when the variable is unset and Path(None) would
# then fail with an opaque TypeError, so fail fast with an explicit message.
_project_root_env = os.getenv("PROJECT_ROOT")
if _project_root_env is None:
    raise RuntimeError(
        "Environment variable PROJECT_ROOT is not set; "
        "point it at the project root before running this notebook."
    )
PROJECT_ROOT = Path(_project_root_env)

# Directory that receives the downloaded weather archives and metadata files.
DIR_BASE = PROJECT_ROOT / "data" / "input" / "weather"

In [None]:
# Calendar years to download: 2018 through 2022 (the range end is exclusive).
years = [*range(2018, 2023)]
# Month numbers as zero-padded two-character strings, "01" through "12".
months = [str(month_number).zfill(2) for month_number in range(1, 13)]

In [None]:
# Common prefix of every file fetched by this notebook.
URL_BASE = "https://danepubliczne.imgw.pl/pl/datastore/getfiledown/Arch/Telemetria/Meteo/"
# Template for one monthly archive; fill in with .format(year=..., month=...).
URL_DATA = "".join((URL_BASE, "{year}/Meteo_{year}-{month}.zip"))

# Files fetched directly from URL_BASE (presumably parameter codes, station
# codes and a description of the dataset -- the names are Polish).
FILES_META = ["kody_parametr.csv", "kody_stacji.csv", "opis.txt"]

In [None]:
def download_and_extract(url, extract_dir, timeout=60):
    """Download a ZIP archive and unpack it into ``extract_dir``.

    Parameters
    ----------
    url : str
        Address of the ZIP archive to fetch.
    extract_dir : str or os.PathLike
        Directory that receives the archive members. It is created (with
        parents) if missing, even when the download itself then fails.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so that an unresponsive server cannot
        block the call indefinitely. Defaults to 60 seconds.

    Returns
    -------
    None
        Success and non-200 responses are reported with ``print``.

    Raises
    ------
    requests.RequestException
        On connection errors or when ``timeout`` expires.
    zipfile.BadZipFile
        When the downloaded payload is not a valid ZIP archive.
    """
    os.makedirs(extract_dir, exist_ok=True)

    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        # Name the URL so a failure can be attributed when this function is
        # called for many archives.
        print(f"Failed to download {url}. Status code: {response.status_code}")
        return

    # The body is already fully in memory, so open the archive from there.
    # This avoids the intermediate temp file, which used to be left behind
    # whenever opening or extracting the archive raised.
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
        zip_ref.extractall(extract_dir)

    print(f"Downloaded file {url.split('/')[-1]} and extracted to {extract_dir}")


In [None]:
# Download every monthly archive concurrently on a pool of 8 worker threads.
with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
    future_to_url = {}
    for year in years:
        for month in months:
            url = URL_DATA.format(year=year, month=month)
            # Named extract_dir rather than dir to avoid shadowing the builtin.
            extract_dir = DIR_BASE / str(year) / month

            # Fix for 2021: the January and February archives apparently use
            # an upper-case "ZIP" extension. Upper-case only the extension so
            # that no other "zip" substring in the URL can be touched.
            if year == 2021 and month in ['01', '02']:
                stem, dot, extension = url.rpartition(".")
                url = stem + dot + extension.upper()

            future = executor.submit(download_and_extract, url, extract_dir)
            future_to_url[future] = url

    # An exception raised inside a worker is stored on its future and stays
    # invisible unless result() is called, so check every future and report.
    for future in concurrent.futures.as_completed(future_to_url):
        try:
            future.result()
        except Exception as exc:
            print(f"Download of {future_to_url[future]} failed: {exc!r}")

In [None]:
# Download the metadata files that sit directly under URL_BASE into DIR_BASE.

# Create the target directory up front: open()/write would otherwise fail
# when no archive has been extracted under DIR_BASE before this cell runs.
DIR_BASE.mkdir(parents=True, exist_ok=True)

for filename in FILES_META:
    url = URL_BASE + filename
    # Bound the wait so an unresponsive server cannot hang the notebook.
    response = requests.get(url, timeout=60)

    if response.status_code != 200:
        # Name the file so the failure can be attributed.
        print(f"Failed to download {filename}. Status code: {response.status_code}")
        continue

    (DIR_BASE / filename).write_bytes(response.content)

    print(f"Downloaded file {filename}")