# Download data from sources

**Imports**

In [1]:
import os
import requests
import zipfile
from tqdm.notebook import tqdm
from dotenv import load_dotenv

In [2]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()


def _require_env(name: str) -> str:
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty. Failing here gives a
            clear message instead of an obscure error later, when ``None`` is
            used as a folder path or as a download URL.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name!r} is not set (check your .env file)."
        )
    return value


# Destination folders for the downloaded data.
DATA_INPUT = _require_env("DATA_INPUT")

DATA_INPUT_KRP = _require_env("DATA_INPUT_KRP")
DATA_INPUT_RRB = _require_env("DATA_INPUT_RRB")
DATA_INPUT_OS = _require_env("DATA_INPUT_OS")
DATA_INPUT_ABl = _require_env("DATA_INPUT_ABl")

# Every folder that must exist before downloading.
BASE_FOLDERS = [
    DATA_INPUT,
    DATA_INPUT_KRP,
    DATA_INPUT_RRB,
    DATA_INPUT_OS,
    DATA_INPUT_ABl,
]

# Download URLs of the source archives, one per dataset.
KRP_SOURCE_1803 = _require_env("KRP_SOURCE_1803")
KRP_SOURCE_1899 = _require_env("KRP_SOURCE_1899")

RRB_SOURCE_1803 = _require_env("RRB_SOURCE_1803")
RRB_SOURCE_1887 = _require_env("RRB_SOURCE_1887")
RRB_SOURCE_1903 = _require_env("RRB_SOURCE_1903")

OS_SOURCE_1803 = _require_env("OS_SOURCE_1803")

ABl_SOURCE_1980 = _require_env("ABl_SOURCE_1980")

### Setup folders

In [3]:
# Create each missing base folder and seed it with a placeholder file.
for folder in BASE_FOLDERS:
    # Folders that already exist are left untouched (no placeholder is added).
    if os.path.exists(folder):
        continue

    os.makedirs(folder)
    placeholder_path = f"{folder}/_placeholder.md"
    with open(placeholder_path, "w") as placeholder_file:
        placeholder_file.write(
            "Placeholder file that makes sure the folder structure is uploaded to GitHub.\n"
        )

### Download and unzip data

In [4]:
# Maps each source URL to the folder its archive is extracted into.
# Sources are grouped per destination folder; insertion order is preserved.
source_data_dict = {
    **dict.fromkeys((KRP_SOURCE_1803, KRP_SOURCE_1899), DATA_INPUT_KRP),
    **dict.fromkeys(
        (RRB_SOURCE_1803, RRB_SOURCE_1887, RRB_SOURCE_1903), DATA_INPUT_RRB
    ),
    **dict.fromkeys((OS_SOURCE_1803,), DATA_INPUT_OS),
    **dict.fromkeys((ABl_SOURCE_1980,), DATA_INPUT_ABl),
}

In [None]:
# Connect/read timeout in seconds, so a stalled server cannot hang the notebook.
REQUEST_TIMEOUT_S = 60

# Download every source archive, extract it into its folder, then delete the archive.
for source_url, data_folder in tqdm(source_data_dict.items()):
    print(f"Downloading: {source_url}\nto: {data_folder}")
    print()
    tmp_zip_path = os.path.join(data_folder, "tmp.zip")

    response = requests.get(source_url, timeout=REQUEST_TIMEOUT_S)
    if response.status_code != 200:
        # Nothing was written, so there is nothing to extract. Skip this source
        # instead of failing on a missing tmp.zip or re-extracting a stale one.
        print(f"Error downloading file (HTTP {response.status_code}), skipping")
        continue

    try:
        with open(tmp_zip_path, "wb") as f:
            f.write(response.content)

        with zipfile.ZipFile(tmp_zip_path, "r") as zip_ref:
            zip_ref.extractall(data_folder)
    finally:
        # Always remove the temporary archive, even if writing or extraction
        # failed, so a later run never picks up a leftover tmp.zip.
        if os.path.exists(tmp_zip_path):
            os.remove(tmp_zip_path)