# Download dependencies

In [1]:
%pip install -r requirements.txt

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


# Automatic Zip Downloader

In [None]:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os
import time
import re
from datetime import datetime

def setup_driver():
    """Create a Chrome WebDriver whose downloads land in a local ``Zip`` folder.

    The download folder is ``<current working directory>/Zip``; it is created
    if it does not exist yet.

    Returns:
        A ``(driver, download_dir)`` tuple: the started Chrome WebDriver and
        the absolute path of the folder Chrome was told to download into.
    """
    download_dir = os.path.join(os.getcwd(), "Zip")
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test and is a no-op when the folder is already there.
    os.makedirs(download_dir, exist_ok=True)

    chrome_options = Options()
    # Point Chrome's default download location at our folder.
    prefs = {"download.default_directory": download_dir}
    chrome_options.add_experimental_option("prefs", prefs)

    # The path returned by ChromeDriverManager().install() is handed to Service.
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)
    return driver, download_dir

def wait_for_download_completion(directory, initial_files, timeout=1000, poll_interval=1):
    """Poll ``directory`` until a newly downloaded file has finished writing.

    The download counts as finished once at least one file that was not in
    ``initial_files`` exists and none of the *new* files still carries a
    partial-download suffix (``.crdownload`` or ``.tmp``). Files that were
    already present before the download started are ignored, so a stale
    partial file left behind by an earlier run cannot block completion.

    Args:
        directory: Path of the folder being watched.
        initial_files: Iterable of file names that were in ``directory``
            before the download was started.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Number of seconds to sleep between directory scans.

    Returns:
        True if a completed new file was seen before the timeout elapsed,
        False otherwise.
    """
    partial_suffixes = ('.crdownload', '.tmp')
    known_files = set(initial_files)

    # A monotonic clock keeps the deadline valid if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        new_files = set(os.listdir(directory)) - known_files
        # Only inspect files created since the snapshot was taken.
        if new_files and not any(name.endswith(partial_suffixes) for name in new_files):
            return True
        time.sleep(poll_interval)
    return False

def login(driver, username="#", password="#"):
    """Open the KBO Open Data login page and submit the login form.

    Args:
        driver: Selenium WebDriver used to load the page and fill in the form.
        username: Account user name typed into the ``j_username`` field. The
            default ``"#"`` is only a placeholder; pass your real user name.
        password: Account password typed into the ``j_password`` field. The
            default ``"#"`` is only a placeholder; pass your real password.
    """
    driver.get("https://kbopub.economie.fgov.be/kbo-open-data/login?lang=nl")
    driver.find_element(By.ID, "j_username").send_keys(username)
    driver.find_element(By.ID, "j_password").send_keys(password)
    # Clicking the "proceed" element submits the filled-in form.
    driver.find_element(By.ID, "proceed").click()

def navigate_and_download(driver, download_dir):
    """Open the download page and start downloading the newest ``_Full.zip``.

    Every link whose ``href`` contains ``_Full.zip`` is collected, the
    ``YYYY_MM`` stamp in front of ``_Full.zip`` is parsed, and the link with
    the most recent stamp is opened in the browser. Nothing is opened when no
    link carries a parsable stamp.

    Args:
        driver: Selenium WebDriver with an already logged-in session.
        download_dir: Not used by this function; kept for its callers.
    """
    wait = WebDriverWait(driver, 20)

    download_page_link = wait.until(
        EC.element_to_be_clickable((By.LINK_TEXT, "Download een KBO Open Data Bestand"))
    )
    download_page_link.click()

    anchors = wait.until(
        EC.presence_of_all_elements_located((By.XPATH, '//a[contains(@href, "_Full.zip")]'))
    )

    stamp_pattern = re.compile(r'(\d{4}_\d{2})_Full\.zip')
    url_by_month = {}
    for anchor in anchors:
        url = anchor.get_attribute('href')
        found = stamp_pattern.search(url)
        if found is None:
            continue
        # A later link with the same month replaces an earlier one.
        url_by_month[datetime.strptime(found.group(1), '%Y_%m')] = url

    if not url_by_month:
        return

    newest_month = max(url_by_month)
    # Navigating to the zip URL is what triggers the browser download.
    driver.get(url_by_month[newest_month])

if __name__ == "__main__":
    driver, download_dir = setup_driver()
    try:
        # Snapshot the folder first so the finished download can be told
        # apart from files that were already there.
        initial_files = set(os.listdir(download_dir))

        login(driver)
        navigate_and_download(driver, download_dir)

        download_finished = wait_for_download_completion(download_dir, initial_files)
        if not download_finished:
            print("Download did not complete within the timeout period.")
    finally:
        # Always close the browser, even when a step above raised.
        driver.quit()
