# Swissalti3d data acquisition

**Author:** Florian Klaver

Script to download all relevant .tif files from the swissalti3d dataset for the Canton of Graubünden.

In [2]:
import os
import pandas as pd
import requests
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

In [3]:
# --- CONFIGURATION ---
# Path to the CSV file containing the list of URLs (downloaded from Swisstopo shop)
csv_file = '../data/ch.swisstopo.swissalti3d-cxnSHHnz.csv'
# Target directory where the .tif files will be saved
download_dir = '../data/swissalti3d'
# Number of parallel downloads
max_parallel_downloads = 8

# Ensure the download directory exists.
# exist_ok=True makes this safe to re-run and avoids the race between a
# separate "does it exist?" check and the directory creation.
os.makedirs(download_dir, exist_ok=True)

In [None]:
# --- 1. LOAD CSV FILE ---
# The file is parsed without a header row, so its first line is treated as data.
df = pd.read_csv(csv_file, header=None)

# The download links sit in the first column (label 0).
# Entries that are missing (NaN) are filtered out before building the list.
url_column = df[0]
urls = url_column[url_column.notna()].tolist()

print(f"Found {len(urls)} files to download.")

# --- 2. DOWNLOAD FUNCTION ---
def download_url(url):
    """
    Downloads a single file from the given URL into `download_dir`.

    The file name is the last path segment of the URL. A file that already
    exists with a non-zero size is skipped (Resume functionality). Data is
    first streamed into a temporary '<name>.part' file and only renamed to its
    final name once the transfer has finished, so an interrupted download is
    never mistaken for a complete file on the next run.

    Args:
        url: Download link as a string; surrounding whitespace is ignored.

    Returns:
        None. Failures (HTTP status other than 200, network errors, file
        system errors) are printed instead of raised, so one bad URL does not
        abort the remaining downloads.
    """
    try:
        # Normalise once so the request and the filename use the same value
        # (a trailing newline/space would otherwise end up in the filename).
        url = url.strip()

        # Extract the filename from the URL (e.g., swissalti3d_..._5728.tif)
        filename = url.split('/')[-1]
        filepath = os.path.join(download_dir, filename)

        # Check if the file already exists and is not empty (Resume functionality)
        if os.path.exists(filepath) and os.path.getsize(filepath) > 0:
            return  # Skip download, file already exists

        # Incomplete data goes here; a stale .part file from an earlier failed
        # attempt is simply overwritten.
        partial_path = filepath + '.part'

        # stream=True avoids loading the entire file into RAM.
        # timeout=(connect, read) in seconds keeps a stalled server from
        # blocking this worker forever.
        # The context manager releases the connection even on early exit.
        with requests.get(url, stream=True, timeout=(10, 60)) as r:
            if r.status_code != 200:
                # Handle HTTP errors (e.g., 404 Not Found)
                print(f"Error: Status Code {r.status_code} for {url}")
                return

            # Write file to disk in 1 MiB chunks
            with open(partial_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)

        # Publish the finished file under its final name in one step.
        os.replace(partial_path, filepath)

    except Exception as e:
        # Deliberately broad: this runs inside a worker thread and a single
        # failing URL must not stop the batch.
        print(f"Error downloading {url}: {e}")

# --- 3. EXECUTE PARALLEL DOWNLOAD ---
print("Starting download... ")

# A thread pool runs up to 'max_parallel_downloads' calls of 'download_url' at once.
with ThreadPoolExecutor(max_workers=max_parallel_downloads) as pool:
    # pool.map yields one result per URL, in input order; tqdm wraps that
    # iterator to show progress. The results themselves are not needed, so the
    # loop only drains the iterator.
    finished = pool.map(download_url, urls)
    for _ in tqdm(finished, total=len(urls)):
        pass

print("Download completed!")