In [None]:
import logging
import os
from pathlib import Path

import requests
from dotenv import load_dotenv

### Download data functions

In [None]:
# Configure the root logger to emit INFO-level (and higher) records so the
# progress and error messages logged by download_data are displayed.
# NOTE(review): basicConfig is a no-op when the root logger already has
# handlers attached — confirm the messages actually appear in your kernel.
logging.basicConfig(level=logging.INFO)

# overriding requests.Session.rebuild_auth to maintain headers when redirected
class SessionWithHeaderRedirection(requests.Session):
    """``requests.Session`` that keeps credentials across login-host redirects.

    The given username and password are stored as the session's ``auth``
    tuple. ``rebuild_auth`` is overridden so that the ``Authorization`` header
    is only dropped when a redirect changes host and neither the source nor
    the destination host is ``AUTH_HOST``.
    """

    # Host for which the Authorization header is preserved on redirect.
    AUTH_HOST = "urs.earthdata.nasa.gov"

    def __init__(self, username: str, password: str):
        """Create the session and attach ``(username, password)`` as its auth."""
        super().__init__()
        self.auth = (username, password)

    def rebuild_auth(self, prepared_request, response):
        """Decide whether the ``Authorization`` header survives a redirect.

        Args:
            prepared_request: The request about to be sent to the redirect target.
            response: The response that triggered the redirect.

        Returns:
            None. ``prepared_request.headers`` is modified in place when the
            header has to be removed.
        """
        headers = prepared_request.headers

        # Nothing to strip if no credentials are attached to the request.
        if "Authorization" not in headers:
            return

        source_host = requests.utils.urlparse(response.request.url).hostname
        target_host = requests.utils.urlparse(prepared_request.url).hostname

        # Same host on both sides of the redirect: keep the header.
        if source_host == target_host:
            return

        # Redirects to or from the auth host keep the header as well.
        if self.AUTH_HOST in (source_host, target_host):
            return

        del headers["Authorization"]


def download_data(urls: list[str], output_directory: str | Path, username: str, password: str):
    """Download each URL into ``output_directory`` using an authenticated session.

    Every file is named after the part of its URL that follows the last ``/``
    and is streamed to disk in 1 MiB chunks. An HTTP error status for one URL
    is logged and the loop moves on to the next URL.

    Args:
        urls: URLs of the files to download.
        output_directory: Directory the files are written to; created
            (including parents) if it does not exist yet.
        username: Username passed to ``SessionWithHeaderRedirection``.
        password: Password passed to ``SessionWithHeaderRedirection``.

    Raises:
        requests.exceptions.RequestException: For request failures other than
            an HTTP error status (only ``HTTPError`` is handled here).
        OSError: If the directory or a target file cannot be created/written.
    """
    os.makedirs(output_directory, exist_ok=True)

    # The session is a context manager: its pooled connections are closed once
    # all downloads are done, even if an unexpected exception escapes the loop.
    with SessionWithHeaderRedirection(username, password) as session:
        for url in urls:
            filename = os.path.join(output_directory, url[url.rfind("/") + 1 :])

            try:
                # With stream=True the connection is only released once the
                # body is fully read or the response is closed. Closing it
                # explicitly covers the path where raise_for_status() fails
                # before any of the body has been consumed.
                with session.get(url, stream=True) as response:
                    logging.info(f"Downloading {url}, status code: {response.status_code}")
                    response.raise_for_status()

                    with open(filename, "wb") as fd:
                        for chunk in response.iter_content(chunk_size=1024 * 1024):
                            fd.write(chunk)

                logging.info(f"Downloaded {url} to {filename}")

            except requests.exceptions.HTTPError as e:
                logging.error(f"Error downloading {url}: {e}")


### Download data config

Change this to the correct paths for the data you need.

In [None]:
# Both locations are resolved from the current working directory:
# downloads go two directory levels up, under datasets/modis/sea_surface_temp,
# and the URL list is read from urls/sea_surface_temp.txt next to it.
DATA_DIR = Path.cwd().parents[1].joinpath("datasets", "modis", "sea_surface_temp")
URLS_FILE = Path.cwd().joinpath("urls", "sea_surface_temp.txt")

In [None]:
# Make sure the download target exists before anything is written to it:
# missing parent directories are created too (parents=True), and an already
# existing directory is not treated as an error (exist_ok=True).
DATA_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
def load_env_vars():
    """Load variables from a ``.env`` file into the process environment.

    ``load_dotenv()`` does not raise when no ``.env`` file can be found; it
    reports the outcome through its boolean return value instead. That value
    is checked here so a missing (or empty) file is reported rather than
    silently ignored. Nothing is raised, because the required variables may
    already be present in the environment.
    """
    if not load_dotenv():
        print("Error: .env file not found or it defines no variables.")

def getenv(key: str) -> str:
    """Return the value of environment variable ``key``.

    Args:
        key: Name of the environment variable to read.

    Returns:
        The variable's value (an empty string counts as set).

    Raises:
        ValueError: If the variable is not present in the environment.
    """
    found = os.environ.get(key)
    if found is not None:
        return found
    raise ValueError(f"Environment variable '{key}' is not set.")

In [None]:
# Read the login credentials from the process environment, after giving a
# .env file the chance to populate it. getenv raises ValueError if either
# variable is missing.
# NOTE(review): USERNAME is often pre-set by the operating system (e.g. on
# Windows), and load_dotenv() does not override existing variables by default
# — confirm the value picked up here is the intended download login.
load_env_vars()
username = getenv("USERNAME")
password = getenv("PASSWORD")

In [None]:
# Read one URL per line, trimming surrounding whitespace. Blank lines (for
# example a trailing empty line at the end of the file) are skipped: an empty
# string is not a valid URL and would make the request fail with an exception
# that is not an HTTPError, aborting the whole download run.
with open(URLS_FILE, "r") as f:
    urls = [line.strip() for line in f if line.strip()]

In [None]:
# Download every URL read from URLS_FILE into DATA_DIR, authenticating with
# the credentials loaded from the environment.
download_data(urls=urls, output_directory=DATA_DIR, username=username, password=password)