## Data Download Script

Data originates from the Earth System Grid Federation at https://esgf-node.ipsl.upmc.fr/projects/esgf-ipsl/.

Each file has been concatenated to contain the ssp119 and ssp126 scenarios and ensemble members r1-5 from 2015 to 2100. Each has also been regridded to 2.5° resolution.

Files are stored on Google Drive. Running this script will download five (5) files from the Drive to a "data" folder within your clone of the repository that will NOT be added to GitHub. 

## Import Statements

In [1]:
import os
import requests

## Configuration/Setup

In [3]:
# (Google Drive file id, local filename) pairs, one pair per file to download
files = [
    (
        "1sTeueTEPI5GZwi3JyRTxkBCX7TxMGs2d",
        "CNRM_ESM2-1_ssp119_ssp126_201501_210012_r1-5_2pt5degree.nc",
    ),
    (
        "1ogktmFYWR6lOCtNdkJpzzNQd94HbfCUb",
        "MIROC6_ssp119_ssp126_201501_210012_r1-5_2pt5degree.nc",
    ),
    (
        "1Yjnth1pRKDpkBbUoVzA2Ge9qeuWgq0Os",
        "MPI-ESM1-2-LR_ssp119_ssp126_201501_210012_r1-5_2pt5degree.nc",
    ),
    (
        "14rCBclmR4kO73mb-WMPHLT6UnjQr-eoZ",
        "MRI-ESM2-0_ssp119_ssp126_201501_210012_r1-5_2pt5degree.nc",
    ),
    (
        "13iPhZ0FRSjRJj8l6_JlAx8x8jPyFPtfL",
        "UKESM1-0-LL_ssp119_ssp126_201501_210012_r1-5_2pt5degree.nc",
    ),
]
# Chunk size handed to the streaming download loop (32 KiB)
chunk_size = 32 * 1024

In [4]:
# Parent directory of the relative name "MamalakisResearch". Because the name
# is resolved against the current working directory, this is the directory
# the notebook is run from (expected to be the repository root).
repo_root = os.path.split(os.path.abspath("MamalakisResearch"))[0]

# Location of the "data" folder under that root; created if it is missing
data_dir = os.path.join(repo_root, "data")
os.makedirs(data_dir, exist_ok=True)

## Downloading Data

In [5]:
def download(file_list):
    """
    Download file(s) from Google Drive into the local data folder.

    Parameters
    ----------
    file_list : iterable of (str, str)
        ``(file_id, filename)`` pairs. ``file_id`` is the Google Drive file
        id and ``filename`` is the name the file is saved under inside
        ``data_dir``.

    Returns
    -------
    None

    Raises
    ------
    requests.HTTPError
        If Drive answers with an HTTP error status.
    RuntimeError
        If Drive answers with an HTML page instead of file content.

    Notes
    -----
    Files that already exist in ``data_dir`` are skipped and the remaining
    entries are still processed. Large-file warning handling is enabled: when
    Drive sets a ``download_warning`` cookie, the request is repeated with
    its value as the ``confirm`` parameter.
    """

    # Per-file download endpoint; the file is selected with the "id" query
    # parameter. (A folder share link cannot be used here: it ignores "id".)
    url = "https://drive.google.com/uc"
    # Seconds to wait for a connection / for the next bytes before giving up,
    # so a stalled transfer does not hang the notebook forever.
    timeout = 60

    # One session for the whole batch so cookies and connections are reused
    # and everything is released when the batch is finished.
    with requests.Session() as session:
        for file_id, filename in file_list:

            dest_path = os.path.join(data_dir, filename)

            # Skip only this file; the rest of the list must still be handled
            if os.path.exists(dest_path):
                print(f"{dest_path} already exists. Skipping download.")
                continue

            params = {"id": file_id, "export": "download"}
            response = session.get(url, params=params, stream=True, timeout=timeout)

            # Checking for Google Drive large file warning
            token = None
            for key, value in response.cookies.items():
                if key.startswith("download_warning"):
                    token = value

            if token:
                # Drop the warning response before asking again with the token
                response.close()
                params["confirm"] = token
                response = session.get(url, params=params, stream=True, timeout=timeout)

            # Stream into a temporary name first so that an interrupted run
            # never leaves a truncated file that a later run would skip.
            part_path = dest_path + ".part"
            with response:
                response.raise_for_status()

                # NOTE(review): an HTML body presumably means Drive returned a
                # web page (sign-in, quota or warning page) rather than the
                # file itself; refuse to save it under a .nc name.
                content_type = response.headers.get("Content-Type", "")
                if content_type.lower().startswith("text/html"):
                    raise RuntimeError(
                        f"Google Drive returned an HTML page instead of file "
                        f"content for id {file_id} ({filename})."
                    )

                # Saving file in chunks
                with open(part_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size):
                        if chunk:
                            f.write(chunk)

            # Publish the finished file under its final name
            os.replace(part_path, dest_path)

            print(f"{dest_path} downloaded.")

In [6]:
# Run the download for every configured file when executed as a script/cell
if __name__ == "__main__":
    download(files)

/Users/Caroline/Desktop/school/MamalakisResearch/data/CNRM_ESM2-1_ssp119_ssp126_201501_210012_r1-5_2pt5degree.nc downloaded.
/Users/Caroline/Desktop/school/MamalakisResearch/data/MIROC6_ssp119_ssp126_201501_210012_r1-5_2pt5degree.nc downloaded.
/Users/Caroline/Desktop/school/MamalakisResearch/data/MPI-ESM1-2-LR_ssp119_ssp126_201501_210012_r1-5_2pt5degree.nc downloaded.
/Users/Caroline/Desktop/school/MamalakisResearch/data/MRI-ESM2-0_ssp119_ssp126_201501_210012_r1-5_2pt5degree.nc downloaded.
/Users/Caroline/Desktop/school/MamalakisResearch/data/UKESM1-0-LL_ssp119_ssp126_201501_210012_r1-5_2pt5degree.nc downloaded.
