In [7]:
import os
import sys
from pathlib import Path
from tqdm import tqdm
from loguru import logger
import xarray as xr
import requests

# Download CHIRTS files



This notebook downloads the [CHIRTS daily Tmin and Tmax data](https://iridl.ldeo.columbia.edu/SOURCES/.UCSB/.CHIRTS/.v1.0/.daily/.global/.0p05/index.html?Set-Language=fr), which is distributed as one file per variable per year.

### Input parameters

In [8]:
# Output directory for the subset NetCDF files. The full global downloads are
# staged in a "tmp" subfolder of this directory before being subset.
DEST_PATH = Path("../../data/01-raw/chirts")

### Setup logging

In [9]:
LOG_PATH = Path("../../logs/")

# Drop whatever handlers are currently registered, then report INFO and above on stderr.
logger.remove()
logger.add(sys.stderr, level="INFO", format="{time} {level} {message}")

# Mirror the same records to a date-stamped log file that rolls over at midnight.
daily_sink_file_fmt = LOG_PATH / "chirts_{time:YYYY-MM-DD}.log"
logger.add(
    daily_sink_file_fmt,
    level="INFO",
    format="{time} {level} {message}",
    rotation="00:00",
)

4

### Create download function

In [10]:
def download_file(url, save_path, timeout=60):
    """Stream a remote file to disk while logging progress.

    Parameters
    ----------
    url : str
        Address of the file to download.
    save_path : str or pathlib.Path
        Destination file. Missing parent directories are created.
    timeout : float or tuple, optional
        Passed to ``requests.get``; limits how long to wait for the connection
        and for each chunk of data, not the duration of the whole download.

    Returns
    -------
    bool
        ``True`` if the download completed, ``False`` if the number of bytes
        written does not match the ``content-length`` announced by the server.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (4xx/5xx).
    """
    save_path = Path(save_path)
    file_name = save_path.name
    logger.info(
        "==========================================================================================="
    )
    logger.info(f"Downloading: {file_name}")
    block_size = 1024 * 1024  # 1 Megabyte
    bytes_written = 0

    # The context manager releases the streamed connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail early instead of saving an HTML error page as if it were the data file.
        response.raise_for_status()
        # A missing content-length header is treated as "size unknown" (0).
        total_size = int(response.headers.get("content-length", 0))
        logger.info(f"Total size: {(total_size/ block_size):.2f} MB")

        save_path.parent.mkdir(parents=True, exist_ok=True)
        with open(save_path, "wb") as f, tqdm(
            total=total_size, unit="iB", unit_scale=True
        ) as progress_bar:
            for data in response.iter_content(block_size):
                progress_bar.update(len(data))
                f.write(data)
                bytes_written += len(data)

    # Completeness can only be checked when the server announced a size.
    if total_size != 0 and bytes_written != total_size:
        logger.error(f"Downloading {file_name}  failed.")
        return False

    logger.success(f"{file_name} successfully downloaded!")
    return True

### Download yearly files and subset to the Philippines (PH)

In [13]:
# Bounding box used to subset the global grids, read by the download loop as
# (min longitude, min latitude, max longitude, max latitude).
PH_BBOX = (116.5, 4.25, 127, 21.5)
# Years to process; a Tmin and a Tmax file are downloaded for each one.
years = [2015]

In [14]:
# Full global files are staged here before being subset; create the folder
# (and DEST_PATH itself) so the first download does not fail on a fresh checkout.
tmp_dir = DEST_PATH / "tmp"
tmp_dir.mkdir(parents=True, exist_ok=True)

# The try wraps BOTH loops so that a keyboard interrupt stops the whole run,
# rather than only skipping the remaining variable of the current year.
try:
    for year in years:
        for data_type in ["min", "max"]:
            file_url = f"https://data.chc.ucsb.edu/products/CHIRTSdaily/v1.0/global_netcdf_p05/T{data_type}/T{data_type}.{year}.nc"
            tmp_file = tmp_dir / f"T{data_type}_{year}.nc"
            download_file(file_url, tmp_file)

            # Subset to PH. The global file is opened in a context manager so its
            # handle is released before the temporary file is deleted.
            # NOTE(review): slice-based selection assumes the latitude and longitude
            # coordinates are stored in ascending order — confirm for this product.
            with xr.open_dataset(tmp_file) as ds_global:
                ds = ds_global.sel(
                    latitude=slice(PH_BBOX[1], PH_BBOX[3]),
                    longitude=slice(PH_BBOX[0], PH_BBOX[2]),
                )
                ds.to_netcdf(DEST_PATH / f"CHIRTS_T{data_type}_PH_{year}.nc")

            # Only the subset is kept; drop the multi-gigabyte global file.
            os.remove(tmp_file)
except KeyboardInterrupt:
    logger.error("Process interrupted using keyboard.")

2024-03-27T10:37:28.257160+0800 INFO Downloading: Tmin_2015.nc
2024-03-27T10:37:30.488951+0800 INFO Total size: 26069.61 MB
100%|██████████| 27.3G/27.3G [37:52<00:00, 12.0MiB/s]  
2024-03-27T11:15:22.724528+0800 SUCCESS Tmin_2015.nc successfully downloaded!
2024-03-27T11:15:24.278488+0800 INFO Downloading: Tmax_2015.nc
2024-03-27T11:15:26.267298+0800 INFO Total size: 26069.61 MB
100%|██████████| 27.3G/27.3G [34:48<00:00, 13.1MiB/s]  
2024-03-27T11:50:15.116099+0800 SUCCESS Tmax_2015.nc successfully downloaded!
