In [4]:
# %pip install cdsapi

import os
import cdsapi
from time import time

import xarray as xr
import matplotlib.pyplot as plt

import dotenv
# Load variables from a .env file into the process environment (python-dotenv).
# The return value is bound to `_` so it is not echoed as the cell's output.
_ = dotenv.load_dotenv()

In [5]:
# Client for the Copernicus Climate Data Store (CDS) API.
# The credential is read from the CDSAPI environment variable rather than being
# hard-coded; per the original note it is expected to look like "UID:ApiKey"
# (your own UID and API key).
# NOTE(review): "/api/v2" is the legacy CDS endpoint — confirm it is still served.
c = cdsapi.Client(url="https://cds.climate.copernicus.eu/api/v2", key=os.environ["CDSAPI"])

# define a logger
def print_to_file(*args, log_path="era5_download.log", **kwargs):
    """Append a message to a log file using ``print`` semantics.

    Args:
        *args: Objects to write; forwarded unchanged to ``print``.
        log_path: Keyword-only path of the file to append to. Defaults to
            ``era5_download.log`` relative to the current working directory.
        **kwargs: Additional ``print`` keyword arguments such as ``sep`` or
            ``end``. ``file`` must not be supplied, because it is always set
            to the opened log file.
    """
    # Append mode keeps earlier log entries; the file is created if missing.
    with open(log_path, "a") as log_file:
        print(*args, file=log_file, **kwargs)

In [6]:
# Download ERA5-Land data for one year, one NetCDF file per (month, hour of day).
# Each file holds the requested variables for every day of that month at that hour.
year = "2023"

# Directory that receives the downloaded files.
era5_dir = "/home/patel_zeel/aqmsp/aqmsp_data/datasets/era5"

# Request parts that are identical for every download (hoisted out of the loop).
era5_variables = [
    "2m_temperature",
    "skin_temperature",
    "2m_dewpoint_temperature",
    "snowfall",
    "10m_u_component_of_wind",
    "10m_v_component_of_wind",
    "surface_pressure",
    "total_precipitation",
]
# Days 01..31 are requested for every month, including months with fewer days.
era5_days = [f"{day:02d}" for day in range(1, 32)]
# Bounding box for California, padded by 0.2 degrees on each side.
# Order is [north, west, south, east].
era5_area = [
    42.00950826967187+0.2,
    -124.48201686078049-0.2,
    32.52883673637251-0.2,
    -114.13122247508855+0.2,
]

master_init = time()
print_to_file("#"*20, f"Starting download for year {year}", "#"*20, sep="\n")
for month in (f"{m:02d}" for m in range(1, 13)):
    for hour in (f"{h:02d}:00" for h in range(24)):
        # ":" is not allowed in file names in Kaggle, so the hour is written as HH-00.
        file_name = f"era5_{year}_{month}_{hour.replace(':', '-')}.nc"
        # Built once so the existence check and the download always agree on the path.
        target_path = os.path.join(era5_dir, file_name)
        if os.path.exists(target_path):
            print_to_file(f"{file_name} already exists. Skipping...")
            continue

        # Download to a temporary name and rename only on success, so an
        # interrupted download is never mistaken for a finished file on re-run.
        part_path = target_path + ".part"
        init = time()
        try:
            c.retrieve(
                "reanalysis-era5-land",
                {
                    "variable": era5_variables,
                    "year": year,
                    "month": month,
                    "day": era5_days,
                    "time": hour,
                    "area": era5_area,
                    "format": "netcdf",
                },
                part_path,
            )
            os.replace(part_path, target_path)
        except Exception as err:
            # Record the failure in the log file, then let it propagate as before
            # (e.g. when the requested period is not available yet).
            print_to_file(f"Failed to download {file_name}: {err}")
            raise
        end = time()
        print_to_file(f"Downloaded {file_name} Time taken: {end-init} seconds")

master_end = time()
print_to_file(f"Total time taken: {(master_end-master_init)/60} minutes")

2023-10-01 15:06:34,653 INFO Welcome to the CDS
2023-10-01 15:06:34,654 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-land
2023-10-01 15:06:34,875 INFO Request is completed
2023-10-01 15:06:34,876 INFO Downloading https://download-0018.copernicus-climate.eu/cache-compute-0018/cache/data1/adaptor.mars.internal-1696152459.680013-20437-11-a6b8efbb-e936-4cb7-a707-7292c745f34d.nc to /home/patel_zeel/aqmsp/aqmsp_data/datasets/era5/era5_2023_01_00-00.nc (5.1M)
2023-10-01 15:06:37,469 INFO Download rate 2M/s     
2023-10-01 15:06:37,801 INFO Welcome to the CDS
2023-10-01 15:06:37,802 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-land
2023-10-01 15:06:38,024 INFO Request is queued
2023-10-01 15:06:39,193 INFO Request is running
2023-10-01 15:07:28,671 INFO Request is completed
2023-10-01 15:07:28,672 INFO Downloading https://download-0020.copernicus-climate.eu/cache-compute-0020/cache/data8/adaptor.mars.in

Exception: the request you have submitted is not valid. None of the data you have requested is available yet, please revise the period requested. The latest date available for this dataset is: 2023-09-26 12:00.

In [None]:
# Open a single downloaded file (January 2021, 00:00) to inspect its contents.
# NOTE(review): this path uses ".../aqmsp_data/data/era5" and year 2021, while the
# download cell writes to ".../aqmsp_data/datasets/era5" for 2023 — confirm the file exists.
era5_sample_path = "/home/patel_zeel/aqmsp/aqmsp_data/data/era5/era5_2021_01_00-00.nc"
era_ds = xr.open_dataset(era5_sample_path)
era_ds

In [None]:
# %pip install git+https://github.com/scotthosking/get-station-data.git
# from get_station_data import ghcnd
# from get_station_data.util import nearest_stn

In [None]:
# lat = 38.13
# lon = -122.0
# stn_md = ghcnd.get_stn_metadata()
# stn_md

In [None]:
# my_stns = nearest_stn(stn_md, lon, lat, 10)
# my_stns

In [None]:
# df = ghcnd.get_data(my_stns, date_range=("2020-12-30", "2021-01-31"))

# df.head()

In [None]:
# t_data = df[df["element"] == "TOBS"][df["station"] == "US1CACC0046"]
# t_data.head()

In [None]:
# tmp_era = era_ds.sel(latitude=lat, longitude=lon, method="nearest")

# # resample to daily and plot
# fig, ax = plt.subplots(figsize=(12, 4))
# tmp_era["t2m"].values