In [3]:
# %pip install cdsapi

import os
import cdsapi
from time import time

import xarray as xr
import matplotlib.pyplot as plt

import dotenv
_ = dotenv.load_dotenv()

In [4]:
c = cdsapi.Client(
    url="https://cds.climate.copernicus.eu/api/v2",
    key=os.environ["CDSAPI"],
)  # Replace UID:ApiKey with you UID and Api Key

# define a logger
def print_to_file(*args, **kwargs):
    with open("era5_download.log", "a") as f:
        print(*args, file=f, **kwargs)

In [5]:
year = "2022"
master_init = time()
print_to_file("#"*20, f"Starting download for year {year}", "#"*20, sep="\n")
for month in map(lambda x: str(x).zfill(2), range(1, 13)):
    for hour in map(lambda x: str(x).zfill(2)+":00", range(0, 24)):
        if os.path.exists(f"/home/patel_zeel/aqmsp/aqmsp_data/datasets/era5/delhi/era5_{year}_{month}_{hour.replace(':', '-')}.nc"):
            print_to_file(f"era5_year_{year}_month_{month}_hour_{hour}.nc already exists. Skipping...")
            continue
        init = time()
        c.retrieve(
            "reanalysis-era5-land",
            {
                "variable": [
                    "2m_temperature",
                    "skin_temperature",
                    "2m_dewpoint_temperature",
                    "snowfall",
                    "10m_u_component_of_wind",
                    "10m_v_component_of_wind",
                    "surface_pressure",
                    "total_precipitation",
                ],
                "year": year,
                "month": month,
                "day": list(map(lambda x: str(x).zfill(2), range(1, 32))),
                "time": hour,
                # "area": [
                #     42.00950826967187+0.2,
                #     -124.48201686078049-0.2,
                #     32.52883673637251-0.2,
                #     -114.13122247508855+0.2,  # Bounding box for California
                # ],
                # put bounding box for Delhi
                "area": [
                    30.0,
                    75.0,
                    27.0,
                    79.0,
                ],
                "format": "netcdf",
            },
            f"/home/patel_zeel/aqmsp/aqmsp_data/datasets/era5/delhi/era5_{year}_{month}_{hour.replace(':', '-')}.nc",  # : is not allowed in file names in Kaggle
        )
        print_to_file(f"Downloaded era5_year_{year}_month_{month}_hour_{hour}.nc", end=" ")
        end = time()
        print_to_file(f"Time taken: {end-init} seconds")

master_end = time()
print_to_file(f"Total time taken: {(master_end-master_init)/60} minutes")

2024-02-19 03:01:05,494 INFO Welcome to the CDS
2024-02-19 03:01:05,495 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-land
2024-02-19 03:01:05,785 INFO Request is queued
2024-02-19 04:15:54,113 INFO Request is running
2024-02-19 04:17:54,882 INFO Request is completed
2024-02-19 04:17:54,883 INFO Downloading https://download-0008-clone.copernicus-climate.eu/cache-compute-0008/cache/data9/adaptor.mars.internal-1708295780.0083027-2991-11-4c935566-12e8-46b4-9e46-42eb1c7a732f.nc to /home/patel_zeel/aqmsp/aqmsp_data/datasets/era5/delhi/era5_2022_01_00-00.nc (618.9K)
                                          

FileNotFoundError: [Errno 2] No such file or directory: '/home/patel_zeel/aqmsp/aqmsp_data/datasets/era5/delhi/era5_2022_01_00-00.nc'

In [None]:
era_ds = xr.open_dataset("/home/patel_zeel/aqmsp/aqmsp_data/data/era5/era5_2021_01_00-00.nc")
era_ds

In [None]:
# %pip install git+https://github.com/scotthosking/get-station-data.git
# from get_station_data import ghcnd
# from get_station_data.util import nearest_stn

In [None]:
# lat = 38.13
# lon = -122.0
# stn_md = ghcnd.get_stn_metadata()
# stn_md

In [None]:
# my_stns = nearest_stn(stn_md, lon, lat, 10)
# my_stns

In [None]:
# df = ghcnd.get_data(my_stns, date_range=("2020-12-30", "2021-01-31"))

# df.head()

In [None]:
# t_data = df[df["element"] == "TOBS"][df["station"] == "US1CACC0046"]
# t_data.head()

In [None]:
# tmp_era = era_ds.sel(latitude=lat, longitude=lon, method="nearest")

# # resample to daily and plot
# fig, ax = plt.subplots(figsize=(12, 4))
# tmp_era["t2m"].values