In [12]:
import cdsapi
import os.path
from datetime import date, timedelta
from pathlib import Path
import xarray as xr
import pandas as pd

# --- Configuration ----------------------------------------------------------
# First year for which a full-year file is requested.
START_YEAR = 2012
# Number of days subtracted from today's date to get the end of the download window.
DELAY_DAYS = 5
# Set to True to delete the cached partial-year files so they are fetched again.
RENEW_CURRENT_YEAR = False

# Root folder of the ERA5 data and the sub-folder holding the downloaded files.
DATA_FOLDER = Path("../../data/era5")
CLIMATE_FOLDER = DATA_FOLDER / "climate"

# Client object used by every retrieve call in this notebook.
c = cdsapi.Client()

In [2]:
# Unfortunately necessary: a query whose period reaches too close to the
# current day fails with an error message, so the download window ends
# DELAY_DAYS before today.
safety_margin = timedelta(days=DELAY_DAYS)
until = date.today() - safety_margin

In [3]:
if RENEW_CURRENT_YEAR:
    # Remove the outdated partial-year files so that they are downloaded
    # again; a file that does not exist is skipped silently.
    for stale_name in ('last_year.nc', 'last_month.nc'):
        (CLIMATE_FOLDER / stale_name).unlink(missing_ok=True)

In [4]:
# Request template shared by all downloads: 'dataset' is passed as the first
# argument to c.retrieve, 'options' is the request body that the download
# functions extend or override per call.
queryBase = {
    'dataset': 'reanalysis-era5-single-levels',
    'options': {
        'product_type': 'reanalysis',
        'format': 'netcdf',
        'variable': '2m_temperature',
        # Bounding box, given as north, west, south, east.
        'area': [50, 8, 45, 18],
        # "00:00" .. "23:00"
        'time': [str(h).zfill(2) + ":00" for h in range(24)],
        # "01" .. "31"
        'day': [str(d).zfill(2) for d in range(1, 32)],
        # "01" .. "12"
        'month': [str(m).zfill(2) for m in range(1, 13)],
        # 'grid': [0.5, 0.5],  # grid in 0.5deg steps in longitude/latitude
    },
}

def download_era5_temperature_years(years, folder):
    """Fetch one file per year into ``folder``, skipping years already on disk.

    Every year is stored as ``<year>.nc``. A request is only sent (through the
    module-level client ``c``) when that file does not exist yet. The request
    body is ``queryBase['options']`` with its ``'year'`` entry set to the year.

    Parameters
    ----------
    years : iterable
        Years to download; each one is converted with ``str()``.
    folder : path object supporting ``/`` (e.g. ``pathlib.Path``)
        Directory the files are written to.
    """
    for yr in years:
        target = folder / f'{yr}.nc'
        if os.path.exists(target):
            # Already downloaded earlier - nothing to do for this year.
            continue
        request = {**queryBase['options'], 'year': [str(yr)]}
        c.retrieve(queryBase['dataset'], request, target)

def download_era5_temperature_last_year(until, folder):
    """Download the partial-year files for the year of ``until``.

    Two files are written to ``folder`` (each only if it does not exist yet):

    * ``last_year.nc``  - the months of ``until.year`` before ``until.month``.
    * ``last_month.nc`` - the days of ``until.month`` before ``until.day``
      (``until.day`` itself is not included).

    When a selection is empty (``until`` is in January, or on the first day
    of a month) the corresponding request is skipped instead of sending an
    empty ``'month'`` / ``'day'`` list.

    Parameters
    ----------
    until : datetime.date
        End of the download window; only ``day``, ``month`` and ``year`` are read.
    folder : path object supporting ``/`` (e.g. ``pathlib.Path``)
        Directory the files are written to.
    """
    year = [str(until.year)]
    # Complete months of the running year, i.e. everything before until.month.
    full_months = [f"{m:02d}" for m in range(1, until.month)]
    # Days of the running month before until.day.
    elapsed_days = [f"{d:02d}" for d in range(1, until.day)]

    # (file name, entries overriding the request template) for every
    # non-empty selection.
    pending = []
    if full_months:
        pending.append(('last_year.nc', {
            'month': full_months,
            'year': year,
        }))
    if elapsed_days:
        pending.append(('last_month.nc', {
            'day': elapsed_days,
            'month': [f"{until.month:02d}"],
            'year': year,
        }))

    for name, overrides in pending:
        filename = folder/name
        if not os.path.exists(filename):
            c.retrieve(queryBase['dataset'], queryBase['options'] | overrides, filename)

In [5]:
# Full calendar years from START_YEAR up to and including the year before
# `until`; the year of `until` itself is covered by the partial-year files.
# range() excludes its end value, so the end must be `until.year`
# (`until.year - 1` left out the most recent complete year).
download_era5_temperature_years(range(START_YEAR, until.year), CLIMATE_FOLDER)

In [6]:
# Report the cut-off date, then fetch the partial-year files for its year.
print(f'Downloading until {until}')
download_era5_temperature_last_year(until=until, folder=CLIMATE_FOLDER)

Downloading until 2022-10-10


2022-10-15 22:41:31,534 INFO Welcome to the CDS
2022-10-15 22:41:31,535 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2022-10-15 22:41:31,665 INFO Request is queued
2022-10-15 22:41:32,724 INFO Request is running
2022-10-15 22:47:50,709 INFO Request is completed
2022-10-15 22:47:50,710 INFO Downloading https://download-0001-clone.copernicus-climate.eu/cache-compute-0001/cache/data1/adaptor.mars.internal-1665866793.9426174-22374-16-4eb7790e-ba0e-4006-a602-263c96c6132a.nc to ../../data/era5/climate/last_year.nc (21.5M)
2022-10-15 22:48:02,004 INFO Download rate 1.9M/s                                                   
2022-10-15 22:48:02,139 INFO Welcome to the CDS
2022-10-15 22:48:02,139 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2022-10-15 22:48:02,309 INFO Downloading https://download-0011-clone.copernicus-climate.eu/cache-compute-0011/cache/data2/adaptor.mars.inter

In [45]:
# Load the data array stored in population/pop_era5_rel.nc below DATA_FOLDER.
pop_path = DATA_FOLDER / 'population' / 'pop_era5_rel.nc'
pop = xr.open_dataarray(pop_path)

# Optional export of `pop` to CSV (kept disabled):
# t = pd.melt(pop.to_pandas(), ignore_index = False)
# t.to_csv(DATA_FOLDER/'pop.csv')

In [46]:
# Daily mean of the `t2m` variable over all NetCDF files in CLIMATE_FOLDER.
# Written as one expression so that `temperature` is bound exactly once (to
# the final array) and the cell gives the same result when re-run.
temperature = (
    # 273.15 is subtracted to convert to degrees Celsius
    # (assumes t2m is stored in kelvin - TODO confirm against the files).
    (xr.open_mfdataset(str(CLIMATE_FOLDER / '*.nc')).t2m - 273.15)
    # "D" is the canonical daily frequency alias (lowercase "d" is deprecated
    # in recent pandas versions).
    .resample(time="D")
    .mean()
)


# d.isel(expvar=0)

In [58]:
# Convert the temperature array to a data frame and write it to temp.csv
# inside DATA_FOLDER.
(
    temperature
    .to_dataframe()
    .to_csv(DATA_FOLDER / 'temp.csv')
)

In [None]:
# OLD CODE

# temperature = xr.open_mfdataset(file)
# temperature = temperature.t2m  - 273.15
# temperature = temperature.resample(time="d").mean()
# temperature = temperature.where(temperature<=12, 20)
# temperature = 20 - temperature
# temperature = (temperature * pop).sum(['longitude','latitude'])

# temperature.to_pandas().max(axis=1).to_csv("data/output/heating-degree-days.csv")

In [None]:
# (scratch cell intentionally left empty - the stray IPython "?" help call was removed)