In [None]:
import os
import sys
import time as t_util
import numpy as np
import pandas as pd
import yaml
import xarray as xr


## Define folders

In [None]:
#Read main paths
#(surrounding whitespace is stripped, because a trailing newline in the text files
# would otherwise end up inside every path that is built from them)
with open('../path_main.txt', 'r') as file:       path_main = file.read().strip()
with open('../path_ERA5-Land.txt', 'r') as file:  path_era5 = file.read().strip()

#Build folder names with os.path.join, so that it does not matter whether the stored
#paths end with a path separator. The empty last component keeps a trailing separator,
#which is needed because file names are appended to these folders by plain concatenation.
dir_ERA5     = os.path.join(path_era5, '')
dir_scripts  = os.path.join(path_main, 'Scripts', '')
dir_ERA5_out = os.path.join(path_main, 'Data', 'ERA5-Land', 'Variables', '')

#Create output folder (including missing parent folders; no error if it already exists)
os.makedirs(dir_ERA5_out, exist_ok=True)


## Select cities

In [None]:
#Cities to process (each name is used as a key into the coordinate table loaded below)
cities = [
    'Istanbul', 'Moscow', 'London', 'SaintPetersburg', 'Berlin', 'Madrid',
    'Kyiv', 'Rome', 'Paris', 'Bucharest', 'Minsk', 'Vienna',
    'Hamburg', 'Warsaw', 'Budapest', 'Barcelona', 'Munich', 'Kharkiv',
    'Milan', 'Belgrade', 'Prague', 'NizhnyNovgorod', 'Kazan', 'Sofia',
    'Brussels', 'Stockholm', 'Oslo', 'Dublin', 'Lisbon', 'Vilnius',
    'Copenhagen', 'Helsinki', 'Athens', 'Amsterdam', 'Riga', 'Zagreb',
]

#Read the city coordinates from the YAML file in the scripts folder
fname_coords = f'{dir_scripts}City_coordinates.yml'
with open(fname_coords, 'r') as file:
    city_coords = yaml.safe_load(file)

#Compression settings used when writing NetCDF output (zlib, level 2)
comp = {'zlib': True, 'complevel': 2}


## Get ERA5-Land for single grid point

In [None]:
#Define variables
#(file_names: text that identifies the input file, var_names: variable name inside
# the file, var_names_out: column name in the output table)
file_names     = ['tasmax', 'tasmin']
var_names      = ['t2m', 't2m']
var_names_out  = ['tasmax', 'tasmin']

#Time limits
time_lim = ['1981', '2010']
time_str = time_lim[0] + '-' + time_lim[1]

#Create pandas data frame
all_dates = pd.date_range(time_lim[0] + '-01-01', time_lim[1] + '-12-31', freq='D')
data_ERA  = pd.DataFrame(index=all_dates, columns=var_names_out)

#Create dictionaries for storing data (one empty table per city)
all_data_ERA = dict()
for city in cities:
    all_data_ERA[city] = data_ERA.copy(deep=True)

#Loop over variables
for fname, variab, var_out in zip(file_names, var_names, var_names_out):

    #Find ERA5 file (exactly one file name in the folder must contain the search text)
    fname_ERA5 = [dir_ERA5 + file for file in os.listdir(dir_ERA5) if fname in file]
    if len(fname_ERA5) != 1:
        raise RuntimeError(f"Expected exactly one file containing '{fname}' in '{dir_ERA5}', "
                           f"but found {len(fname_ERA5)}: {fname_ERA5}")

    #Read ERA5 data into memory and close the file afterwards
    with xr.open_dataset(fname_ERA5[0]) as ds:
        data_ERA5_in = ds.load()

    #Re-index longitude to the range [-180, 180), same as for the NxN grid box.
    #Without this, a nearest-neighbour search with a negative city longitude would
    #pick the wrong grid column if the file stores longitudes as 0...360.
    attrs = data_ERA5_in.longitude.attrs
    data_ERA5_in = data_ERA5_in.assign_coords(longitude=(((data_ERA5_in.longitude + 180) % 360) - 180)).sortby('longitude')
    data_ERA5_in['longitude'].attrs = attrs

    print(var_out)

    #Loop over cities
    for city in cities:

        print(" -" + city)

        #Get lat and lon of city
        lat_sel, lon_sel = city_coords[city]

        #Select ERA5 data closest to city and in the period given by time_lim
        data_ERA5_sel = data_ERA5_in.sel(latitude=lat_sel, longitude=lon_sel, method='nearest')
        data_ERA5_sel = data_ERA5_sel.sel(time=slice(time_lim[0], time_lim[1]))

        #The values are written into the table by position, so the selected time
        #series must contain exactly one value per day of the table index
        values = data_ERA5_sel[variab].values
        if len(values) != len(all_dates):
            raise ValueError(f"'{var_out}' for {city} has {len(values)} time steps, "
                             f"but {len(all_dates)} days are expected for {time_str}")

        #Save in data frame
        all_data_ERA[city].loc[:, var_out] = values

#Save data in CSV file (one file per city)
for city in cities:
    all_data_ERA[city].to_csv(dir_ERA5_out + 'Variables_' + city + '_ERA5_day_' + time_str + '.csv')


## Get ERA5-Land for NxN grid box

In [None]:
#Define variables
#(file_names: text that identifies the input file, var_names: variable name inside
# the file, var_names_out: variable name in the output file)
file_names     = ['tasmax', 'tasmin']
var_names      = ['t2m', 't2m']
var_names_out  = ['tasmax', 'tasmin']

#Time limits
time_lim = ['1981', '2010']
time_str = time_lim[0] + '-' + time_lim[1]

#Create dictionaries for storing data
all_data_ERA = dict()

#Size of the grid box. N must be odd, so that the box is centred on the grid point
#closest to the city (N_half grid points on each side of it).
N = 5
if N < 1 or N % 2 == 0:
    raise ValueError(f"N must be a positive odd number, but N = {N}")
N_half = (N - 1) // 2

#Loop over variables
for i0, (fname, variab, var_out) in enumerate(zip(file_names, var_names, var_names_out)):

    print(var_out)

    #Find ERA5 file (exactly one file name in the folder must contain the search text)
    fname_ERA5 = [dir_ERA5 + file for file in os.listdir(dir_ERA5) if fname in file]
    if len(fname_ERA5) != 1:
        raise RuntimeError(f"Expected exactly one file containing '{fname}' in '{dir_ERA5}', "
                           f"but found {len(fname_ERA5)}: {fname_ERA5}")

    #Read ERA5 data into memory and close the file afterwards
    with xr.open_dataset(fname_ERA5[0]) as ds:
        data_ERA5_in = ds.load()

    #Re-index longitude to the range [-180, 180)
    attrs = data_ERA5_in.longitude.attrs
    data_ERA5_in = data_ERA5_in.assign_coords(longitude=(((data_ERA5_in.longitude + 180) % 360) - 180)).sortby('longitude')
    data_ERA5_in['longitude'].attrs = attrs

    #Grid size, needed to check that the box does not reach beyond the grid
    n_lat = len(data_ERA5_in.latitude)
    n_lon = len(data_ERA5_in.longitude)

    #Loop over cities
    for city in cities:

        print(" -" + city)

        #Get lat and lon of city
        lat_sel, lon_sel = city_coords[city]

        #Find grid point closest to city
        lat_city = int(np.argmin(np.abs(data_ERA5_in.latitude.values - lat_sel)))
        lon_city = int(np.argmin(np.abs(data_ERA5_in.longitude.values - lon_sel)))

        #Check that the whole box lies inside the grid. A negative start index would
        #be counted from the end of the axis and an end index beyond the axis would be
        #cut off, so the selection would silently be empty or smaller than NxN.
        if (lat_city - N_half < 0 or lat_city + N_half >= n_lat or
            lon_city - N_half < 0 or lon_city + N_half >= n_lon):
            raise ValueError(f"{N}x{N} box around {city} reaches beyond the grid of '{fname_ERA5[0]}'")

        #Select NxN box around grid point
        lat_rng  = slice(lat_city - N_half, lat_city + N_half + 1)
        lon_rng  = slice(lon_city - N_half, lon_city + N_half + 1)
        data_ERA5_sel = data_ERA5_in.isel(latitude=lat_rng, longitude=lon_rng)

        #Select ERA5 data in the period given by time_lim
        data_ERA5_sel = data_ERA5_sel.sel(time=slice(time_lim[0], time_lim[1]))

        #Collect the variables of each city in one dataset
        if i0==0:
            #First variable: store the selected dataset under the output variable name
            data_ERA5_sel = data_ERA5_sel.rename({variab: var_out})
            all_data_ERA[city] = data_ERA5_sel
        else:
            #Further variables: take over the time axis of the first variable before adding
            #(presumably because the time stamps differ between the files - TODO confirm)
            data_ERA5_sel['time'] = all_data_ERA[city].time
            all_data_ERA[city][var_out] = data_ERA5_sel[variab]

#Save data in NetCDF file (one file per city)
for city in cities:

    #Define file name (an existing file is removed before writing)
    fname_out = dir_ERA5_out + 'Variables-' + str(N) + 'x' + str(N) + '_' + city + '_ERA5_day_' + time_str + '.nc'
    if os.path.exists(fname_out): os.remove(fname_out)

    #Save in NetCDF (with compression)
    encoding = {var: comp for var in all_data_ERA[city].data_vars}
    all_data_ERA[city].to_netcdf(fname_out, encoding=encoding)
