In [None]:
import os
import sys
import time as t_util
import numpy as np
import pandas as pd
import yaml
import cftime
import xarray as xr


In [None]:
#Read main paths
#(surrounding whitespace is stripped so that a trailing newline in the text files does not end up inside the paths)
with open('../path_main.txt', 'r') as file:
    path_main = file.read().strip()
with open('../path_grids.txt', 'r') as file:
    path_grids = file.read().strip()

#Build the directories used below
#(path_main and path_grids are used as plain string prefixes, no path separator is inserted after them)
dir_scripts    = f'{path_main}Scripts/'
dir_CORDEX     = f'{path_main}Data/EURO-CORDEX/HSIs/'
dir_ERA5L_HSIs = f'{path_main}Data/ERA5-Land/HSIs/'
dir_names      = f'{path_main}Scripts/Model_lists/'
dir_tmp        = f'{path_main}Data/ERA5-Land/'
dir_regr       = f'{path_grids}Regridding/'
dir_COR_out    = f'{path_main}Data/EURO-CORDEX/HSI_stats/'


## Prepare variables and parameters

In [None]:
#Define cities
cities = ['Istanbul', 'Moscow', 'London', 'SaintPetersburg', 'Berlin', 'Madrid', 'Kyiv', 'Rome', 'Paris',
          'Bucharest', 'Minsk', 'Vienna', 'Hamburg', 'Warsaw', 'Budapest', 'Barcelona', 'Munich', 'Kharkiv',
          'Milan', 'Belgrade', 'Prague', 'NizhnyNovgorod', 'Kazan', 'Sofia', 'Brussels', 'Stockholm', 'Oslo',
          'Dublin', 'Lisbon', 'Vilnius', 'Copenhagen', 'Helsinki', 'Athens', 'Amsterdam', 'Riga', 'Zagreb']

#Define scenarios and variables
scenarios = ['historical', 'rcp85']
HSI_names = ['TN', 'TX']

#Row labels of the statistics tables (mean and standard deviation for the model and for ERA5-Land)
method_str = ['mu_model', 'sigma_model', 'mu_ERA5L', 'sigma_ERA5L']

#Box size N and the matching 'NxN' tag used to identify files
N = 5
N_str = f'{N}x{N}'

# Load city coordinates
fname_coords = dir_scripts + 'City_coordinates.yml'
with open(fname_coords, 'r') as file:
    city_coords = yaml.safe_load(file)

#Define models and RCPs which should be used
#NOTE(review): eval() executes whatever Python expression is stored in the model list file;
#              only use it with a trusted file (ast.literal_eval would be the safe alternative
#              if every line is a plain literal -- confirm before switching).
all_models = []
with open(dir_names + 'Models_CORDEX-EUR-11_RCP85.txt', 'r') as filehandle:
    for line in filehandle:
        #Strip whitespace instead of cutting the last character, which would truncate
        #a final line that does not end with a newline
        entry = line.strip()
        if not entry:
            continue  #skip blank lines
        all_models.append(eval(entry))


## Function for selecting NxN grid points around city

In [None]:
def get_city_data(data, city, city_coords, N, var_name, data_source):
    """Select an N x N box of grid points around the grid point closest to a city.

    Parameters
    ----------
    data : xarray object
        Gridded data with geographical coordinates named either
        'latitude'/'longitude' or 'lat'/'lon' and horizontal dimensions
        'rlat'/'rlon', 'y'/'x' or 'latitude'/'longitude'. It is indexed with
        ``var_name``. The input object is not modified.
    city : hashable
        Key of the city in ``city_coords``.
    city_coords : mapping
        Maps each city to a ``(latitude, longitude)`` pair.
    N : int
        Side length of the box in grid points. Boxes that would extend beyond
        the edge of the grid are truncated at the edge.
    var_name : str
        Name of the variable used for the Kelvin check.
    data_source : any
        Not used by this function; kept for backward compatibility.

    Returns
    -------
    tuple
        ``(data_city, dist)``: the selected box and the Euclidean distance in
        latitude/longitude space between each selected grid point and the city.
        If ``var_name`` is 'TX' or 'TN' and its mean exceeds 200, 273.15 is
        subtracted from the whole selection (not only from ``var_name``).

    Raises
    ------
    ValueError
        If the geographical coordinates or the horizontal dimensions cannot
        be identified.
    """

    #Identify the names of the geographical coordinates
    if 'longitude' in data.coords:
        lat_name, lon_name = 'latitude', 'longitude'
    elif 'lon' in data.coords:
        lat_name, lon_name = 'lat', 'lon'
    else:
        raise ValueError("Data has neither a 'longitude' nor a 'lon' coordinate")

    #Convert longitude from [0, 360] to [-180, 180]
    #(assign_coords returns a new object, so the caller's data are left untouched)
    lon = data[lon_name]
    if lon.max() > 180:
        data = data.assign_coords({lon_name: lon.where(lon < 180, ((lon + 180) % 360) - 180)})

    #Get lat and lon of city
    lat_sel, lon_sel = city_coords[city]

    #Find grid point closest to city (smallest sum of absolute lat and lon differences)
    loc_city = np.abs(data[lon_name] - lon_sel) + np.abs(data[lat_name] - lat_sel)
    ind_flat = np.unravel_index(np.argmin(loc_city.values), loc_city.shape)

    #Look the index up per dimension name: the dimension order of the broadcast field
    #is not necessarily (lat, lon), e.g. for one-dimensional lat and lon coordinates
    ind_city = dict(zip(loc_city.dims, ind_flat))

    #Names of the horizontal dimensions
    if 'rlat' in data.dims:   y_dim, x_dim = 'rlat', 'rlon'
    elif 'x' in data.dims:    y_dim, x_dim = 'y', 'x'
    else:                     y_dim, x_dim = 'latitude', 'longitude'
    if (y_dim not in ind_city) or (x_dim not in ind_city):
        raise ValueError(f"Horizontal dimensions '{y_dim}' and '{x_dim}' not found in the coordinates")

    #Select NxN box around grid point
    #(identical to the previous split for odd N; for even N the box now really contains N points)
    n_box = int(N)
    N1 = (n_box - 1) // 2
    N2 = n_box - N1

    #Clamp the lower bounds at 0, a negative slice start would wrap around to the other end of the grid
    lat_rng = slice(max(int(ind_city[y_dim]) - N1, 0), int(ind_city[y_dim]) + N2)
    lon_rng = slice(max(int(ind_city[x_dim]) - N1, 0), int(ind_city[x_dim]) + N2)
    data_city = data.isel({y_dim: lat_rng, x_dim: lon_rng})

    #Calculate distance from city center
    dist = np.sqrt((data_city[lat_name] - lat_sel)**2 + (data_city[lon_name] - lon_sel)**2)

    #Convert K to °C (a mean above 200 is taken as indication that the data are in Kelvin)
    if (var_name in ['TX', 'TN']) and (data_city[var_name].mean()>200):
        data_city = data_city - 273.15

    return data_city, dist


## Calculate mean and SD for EURO-CORDEX and ERA5-Land

In [None]:
#One table (statistics x cities) per HSI and model, keyed by '<HSI>_<model parts joined by "_">'
data_coll = dict()

#Initialize data frame for collecting data
#(np.nan is used because the alias np.NaN was removed in NumPy 2.0)
insert  = np.full((len(method_str), len(cities)), np.nan)
data_pd = pd.DataFrame(data=insert, index=method_str, columns=cities)
for model in all_models:
    for HSI in HSI_names:
        #Every entry gets its own copy so that the tables can be filled independently
        data_coll[HSI + '_' + "_".join(model)] = data_pd.copy()
        
#Loop over cities
for city in cities:

    print(city)

    #ERA5-Land file of the city: exactly one file containing the city name and '15x15' is expected
    fnames_ERA5L = [dir_ERA5L_HSIs + file for file in os.listdir(dir_ERA5L_HSIs) if city in file and '15x15' in file]
    if len(fnames_ERA5L)!=1:  sys.exit('Number of filenames not correct')

    #Loop over models
    for model in all_models:

        model_str = "_".join(model)
        print(model_str, end=', ')

        #Get file name
        #NOTE(review): '5x5' is hard-coded here instead of being derived from N_str and it is also
        #              a substring of '15x15' -- confirm that this filter is intended
        dir_data = dir_CORDEX + city + '/historical/'
        files_CORDEX = [dir_data + file for file in os.listdir(dir_data) if model_str in file and '5x5' in file]
        if len(files_CORDEX)!=1:  sys.exit('Filename is not unique')

        #Regrid ERA5-Land to CORDEX model grid
        file_target = files_CORDEX[0]
        fname_in   = fnames_ERA5L[0]
        fname_regr = dir_tmp + 'ERA5-Land_Europe_1981-2010_regr_'  + model_str + '_' + city + '.nc'
        file_grid  = dir_tmp + 'grid_xy' + model_str + '_' + city
        if os.path.exists(file_grid):   os.remove(file_grid)
        if os.path.exists(fname_regr):  os.remove(fname_regr)

        #Stop right away if cdo fails instead of continuing with missing or incomplete files
        status = os.system("cdo griddes -selvar,TX " + file_target + " > " + file_grid)
        if status!=0:  sys.exit('cdo griddes failed')
        status = os.system("cdo remapbil," + file_grid + " " + fname_in + " " + fname_regr)
        if status!=0:  sys.exit('cdo remapbil failed')

        #Read regridded ERA5 data into memory and close the file,
        #so that the temporary file can be removed safely at the end of the iteration
        with xr.open_dataset(fname_regr, use_cftime=True) as ds_regr:
            data_city_ERA5Ld = ds_regr.load()

        #Loop over scenarios
        for scen in scenarios:

            #Get file name
            dir_data = dir_CORDEX + city + '/' + scen + '/'
            files = [file for file in os.listdir(dir_data) if model[0] in file and model[1] in file and model[2] in file]
            files = [file for file in files if "HSIs-" + N_str in file]
            if len(files)!=1:  sys.exit('File is not unique')

            #Read CORDEX data (loaded into memory so that the file handle is released again)
            with xr.open_dataset(dir_data + files[0]) as ds_scen:
                data_read = ds_scen.load()

            #Concatenate historical and future data
            #(relies on 'historical' being the first entry of scenarios)
            if scen=='historical':  data_city_CORDEX = data_read
            else:                   data_city_CORDEX = xr.concat((data_city_CORDEX, data_read), dim='time')

        #Get names of the horizontal dimensions
        if 'rlat' in data_city_CORDEX.dims:
            lat_name = 'rlat'
            lon_name = 'rlon'
        elif 'x' in data_city_CORDEX.dims:
            lat_name = 'y'
            lon_name = 'x'
        else:
            sys.exit('Lat and lon names undefined')

        #Check that CORDEX and ERA5-Land coordinates agree
        check1 = np.max(np.abs(data_city_ERA5Ld[lat_name].values - data_city_CORDEX[lat_name].values))
        check2 = np.max(np.abs(data_city_ERA5Ld[lon_name].values - data_city_CORDEX[lon_name].values))
        if (check1>0.0001) or (check2>0.0001):  sys.exit('Coordinates do not agree')

        #Re-index data (small differences are removed so that both data sets share identical coordinates)
        if (check1!=0) or (check2!=0):
            data_city_ERA5Ld = data_city_ERA5Ld.reindex({lat_name: data_city_CORDEX[lat_name], lon_name: data_city_CORDEX[lon_name]}, method='nearest')

        #Select data in time period
        time_sel = slice('1981', '2010')
        data_city_CORDEX = data_city_CORDEX.sel(time=time_sel)
        data_city_ERA5Ld = data_city_ERA5Ld.sel(time=time_sel)

        #Select data in summer (JJA) with a boolean mask along time
        sel_JJA_CORDEX  = (data_city_CORDEX.time.dt.month>=6) & (data_city_CORDEX.time.dt.month<=8)
        sel_JJA_ERA5Ld  = (data_city_ERA5Ld.time.dt.month>=6) & (data_city_ERA5Ld.time.dt.month<=8)
        data_JJA_CORDEX = data_city_CORDEX.isel(time=sel_JJA_CORDEX)
        data_JJA_ERA5Ld = data_city_ERA5Ld.isel(time=sel_JJA_ERA5Ld)

        #Apply ERA5-Land masking to CORDEX and vice-versa
        mask1 = ~np.isnan(data_JJA_ERA5Ld.mean('time'))
        data_JJA_CORDEX = data_JJA_CORDEX.where(mask1)
        mask2 = ~np.isnan(data_JJA_CORDEX.mean('time'))
        data_JJA_ERA5Ld = data_JJA_ERA5Ld.where(mask2)

        #Loop over HSIs
        for HSI in HSI_names:

            #Save in dataframe (converted to plain floats, mean and standard deviation over all remaining values)
            stats = data_coll[HSI + '_' + model_str]
            stats.loc['mu_model', city]    = float(data_JJA_CORDEX[HSI].mean())
            stats.loc['mu_ERA5L', city]    = float(data_JJA_ERA5Ld[HSI].mean())
            stats.loc['sigma_model', city] = float(data_JJA_CORDEX[HSI].std())
            stats.loc['sigma_ERA5L', city] = float(data_JJA_ERA5Ld[HSI].std())

        #Remove temporary grid description and regridded file
        os.remove(file_grid)
        os.remove(fname_regr)

    #Finish the line of model names printed for this city
    print('')
    
#Save data in file (one CSV file per model and HSI)
for model in all_models:
    for HSI in HSI_names:

        #Define file name
        dir_save  = dir_COR_out + HSI + '/'
        fname_out = dir_save + HSI + '_mean_std_JJA_' + "_".join(model) + "_JJA_1981-2010.csv"

        #Create the output directory including missing parent directories (no error if it already exists)
        os.makedirs(dir_save, exist_ok=True)

        #Save data
        data_save = data_coll[HSI + '_' + "_".join(model)]
        data_save.to_csv(fname_out)
            