In [None]:
import os
import sys
import time as t_util
import numpy as np
import pandas as pd
import yaml
import cftime
import xarray as xr


In [None]:
#Read main path
#strip() removes a trailing newline/whitespace (as added by many editors), which would
#otherwise end up in the middle of every directory string derived below
with open('../path_main.txt', 'r') as file:
    path_main = file.read().strip()

#Derived directories; path_main is concatenated directly, so it must end with a path separator
dir_scripts = f'{path_main}Scripts/'
dir_CORDEX  = f'{path_main}Data/EURO-CORDEX/HSIs/'
dir_EMT     = f'{path_main}Data/EURO-CORDEX/EMT/'
dir_names   = f'{path_main}Scripts/Model_lists/'
dir_out     = f'{path_main}Data/EURO-CORDEX/ANOVA_input/'


## Prepare variables and parameters

In [None]:
#Cities to process; the order of this list defines the 'city' coordinate of the output datasets
cities = [
    'Lisbon', 'Madrid', 'Barcelona', 'Rome', 'Athens', 'Istanbul',
    'Sofia', 'Bucharest', 'Belgrade', 'Zagreb', 'Milan', 'Budapest',
    'Munich', 'Vienna', 'Prague', 'Paris', 'Brussels', 'Amsterdam',
    'London', 'Dublin', 'Hamburg', 'Copenhagen', 'Berlin', 'Warsaw',
    'Kharkiv', 'Kyiv', 'Minsk', 'Vilnius', 'Riga', 'Moscow',
    'NizhnyNovgorod', 'Kazan', 'SaintPetersburg', 'Helsinki', 'Stockholm', 'Oslo',
]

#Heat stress indicators (HSIs) to process; used as variable names in the input and output datasets
HSI_names = ['TN', 'TX']

#Define models and RCPs which should be used
#Every non-empty line of a model list file is evaluated as a Python expression and stored as one model;
#the later cells index it as model[0], model[1], model[2] and join it with '_'
#NOTE(review): eval() executes whatever the list files contain; if every line is a plain literal,
#ast.literal_eval would be a safer replacement -- confirm the file format before switching
all_models = dict()
for rcp in ['rcp26', 'rcp85']:
    all_models[rcp] = []
    with open(dir_names + 'Models_CORDEX-EUR-11_' + rcp.upper() + '.txt', 'r') as filehandle:
        for line in filehandle:
            #strip() instead of line[:-1]: a last line without trailing newline would otherwise
            #lose its final character; blank lines are skipped instead of crashing eval()
            entry = line.strip()
            if entry:
                all_models[rcp].append(eval(entry))

#Time periods as [first year, last year] and the matching 'first-last' labels
time_periods = [[1981, 2010], [2070, 2099]]
time_strings = [f'{year_first}-{year_last}' for year_first, year_last in time_periods]

#Changes in EMT relative to 1981-2010 and their labels
#(the label list has one extra entry for the reference period itself)
EMT_change     = np.array([1.0, 2.0, 3.0])
EMT_change_str = ['1.0K', '2.0K', '3.0K', '1981-2010']

#RCP to process (must be a key of all_models)
RCP = 'rcp85'

#Suffix for the grid cell selection: '' or '_3x3'
#It is inserted into the output file names and, if not empty, used without
#the leading underscore to select the input files
N_gridcells = ''


## Prepare time period data

In [None]:
#Collect the JJA mean and SD of every HSI for each model, city and time period
#and write them to two NetCDF files (one for the mean, one for the SD)

#Define RCP
RCP = 'rcp85'
scenarios = ['historical', RCP]

indicators = ['mean', 'SD']

#Combine model names to one string
mod_names = ["_".join(model) for model in all_models[RCP]]


#Create NaN-filled array for storing heat stress indicators
#(np.full with np.nan is used because the alias np.NaN was removed in NumPy 2.0)
dims_coll = ('model', 'city', 'time_period', 'indicator')
da_empty  = np.full((len(mod_names), len(cities), len(time_periods), len(indicators)), np.nan)
data_coll = xr.Dataset(coords={'model':       ('model', mod_names),
                               'city':        ('city', cities),
                               'time_period': ('time_period', time_strings),
                               'indicator':   ('indicator', indicators)})

for HSI in HSI_names:  data_coll[HSI] = (dims_coll, da_empty.copy())

#Loop over cities
for city in cities:

    print(city, end=', ')

    #Loop over models
    for model, mod_name in zip(all_models[RCP], mod_names):

        #Loop over scenarios and read one file per scenario
        data_scen = []
        for scen in scenarios:

            #Define folder
            dir_data = dir_CORDEX + city + '/' + scen + '/'

            #Find the file that matches scenario, all three model name parts and the grid cell selection
            fnames_CORDEX = [file for file in os.listdir(dir_data) if scen + '_' in file and model[0] in file and model[1] in file and model[2] in file]
            if N_gridcells=='':  fnames_CORDEX = [file for file in fnames_CORDEX if 'HSIs_' + city in file]
            else:                fnames_CORDEX = [file for file in fnames_CORDEX if N_gridcells[1::] in file]
            if len(fnames_CORDEX)!=1:  sys.exit('File is not unique')

            #Read data into memory and close the file again, so that no file handle
            #stays open for every city/model/scenario combination
            with xr.open_dataset(dir_data + fnames_CORDEX[0]) as data_read:
                data_scen.append(data_read.load())

        #Concatenate scenarios along time
        data_CORDEX = xr.concat(data_scen, dim='time')

        #Loop over time periods
        for time_period, time_string in zip(time_periods, time_strings):

            #Select data in selected time period
            time_sel = slice(str(time_period[0]), str(time_period[1]))
            data_30y = data_CORDEX.sel(time=time_sel)

            #Keep only summer (June to August)
            JJA_30y  = data_30y.isel(time=(data_30y.time.dt.month>=6) & (data_30y.time.dt.month<=8))

            #Calculate mean and SD over time; the second mean() averages over all remaining dimensions
            #NOTE(review): the SD is taken over all JJA time steps of the period; whether this is the
            #intended interannual SD depends on the temporal resolution of the HSI files -- confirm
            HSI_mean = JJA_30y.mean('time').mean()
            HSI_std  = JJA_30y.std('time').mean()

            #Loop over HSIs
            for HSI in HSI_names:

                #Save in array
                sel_coll = {"model": mod_name, "city": city, "time_period": time_string}
                data_coll[HSI].loc[{**sel_coll, "indicator": "mean"}] = HSI_mean[HSI]
                data_coll[HSI].loc[{**sel_coll, "indicator": "SD"}]   = HSI_std[HSI]

#Save data in file (create the output folder if needed and replace existing files)
os.makedirs(dir_out, exist_ok=True)
fname_out1 = dir_out + 'ANOVA' + N_gridcells + '_' + 'HSIs-mean_' + RCP + '_time-periods.nc'
fname_out2 = dir_out + 'ANOVA' + N_gridcells + '_' + 'HSIs-SD_' + RCP + '_time-periods.nc'
if os.path.exists(fname_out1): os.remove(fname_out1)
if os.path.exists(fname_out2): os.remove(fname_out2)
data_coll.sel(indicator='mean').to_netcdf(fname_out1)
data_coll.sel(indicator='SD').to_netcdf(fname_out2)


## Prepare EMT data

In [None]:
#Collect the JJA mean and SD of every HSI for each model and city in the 20-year window in which
#a given change in European mean temperature (EMT) is first reached, and write them to two
#NetCDF files (one for the mean, one for the SD)

#Define RCP
RCP = 'rcp85'
scenarios = ['historical', RCP]

indicators = ['mean', 'SD']#, 'SD_max']

#Length of the running-mean window (in time steps of the EMT files) used to detect EMT levels
N_years = 20

#Combine model names to one string
mod_names = ["_".join(model) for model in all_models[RCP]]


#Create NaN-filled array for storing heat stress indicators
#(np.full with np.nan is used because the alias np.NaN was removed in NumPy 2.0)
dims_coll = ('model', 'city', 'EMT_change', 'indicator')
da_empty  = np.full((len(mod_names), len(cities), len(EMT_change_str), len(indicators)), np.nan)
data_coll = xr.Dataset(coords={'model':      ('model', mod_names),
                               'city':       ('city', cities),
                               'EMT_change': ('EMT_change', EMT_change_str),
                               'indicator':  ('indicator', indicators)})

for HSI in HSI_names:  data_coll[HSI] = (dims_coll, da_empty.copy())

#Determine for every model the years to evaluate for each EMT_change label
#The windows depend only on the model, so they are derived once here instead of once per city
EMT_windows = dict()
for model, mod_name in zip(all_models[RCP], mod_names):

    #Read European mean temperature (EMT) into memory and close the files again
    files_EMT = sorted([dir_EMT + file for file in os.listdir(dir_EMT) if mod_name in file and 'EMT_' in file])
    if len(files_EMT)==0:  sys.exit('No EMT file found for ' + mod_name)
    data_EMT_parts = []
    for file_EMT in files_EMT:
        with xr.open_dataset(file_EMT) as data_read:
            data_EMT_parts.append(data_read.load())
    data_EMT = xr.concat(data_EMT_parts, dim='time')

    #Calculate EMT relative to 1981-2010 and calculate 20-year means
    #NOTE(review): the window length equals 20 years only if the EMT files hold annual values -- confirm
    dataEMT_ref = data_EMT.sel(time=slice('1981', '2010')).mean('time')
    dataEMT_rel = data_EMT - dataEMT_ref
    dataEMT_20y = dataEMT_rel.rolling(time=N_years, center=True).mean()

    #Loop over selected EMT levels
    EMT_windows[mod_name] = dict()
    for dEMT, dEMT_str in zip(EMT_change, EMT_change_str):

        #Identify 20-year period in which level is reached for first time
        ind_reached = np.where(dataEMT_20y.tas>dEMT)[0]

        #Level never reached by this model: report it and leave the entry as NaN
        #(indexing the first element directly would raise an IndexError here)
        if len(ind_reached)==0:
            print(model)
            print('EMT change of ' + dEMT_str + ' is not reached')
            continue

        central_year = int(dataEMT_20y.isel(time=ind_reached[0]).time.dt.year)
        start_year   = central_year - N_years // 2
        end_year     = central_year + (N_years // 2 - 1)

        #Report windows that extend beyond 2099
        if end_year>2099:
            print(model)
            print(end_year)

        EMT_windows[mod_name][dEMT_str] = (start_year, end_year)

    #Labels without a matching EMT level (here '1981-2010') denote a fixed period 'first-last';
    #zip() above stops before them, which left this entry as NaN in the output
    #NOTE(review): the period is now filled with the statistics of the years in its label -- confirm
    for ref_str in EMT_change_str[len(EMT_change):]:
        ref_start, ref_end = ref_str.split('-')
        EMT_windows[mod_name][ref_str] = (int(ref_start), int(ref_end))

#Loop over cities
for city in cities:

    print(city, end=', ')

    #Loop over models
    for model, mod_name in zip(all_models[RCP], mod_names):

        #Loop over scenarios and read one file per scenario
        data_scen = []
        for scen in scenarios:

            #Define folder
            dir_data = dir_CORDEX + city + '/' + scen + '/'

            #Find the file that matches scenario, all three model name parts and the grid cell selection
            fnames_CORDEX = [file for file in os.listdir(dir_data) if scen + '_' in file and model[0] in file and model[1] in file and model[2] in file]
            if N_gridcells=='':  fnames_CORDEX = [file for file in fnames_CORDEX if 'HSIs_' + city in file]
            else:                fnames_CORDEX = [file for file in fnames_CORDEX if N_gridcells[1::] in file]
            if len(fnames_CORDEX)!=1:  sys.exit('File is not unique')

            #Read data into memory and close the file again, so that no file handle
            #stays open for every city/model/scenario combination
            with xr.open_dataset(dir_data + fnames_CORDEX[0]) as data_read:
                data_scen.append(data_read.load())

        #Concatenate scenarios along time
        data_CORDEX = xr.concat(data_scen, dim='time')

        #Loop over EMT levels (and the reference period) available for this model
        for dEMT_str, (start_year, end_year) in EMT_windows[mod_name].items():

            #Select data and get only summer
            years_sel = slice(str(start_year), str(end_year))
            data_20y  = data_CORDEX.sel(time=years_sel)
            JJA_20y   = data_20y.isel(time=(data_20y.time.dt.month>=6) & (data_20y.time.dt.month<=8))

            #Calculate mean and SD over time; the second mean() averages over all remaining dimensions
            #NOTE(review): the SD is taken over all JJA time steps of the window; whether this is the
            #intended interannual SD depends on the temporal resolution of the HSI files -- confirm
            HSI_mean = JJA_20y.mean('time').mean()
            HSI_std  = JJA_20y.std('time').mean()

            #Loop over HSIs
            for HSI in HSI_names:

                #Save in array
                sel_coll = {"model": mod_name, "city": city, "EMT_change": dEMT_str}
                data_coll[HSI].loc[{**sel_coll, "indicator": "mean"}] = HSI_mean[HSI]
                data_coll[HSI].loc[{**sel_coll, "indicator": "SD"}]   = HSI_std[HSI]

#Save data in file (create the output folder if needed and replace existing files)
os.makedirs(dir_out, exist_ok=True)
fname_out1 = dir_out + 'ANOVA' + N_gridcells + '_' + 'HSIs-mean_' + RCP + '_EMT.nc'
fname_out2 = dir_out + 'ANOVA' + N_gridcells + '_' + 'HSIs-SD_' + RCP + '_EMT.nc'
if os.path.exists(fname_out1): os.remove(fname_out1)
if os.path.exists(fname_out2): os.remove(fname_out2)
data_coll.sel(indicator='mean').to_netcdf(fname_out1)
data_coll.sel(indicator='SD').to_netcdf(fname_out2)
