In [None]:
import os
import sys
import time as t_util
import numpy as np
import cftime
import xarray as xr
import matplotlib.pyplot as plt
import scipy.stats
import matplotlib
import yaml


In [None]:
#Read main paths
#(each text file holds one base directory; surrounding whitespace is stripped so that
# a trailing newline in the file does not end up inside the derived paths below)
with open('../path_main.txt', 'r') as file:    path_main  = file.read().strip()
with open('../path_gwls.txt', 'r') as file:    path_gwls  = file.read().strip()

#Derive input and output directories
#(the base paths are used as plain prefixes, i.e. they must end with a path separator)
dir_CMIP5    = f'{path_main}Data/CMIP5/HSIs/'
dir_GWL      = f'{path_gwls}cmip5_all_ens/'
dir_EMT      = f'{path_main}Data/CMIP5/EMT/'
dir_names    = f'{path_main}Scripts/Model_lists/'
dir_out      = f'{path_main}Data/Plot_preparation/HSI_changes/CMIP5/'

#Create output folder including missing parent folders (no error if it already exists)
os.makedirs(dir_out, exist_ok=True)
    

## Prepare variables and parameters

In [None]:
#Define cities
cities = ['Lisbon', 'Madrid', 'Barcelona', 'Rome', 'Athens', 'Istanbul', 'Sofia', 'Bucharest', 'Belgrade', 'Zagreb',
          'Milan', 'Budapest', 'Munich', 'Vienna', 'Prague', 'Paris', 'Brussels', 'Amsterdam', 'London', 'Dublin',
          'Hamburg', 'Copenhagen', 'Berlin', 'Warsaw', 'Kharkiv', 'Kyiv', 'Minsk', 'Vilnius', 'Riga', 'Moscow',
          'NizhnyNovgorod', 'Kazan', 'SaintPetersburg', 'Helsinki', 'Stockholm', 'Oslo']

#Define HSIs
HSI_names = ['TX']

#Define models and RCPs which should be used
#(one model name per line; strip() removes the line break without cutting off the last
# character of a final line that has no trailing newline, and blank lines are skipped)
all_models = dict()
with open(dir_names + 'Models_CMIP5_RCP85.txt', 'r') as filehandle:
    all_models['rcp85'] = [line.strip() for line in filehandle if line.strip()]

#Get model names (including member)
#(3rd and 4th '_'-separated parts of every file name in the Berlin/rcp85 folder, without duplicates)
dir_data = dir_CMIP5 + 'Berlin/rcp85/'
model_names = sorted({"_".join(file.split('_')[2:4]) for file in os.listdir(dir_data)})

#Read warming levels
fname = dir_GWL + 'cmip5_warming_levels_all_ens_1850_1900_no_bounds_check.yml'
with open(fname, 'r') as file:
    GWL_data = yaml.safe_load(file)

#Define warming levels
#(strings that are appended to 'warming_level_' to form the keys looked up in GWL_data)
GWL_levels = ['10', '20', '30', '40']


## Prepare GWL data

In [None]:
#Define RCP
RCP = 'rcp85'
scenarios = ['historical', RCP]

#Temporal aggregation methods for which the changes are calculated
time_selections = ['JJA_mean', 'Q90', 'Ymax']

#Create empty (NaN-filled) array for storing heat stress indicators
#(np.nan is used because the np.NaN alias was removed in NumPy 2.0)
da_empty = np.full((len(model_names), len(cities), len(GWL_levels), len(time_selections)), np.nan)
data_coll = xr.Dataset(coords={'model':      ('model', model_names),
                               'city':       ('city', cities),
                               'GWL_level':  ('GWL_level', GWL_levels),
                               't_method':   ('t_method', time_selections)})

for HSI in HSI_names:  data_coll[HSI] = (('model', 'city', 'GWL_level', 't_method'), da_empty.copy())

#Collect models for which no entry is found for a warming level
missing_data = {GWL: set() for GWL in GWL_levels}

#Loop over cities
for city in cities:

    print(city, end=', ')

    #Loop over models
    for model in all_models[RCP]:

        #Define members
        if model=='EC-EARTH':      members = ['r1i1p1', 'r12i1p1']
        elif model=='MPI-ESM-LR':  members = ['r1i1p1', 'r2i1p1', 'r3i1p1']
        else:                      members = ['r1i1p1']

        #Loop over members
        for member in members:

            #Define name for storing data in array
            model_name = model + '_' + member

            #Loop over scenarios and read data
            data_scens = []
            for scen in scenarios:

                #Define folder
                dir_data = dir_CMIP5 + city + '/' + scen + '/'

                #Identify file (exactly one file must match scenario, model, member and city)
                fnames_CMIP5 = [file for file in os.listdir(dir_data) if scen + '_' in file and model + '_' in file and member in file]
                fnames_CMIP5 = [file for file in fnames_CMIP5 if 'HSIs_' + city in file]
                if len(fnames_CMIP5)!=1:  sys.exit('File is not unique')
                data_scens.append(xr.open_dataset(dir_data + fnames_CMIP5[0]))

            #Concatenate historical and scenario data along time
            data_CMIP5 = xr.concat(data_scens, dim='time')

            #Loop over global warming levels (GWL)
            for GWL_level in GWL_levels:

                #Find entry with time period in which GWL is reached for this model, RCP and member
                data_level = GWL_data['warming_level_' + GWL_level]
                entry_sel = [entry for entry in data_level if entry['model']==model and entry['exp']==RCP and entry['ensemble']==member]

                #Skip warming level if there is no entry (values in data_coll stay NaN)
                if entry_sel==[]:
                    print(model + ': ' + str(GWL_level) + ' missing')
                    missing_data[GWL_level].add(model)
                    continue

                #Define time period when GWL is reached
                time_GWL = slice(str(entry_sel[0]['start_year']), str(entry_sel[0]['end_year']))

                #Select data
                data_20y = data_CMIP5.sel(time=time_GWL)
                data_ref = data_CMIP5.sel(time=slice('1981', '2010'))

                #Select summer months (June to August)
                JJA_20y = data_20y.isel(time=(data_20y.time.dt.month>=6) & (data_20y.time.dt.month<=8))
                JJA_ref = data_ref.isel(time=(data_ref.time.dt.month>=6) & (data_ref.time.dt.month<=8))

                #Calculate yearly maximum
                Ymax_20y = data_20y.resample(time='1Y').max()
                Ymax_ref = data_ref.resample(time='1Y').max()

                #Calculate 90th percentile (for each month in JJA separately)
                Q90_months_20y = []
                Q90_months_ref = []
                for month in range(6, 9):
                    Q90_months_20y.append(JJA_20y.isel(time=JJA_20y.time.dt.month==month).quantile(0.9))
                    Q90_months_ref.append(JJA_ref.isel(time=JJA_ref.time.dt.month==month).quantile(0.9))

                #Calculate mean of 90th percentiles
                Q90_20y = xr.concat(Q90_months_20y, dim='month').mean('month')
                Q90_ref = xr.concat(Q90_months_ref, dim='month').mean('month')

                #Calculate change relative to reference period
                data_chg = {'JJA_mean': JJA_20y.mean() - JJA_ref.mean(),
                            'Q90':      Q90_20y.mean() - Q90_ref.mean(),
                            'Ymax':     Ymax_20y.mean() - Ymax_ref.mean()}

                #Loop over HSIs
                for HSI in HSI_names:

                    #Save in array
                    for t_method in time_selections:
                        data_coll[HSI].loc[{"model": model_name, "city": city, "GWL_level": GWL_level, "t_method": t_method}] = data_chg[t_method][HSI]

#Save data in file
fname_out = dir_out + 'HSIs-changes_' + RCP + '_GWL.nc'
if os.path.exists(fname_out): os.remove(fname_out)
data_coll.to_netcdf(fname_out)

print('\nMissing data:')
for key, data in missing_data.items():
    print(str(key) + ': ' + str(data))


## Prepare data according to European warming (EMT)

In [None]:
#Define RCP
RCP = 'rcp85'
scenarios = ['historical', RCP]

#European mean temperature (EMT) changes and the labels used for them in the output
EMT_change     = np.array([1.0, 2.0, 3.0])
EMT_change_str = ['1.0K', '2.0K', '3.0K']

#Length of the running-mean window (number of time steps of the EMT data) and of the selected period (years)
N_window = 20

#Temporal aggregation methods for which the changes are calculated
time_selections = ['JJA_mean', 'Q90', 'Ymax']

#Collect models which never reach an EMT level
missing_data = {EMT: set() for EMT in EMT_change_str}

#Create empty (NaN-filled) array for storing heat stress indicators
#(np.nan is used because the np.NaN alias was removed in NumPy 2.0)
da_empty = np.full((len(model_names), len(cities), len(EMT_change), len(time_selections)), np.nan)
data_coll = xr.Dataset(coords={'model':          ('model', model_names),
                               'city':           ('city', cities),
                               'EMT_change':     ('EMT_change', EMT_change_str),
                               't_method':       ('t_method', time_selections)})

for HSI in HSI_names:  data_coll[HSI] = (('model', 'city', 'EMT_change', 't_method'), da_empty.copy())

#Loop over cities
for city in cities:

    print(city, end=', ')

    #Loop over models
    for model in all_models[RCP]:

        #Define members
        if model=='EC-EARTH':      members = ['r1i1p1', 'r12i1p1']
        elif model=='MPI-ESM-LR':  members = ['r1i1p1', 'r2i1p1', 'r3i1p1']
        else:                      members = ['r1i1p1']

        #Loop over members
        for member in members:

            #Define name for storing data in array
            model_name = model + '_' + member

            #Loop over scenarios and read data
            data_scens = []
            for scen in scenarios:

                #Define folder
                dir_data = dir_CMIP5 + city + '/' + scen + '/'

                #Identify file (exactly one file must match scenario, model, member and city)
                fnames_CMIP5 = [file for file in os.listdir(dir_data) if scen + '_' in file and model + '_' in file and member in file]
                fnames_CMIP5 = [file for file in fnames_CMIP5 if 'HSIs_' + city in file]
                if len(fnames_CMIP5)!=1:  sys.exit('File is not unique')
                data_scens.append(xr.open_dataset(dir_data + fnames_CMIP5[0]))

            #Concatenate historical and scenario data along time
            data_CMIP5 = xr.concat(data_scens, dim='time')

            #Read European mean temperature (EMT)
            #(files are concatenated in sorted file-name order)
            files_EMT = sorted([dir_EMT + file for file in os.listdir(dir_EMT) if model + '_' in file and 'EMT_' in file and member in file])
            data_EMT  = xr.concat([xr.open_dataset(file) for file in files_EMT], dim='time')

            #Calculate EMT relative to 1981-2010 and calculate centered running means
            dataEMT_ref = data_EMT.sel(time=slice('1981', '2010')).mean('time')
            dataEMT_rel = data_EMT - dataEMT_ref
            dataEMT_20y = dataEMT_rel.rolling(time=N_window, center=True).mean()

            #Loop over selected EMT levels
            for dEMT, dEMT_str in zip(EMT_change, EMT_change_str):

                #Identify 20-year period in which level is reached for first time
                #(skip level if it is never exceeded; values in data_coll stay NaN)
                ind = np.where(dataEMT_20y.tas>dEMT)[0]
                if len(ind)==0:
                    missing_data[dEMT_str].add(model)
                    continue
                ind = ind[0]

                #Define period from N_window/2 years before to N_window/2 - 1 years after the central year
                central_year = int(dataEMT_20y.isel(time=ind).time.dt.year)
                start_year   = central_year - N_window // 2
                end_year     = central_year + N_window // 2 - 1
                years_sel    = slice(str(start_year), str(end_year))

                #Report periods that extend beyond 2099
                if end_year>2099:
                    print(model, end=': ')
                    print(end_year, end=', ')

                #Select data
                data_20y = data_CMIP5.sel(time=years_sel)
                data_ref = data_CMIP5.sel(time=slice('1981', '2010'))

                #Select summer months (June to August)
                JJA_20y = data_20y.isel(time=(data_20y.time.dt.month>=6) & (data_20y.time.dt.month<=8))
                JJA_ref = data_ref.isel(time=(data_ref.time.dt.month>=6) & (data_ref.time.dt.month<=8))

                #Calculate yearly maximum
                Ymax_20y = data_20y.resample(time='1Y').max()
                Ymax_ref = data_ref.resample(time='1Y').max()

                #Calculate 90th percentile (for each month in JJA separately)
                Q90_months_20y = []
                Q90_months_ref = []
                for month in range(6, 9):
                    Q90_months_20y.append(JJA_20y.isel(time=JJA_20y.time.dt.month==month).quantile(0.9))
                    Q90_months_ref.append(JJA_ref.isel(time=JJA_ref.time.dt.month==month).quantile(0.9))

                #Calculate mean of 90th percentiles
                Q90_20y = xr.concat(Q90_months_20y, dim='month').mean('month')
                Q90_ref = xr.concat(Q90_months_ref, dim='month').mean('month')

                #Calculate change relative to reference period
                data_chg = {'JJA_mean': JJA_20y.mean() - JJA_ref.mean(),
                            'Q90':      Q90_20y.mean() - Q90_ref.mean(),
                            'Ymax':     Ymax_20y.mean() - Ymax_ref.mean()}

                #Loop over HSIs
                for HSI in HSI_names:

                    #Save in array
                    for t_method in time_selections:
                        data_coll[HSI].loc[{"model": model_name, "city": city, "EMT_change": dEMT_str, "t_method": t_method}] = data_chg[t_method][HSI]

#Save data in file
fname_out = dir_out + 'HSIs-changes_' + RCP + '_EMT.nc'
if os.path.exists(fname_out): os.remove(fname_out)
data_coll.to_netcdf(fname_out)

print('\nMissing data:')
for key, data in missing_data.items():
    print(str(key) + ': ' + str(data))


## Prepare time period data

In [None]:
#Select time periods
time_periods = [[2036, 2065],
                [2070, 2099]]

#Define time string
time_strings = [str(period[0]) + '-' + str(period[1]) for period in time_periods]

#Define RCPs
#(RCP is defined here explicitly so that this cell does not depend on a value left over from an earlier cell)
RCPs = ['rcp85']
RCP  = RCPs[0]
scenarios = ['historical', RCP]

#Temporal aggregation methods for which the changes are calculated
time_selections = ['JJA_mean', 'Q90', 'Ymax']

#Create empty (NaN-filled) array for storing heat stress indicators
#(np.nan is used because the np.NaN alias was removed in NumPy 2.0)
da_empty = np.full((len(model_names), len(cities), len(time_periods), len(time_selections)), np.nan)
data_coll = xr.Dataset(coords={'model':       ('model', model_names),
                               'city':        ('city', cities),
                               'time_period': ('time_period', time_strings),
                               't_method':    ('t_method', time_selections)})

for HSI in HSI_names:  data_coll[HSI] = (('model', 'city', 'time_period', 't_method'), da_empty.copy())

#Loop over cities
for city in cities:

    print(city, end=', ')

    #Loop over models
    for model in all_models[RCP]:

        #Define members
        if model=='EC-EARTH':      members = ['r1i1p1', 'r12i1p1']
        elif model=='MPI-ESM-LR':  members = ['r1i1p1', 'r2i1p1', 'r3i1p1']
        else:                      members = ['r1i1p1']

        #Loop over members
        for member in members:

            #Define name for storing data in array
            model_name = model + '_' + member

            #Loop over scenarios and read data
            data_scens = []
            for scen in scenarios:

                #Define folder
                dir_data = dir_CMIP5 + city + '/' + scen + '/'

                #Identify file (exactly one file must match scenario, model, member and city)
                fnames_CMIP5 = [file for file in os.listdir(dir_data) if scen + '_' in file and model + '_' in file and member in file]
                fnames_CMIP5 = [file for file in fnames_CMIP5 if 'HSIs_' + city in file]
                if len(fnames_CMIP5)!=1:  sys.exit('File is not unique')
                data_scens.append(xr.open_dataset(dir_data + fnames_CMIP5[0]))

            #Concatenate historical and scenario data along time
            data_CMIP5 = xr.concat(data_scens, dim='time')

            #Loop over time periods
            for time_period, time_string in zip(time_periods, time_strings):

                #Select data in selected time period
                time_sel = slice(str(time_period[0]), str(time_period[1]))

                #Select data
                #(time_sel of the current period must be used here, not the years_sel selection
                # that is defined in the EMT cell; the '_20y' names are kept from the other cells
                # although the periods defined above are 30 years long)
                data_20y = data_CMIP5.sel(time=time_sel)
                data_ref = data_CMIP5.sel(time=slice('1981', '2010'))

                #Select summer months (June to August)
                JJA_20y = data_20y.isel(time=(data_20y.time.dt.month>=6) & (data_20y.time.dt.month<=8))
                JJA_ref = data_ref.isel(time=(data_ref.time.dt.month>=6) & (data_ref.time.dt.month<=8))

                #Calculate yearly maximum
                Ymax_20y = data_20y.resample(time='1Y').max()
                Ymax_ref = data_ref.resample(time='1Y').max()

                #Calculate 90th percentile (for each month in JJA separately)
                Q90_months_20y = []
                Q90_months_ref = []
                for month in range(6, 9):
                    Q90_months_20y.append(JJA_20y.isel(time=JJA_20y.time.dt.month==month).quantile(0.9))
                    Q90_months_ref.append(JJA_ref.isel(time=JJA_ref.time.dt.month==month).quantile(0.9))

                #Calculate mean of 90th percentiles
                Q90_20y = xr.concat(Q90_months_20y, dim='month').mean('month')
                Q90_ref = xr.concat(Q90_months_ref, dim='month').mean('month')

                #Calculate change relative to reference period
                data_chg = {'JJA_mean': JJA_20y.mean() - JJA_ref.mean(),
                            'Q90':      Q90_20y.mean() - Q90_ref.mean(),
                            'Ymax':     Ymax_20y.mean() - Ymax_ref.mean()}

                #Loop over HSIs
                for HSI in HSI_names:

                    #Save in array
                    for t_method in time_selections:
                        data_coll[HSI].loc[{"model": model_name, "city": city, "time_period": time_string, "t_method": t_method}] = data_chg[t_method][HSI]

#Save data in file
fname_out = dir_out + 'HSIs-changes_' + RCP + '_time-periods.nc'
if os.path.exists(fname_out): os.remove(fname_out)
data_coll.to_netcdf(fname_out)
