In [None]:
import os
import subprocess
import sys
import time as t_util

import cftime
import numpy as np
import pandas as pd
import xarray as xr
import yaml

#My functions
sys.path.insert(0,'./../functions/')
import functions_HeatWavesCities as fun_HWC


## Define folders

In [None]:
#Read main paths
def _read_path(fname):
    """Return the directory stored in the text file `fname`.

    Surrounding whitespace (e.g. the newline most editors append to a file) is
    stripped and a trailing path separator is added when it is missing, because
    all folders below are built by plain string concatenation.
    """
    with open(fname, 'r') as file:
        path = file.read().strip()
    #os.path.join(path, '') leaves a path that already ends with a separator unchanged
    return os.path.join(path, '')

path_main  = _read_path('../path_main.txt')
path_cmip5 = _read_path('../path_CMIP5.txt')
dir_grids  = _read_path('../path_grids.txt')

#Derive all folders used in this notebook (every folder ends with a separator)
dir_CMIP5   = path_cmip5
dir_scripts = f'{path_main}Scripts/'
dir_names   = f'{path_main}Scripts/Model_lists/'
dir_orog    = f'{dir_grids}Orography/'
dir_tmp     = f'{path_main}Data/CMIP5_tmp/'
dir_out     = f'{path_main}Data/CMIP5/Variables/'

#Create output and temporary folder; makedirs also creates missing parent folders
for folder in (dir_out, dir_tmp):
    os.makedirs(folder, exist_ok=True)
    

## Prepare variables and parameters

In [None]:
#Define cities
cities = ['Istanbul', 'Moscow', 'London', 'SaintPetersburg', 'Berlin', 'Madrid', 'Kyiv', 'Rome', 'Paris',
          'Bucharest', 'Minsk', 'Vienna', 'Hamburg', 'Warsaw', 'Budapest', 'Barcelona', 'Munich', 'Kharkiv',
          'Milan', 'Belgrade', 'Prague', 'NizhnyNovgorod', 'Kazan', 'Sofia', 'Brussels', 'Stockholm', 'Oslo',
          'Dublin', 'Lisbon', 'Vilnius', 'Copenhagen', 'Helsinki', 'Athens', 'Amsterdam', 'Riga', 'Zagreb']

#Define scenarios and variables
# variables:   names of the variables that are read from the CMIP5 files
# variabs_out: names of the columns that are written to already existing output files
scenarios = ['historical', 'rcp85']
variables  = ['tasmax']
variabs_out = ['tasmax']

# Load city coordinates (each entry is unpacked as (lat, lon) when the data are extracted)
fname_coords = dir_scripts + 'City_coordinates.yml'
with open(fname_coords, 'r') as file:
    city_coords = yaml.safe_load(file)

#Define models which should be used for each scenario (the list file has one model name per line)
all_models = dict()
with open(dir_names + 'Models_CMIP5_RCP85.txt', 'r') as filehandle:
    #Strip the line ending instead of cutting off the last character: otherwise the last
    #model name is truncated when the file does not end with a newline. Blank lines are
    #skipped, because an empty model name would match every input file.
    all_models['rcp85'] = [line.strip() for line in filehandle if line.strip()]

#Add models for historical (independent copy, so that editing one list does not change the other)
all_models['historical'] = list(all_models['rcp85'])


## Prepare CMIP5 data

In [None]:
#Extract daily CMIP5 data at the grid point closest to each city and store one CSV file
#per city, model, member and scenario:
#  1) collect the daily input files of a model/member/variable (merged with cdo if split),
#  2) select the nearest grid point for every city,
#  3) cut the time period of the scenario and write (or update) the CSV file.

#Create temporary folder
os.makedirs(dir_tmp, exist_ok=True)

#Loop over scenarios
for scen in scenarios:

    dir_scen = dir_CMIP5 + scen + '/'
    models = all_models[scen]

    #Loop over models
    for model in models:

        print(model)

        #Define members
        if model=='EC-EARTH':      members = ['r1i1p1', 'r12i1p1']
        elif model=='MPI-ESM-LR':  members = ['r1i1p1', 'r2i1p1', 'r3i1p1']
        else:                      members = ['r1i1p1']

        #Select time limits (the scenario period starts in 2005 for the HadGEM2 models)
        if scen=='historical':
            time_sel = slice('1980', '2005')
        elif scen in ['rcp26', 'rcp85']:
            if model in ['HadGEM2-AO', 'HadGEM2-CC', 'HadGEM2-ES']:
                time_sel = slice('2005', '2100')
            else:
                time_sel = slice('2006', '2100')
        else:
            #Fail early instead of running into an undefined (or stale) time_sel below
            raise ValueError('No time limits defined for scenario ' + scen)

        #Loop over members
        for member in members:

            #Initialize dict to store data (key: '<city>_<variable>')
            data_coll = dict()

            #Loop over variables
            for variab in variables:

                print(" -" + variab, end='')

                #Get file names (sorted, so that the order does not depend on os.listdir)
                fnames = sorted(dir_scen + file for file in os.listdir(dir_scen)
                                if model + '_' in file and member in file and variab in file
                                and scen in file and '_day_' in file)

                #Skip files that end before 1980
                #NOTE(review): assumes that file names end with 'YYYYMMDD.nc', so that
                #fname[-11:-7] is the last year in the file -- TODO confirm
                fnames = [fname for fname in fnames if int(fname[-11:-7])>=1980]
                t_sta = t_util.time()

                if len(fnames)==0:
                    sys.exit('No input files found for ' + variab + ', ' + scen + ', ' + model + ', ' + member + ' in ' + dir_scen)
                elif len(fnames)>1:

                    #Merge single files to one large file
                    #NOTE(review): the merged file is written to dir_out, not to dir_tmp
                    file_merge = dir_out + 'CMIP5_merged_' + variab + '_' + scen + "_" + model + '_' + member + '_gridpoint_tmp.nc'
                    if os.path.exists(file_merge): os.remove(file_merge)

                    #An argument list avoids shell quoting problems, and check=True stops
                    #the run if cdo fails (os.system silently ignored the exit status)
                    subprocess.run(['cdo', 'mergetime'] + fnames + [file_merge], check=True)
                    t_end = t_util.time()
                    print(" -- " + "{:.1f}".format(t_end - t_sta), end='')

                    remove_file = True

                else:
                    file_merge = fnames[0]
                    remove_file = False

                #Load dataset into memory; the file is closed when the with-block is left
                try:
                    with xr.open_dataset(file_merge, use_cftime=True) as ds:
                        data = ds.load()
                finally:
                    #Delete temporary file (also if loading failed). Only files that were
                    #merged by this script and are located in dir_out are removed, never
                    #an original input file.
                    if remove_file and (dir_out in file_merge) and os.path.exists(file_merge):
                        os.remove(file_merge)

                #Loop over cities
                for city in cities:

                    #Get lat and lon of city
                    lat_sel, lon_sel = city_coords[city]

                    #Convert longitude from [-180, 180] to [0, 360]
                    if lon_sel<0: lon_sel = lon_sel + 360

                    #Find grid point closest to city
                    data_sel = data.sel(lat=lat_sel, lon=lon_sel, method='nearest')

                    #Save in dict
                    data_coll[city + '_' + variab] = data_sel.load()

                t_end = t_util.time()
                print(", " + "{:.1f}".format(t_end - t_sta))

            #Loop over cities
            for city in cities:

                #Define output folder (makedirs also creates the city folder)
                dir_city = dir_out + city  + '/'
                dir_save = dir_city + scen + '/'
                os.makedirs(dir_save, exist_ok=True)

                #Loop over variables
                for i_var, variab in enumerate(variables):

                    #Add missing 31 December 2099
                    if (model=='bcc-csm1-1') and (scen=='rcp85'):

                        fname_tmp = dir_out + 'tmp_time.nc'
                        data_corr = data_coll[city + '_' + variab]
                        data_full = fun_HWC.add_missing_data(data_corr, fname_tmp, variab, '20991231', '20991231', 1)
                        data_full = data_full.sortby('time')

                        #Save corrected data in dict
                        data_coll[city + '_' + variab] = data_full

                    #Convert to pandas dataframe
                    data_convert = data_coll[city + '_' + variab].sel(time=time_sel)
                    data_convert = data_convert[variab].to_pandas().to_frame(name=variab)

                    #Put all variables in one dataframe
                    if i_var==0:
                        data_out = data_convert
                    else:
                        data_out = pd.concat((data_out, data_convert), axis=1)

                    #Correct sea level pressure
                    #(requires that 'tasmax' is already a column of data_out, i.e. that it
                    # comes before 'psl' in variables)
                    if variab=='psl':

                        #Read orography data and get temperature data
                        #NOTE(review): a helper named get_orog_CMIP6 is used for CMIP5
                        #models here -- confirm that it supports them
                        orog = fun_HWC.get_orog_CMIP6(model, city, city_coords[city], dir_orog)
                        Temp = data_out.loc[:, 'tasmax']

                        #Correct pressure
                        T   = Temp.values
                        psl = data_convert['psl'].values
                        p_corr = fun_HWC.corr_press(psl, orog, T)

                        #Save in data frame and rename psl -> sp
                        data_out.loc[:, 'psl'] = p_corr
                        data_out = data_out.rename(columns={'psl': 'sp'})

                #Create file name
                t1 = str(data_out.index[0].year)
                t2 = str(data_out.index[-1].year)
                fname_out = dir_save + "Variables_" + city + "_" + model + '_' + member + '_' + scen + '_day_' + t1 + "-" + t2 + ".csv"

                #Check if file already exists
                if os.path.exists(fname_out):

                    #Read data and add variables. The first column is read back as index:
                    #otherwise every re-run would write an additional unnamed index column.
                    data_read = pd.read_csv(fname_out, index_col=0)
                    for variab in variabs_out:
                        #Values are assigned by position and must have the same length as the file
                        data_read[variab] = data_out[variab].values

                    #Save in file (to_csv overwrites the existing file)
                    data_read.to_csv(fname_out)

                else:

                    #Save in file
                    data_out.to_csv(fname_out)

#Remove temporary folder (only if it is empty, so that the run does not fail at the very end)
if os.path.isdir(dir_tmp) and not os.listdir(dir_tmp):
    os.rmdir(dir_tmp)
