In [None]:
import os
import sys
import time as t_util
import numpy as np
import pandas as pd
import yaml
import cftime
import xarray as xr

#My functions
sys.path.insert(0,'./../functions/')
import functions_HeatWavesCities as fun_HWC


## Define folders

In [None]:
#Read main paths
#The text files usually end with a newline (most editors add one); strip surrounding
#whitespace so that it does not end up in the middle of every derived path below.
with open('../path_main.txt', 'r') as file:   path_main  = file.read().strip()
with open('../path_CMIP6.txt', 'r') as file:  path_cmip6 = file.read().strip()
with open('../path_grids.txt', 'r') as file:  dir_grids  = file.read().strip()

#Derive all folders used in this notebook
#(paths are built by plain string concatenation, so the main paths must end with '/')
dir_CMIP6   = path_cmip6
dir_scripts = f'{path_main}Scripts/'
dir_names   = f'{path_main}Scripts/Model_lists/'
dir_orog    = f'{dir_grids}Orography/'
dir_tmp     = f'{path_main}Data/CMIP6_tmp/'
dir_out     = f'{path_main}Data/CMIP6/Variables/'

#Create output and temporary folders
#(makedirs also creates missing parent folders and does not fail if the folder exists)
os.makedirs(dir_out, exist_ok=True)
os.makedirs(dir_tmp, exist_ok=True)
    

## Prepare variables and parameters

In [None]:
#Define cities
cities = ['Istanbul', 'Moscow', 'London', 'SaintPetersburg', 'Berlin', 'Madrid', 'Kyiv', 'Rome', 'Paris',
          'Bucharest', 'Minsk', 'Vienna', 'Hamburg', 'Warsaw', 'Budapest', 'Barcelona', 'Munich', 'Kharkiv',
          'Milan', 'Belgrade', 'Prague', 'NizhnyNovgorod', 'Kazan', 'Sofia', 'Brussels', 'Stockholm', 'Oslo',
          'Dublin', 'Lisbon', 'Vilnius', 'Copenhagen', 'Helsinki', 'Athens', 'Amsterdam', 'Riga', 'Zagreb']

#Define scenarios and variables
#(variables: names read from the CMIP6 files; variabs_out: column names written to the output files)
scenarios = ['historical', 'ssp585']
variables   = ['tasmax']
variabs_out = ['tasmax']

# Load city coordinates (indexed by city name further below)
fname_coords = dir_scripts + 'City_coordinates.yml'
with open(fname_coords, 'r') as file:
    city_coords = yaml.safe_load(file)

#Define models and SSPs which should be used
#One model name per line. strip() removes the line ending without cutting off the last
#character of the final line when the file has no trailing newline; blank lines are skipped.
all_models = dict()
with open(dir_names + 'Models_CMIP6_SSP585.txt', 'r') as filehandle:
    all_models['ssp585'] = [line.strip() for line in filehandle if line.strip()]

#Add models for historical (independent copy, so that editing one list does not alter the other)
all_models['historical'] = list(all_models['ssp585'])


## Prepare CMIP6 data

In [None]:
#Extract the time series of the grid point closest to each city from the CMIP6 files
#and write one CSV file per city, model and scenario.

#Create temporary folder (also creates missing parent folders)
os.makedirs(dir_tmp, exist_ok=True)

#Loop over scenarios
for scen in scenarios:

    models = all_models[scen]

    #Select time limits
    if scen=='historical':
        time_sel = slice('1980', '2014')
    elif scen in ['ssp126', 'ssp585']:
        time_sel = slice('2015', '2100')
    else:
        #Fail explicitly instead of silently reusing the time limits of the previous scenario
        raise ValueError(f'No time limits defined for scenario {scen}')

    #Loop over models
    for model in models:

        print(model)

        #Initialize dict to store data (key: '<city>_<variable>')
        data_coll = dict()

        #Loop over variables
        for variab in variables:

            print(" -" + variab, end='')

            #Get file names (matched by model, variable and scenario appearing in the file name)
            fnames = [dir_CMIP6 + file for file in os.listdir(dir_CMIP6) if model +'_' in file and variab in file and scen in file]
            if model=='EC-Earth3-Veg' and len(fnames)!=1:
                #Several files match for this model: drop the ones tagged '_gr_'
                fnames = [file for file in fnames if '_gr_' not in file]
            if len(fnames)!=1:
                sys.exit('Input file not uniquely defined')
            else:
                fname_in = fnames[0]

            #Select time period for historical data and store it in a temporary file
            t_sta = t_util.time()
            if scen=='historical':
                fname_tmp = dir_tmp + model + '_' + scen + '_' + variab + '_tmp.nc'
                #Context manager closes the input file again after the subset is written
                with xr.open_dataset(fname_in, use_cftime=True) as ds_full:
                    ds_full.sel(time=time_sel).to_netcdf(fname_tmp)
            else:
                fname_tmp = fname_in

            #Load dataset into memory
            try:
                with xr.open_dataset(fname_tmp, use_cftime=True) as ds:
                    data = ds.load()
            finally:
                #Delete temporary file (also if loading failed, so that dir_tmp can be removed at the end)
                if scen=='historical' and os.path.exists(fname_tmp):
                    os.remove(fname_tmp)

            #Loop over cities
            for city in cities:

                #Get lat and lon of city
                lat_sel, lon_sel = city_coords[city]

                #Convert longitude from [-180, 180] to [0, 360]
                if lon_sel<0: lon_sel = lon_sel + 360

                #Find grid point closest to city and save in dict (data is already in memory)
                data_coll[city + '_' + variab] = data.sel(lat=lat_sel, lon=lon_sel, method='nearest')

            t_end = t_util.time()
            print(", " + "{:.1f}".format(t_end - t_sta))

        #Loop over cities
        for city in cities:

            #Define output folder
            dir_city = dir_out + city  + '/'
            dir_save = dir_city + scen + '/'
            os.makedirs(dir_save, exist_ok=True)

            #Loop over variables
            for i2, variab in enumerate(variables):

                #Convert to pandas dataframe
                data_convert = data_coll[city + '_' + variab].sel(time=time_sel)
                data_convert = data_convert[variab].to_pandas().to_frame(name=variab)

                #Put all variables in one dataframe
                if i2==0:
                    data_out = data_convert
                else:
                    data_out = pd.concat((data_out, data_convert), axis=1)

                #Correct sea level pressure
                #(uses the 'tasmax' column, so 'tasmax' must come before 'psl' in variables)
                if variab=='psl':

                    #Read orography data and get temperature data
                    orog = fun_HWC.get_orog_CMIP6(model, city, city_coords[city], dir_orog)
                    Temp = data_out.loc[:, 'tasmax']

                    #Correct pressure
                    T   = Temp.values
                    psl = data_convert['psl'].values
                    p_corr = fun_HWC.corr_press(psl, orog, T)

                    #Save in data frame and rename psl -> sp
                    data_out.loc[:, 'psl'] = p_corr
                    data_out = data_out.rename(columns={'psl': 'sp'})

            #Create file name (first and last year of the extracted period)
            t1 = str(data_out.index[0].year)
            t2 = str(data_out.index[-1].year)
            fname_out = dir_save + "Variables_" + city + "_" + model + '_' + scen + '_day_' + t1 + "-" + t2 + ".csv"

            #Check if file already exists
            if os.path.exists(fname_out):

                #Read data and add variables
                #index_col=0 restores the first column as index; without it every re-run
                #would write an additional integer index column ('Unnamed: 0', ...) to the file
                data_read = pd.read_csv(fname_out, index_col=0)
                for variab in variabs_out:
                    data_read[variab] = data_out[variab].values

                #Save in file (to_csv overwrites the existing file)
                data_read.to_csv(fname_out)

            else:

                #Save in file
                data_out.to_csv(fname_out)

#Remove temporary folder
os.rmdir(dir_tmp)
