In [None]:
import os
import sys
import time as t_util
import numpy as np
import pandas as pd
import yaml
import cftime
import xarray as xr


In [None]:
#Read main path (strip surrounding whitespace: a trailing newline in the text
#file would otherwise end up inside every path built below)
with open('../path_main.txt', 'r') as file:
    path_main = file.read().strip()

dir_scripts   = f'{path_main}Scripts/'
dir_variables = f'{path_main}Data/Stations/'
dir_JJA       = f'{path_main}Data/Stations/JJA/'

#Create output directory (no error if it already exists, parents are created as needed)
os.makedirs(dir_JJA, exist_ok=True)
    

## Prepare variables and parameters

In [None]:
#Cities to process
cities = [
    'Istanbul', 'Moscow', 'London', 'SaintPetersburg', 'Berlin', 'Madrid',
    'Kyiv', 'Rome', 'Paris', 'Bucharest', 'Minsk', 'Vienna',
    'Hamburg', 'Warsaw', 'Budapest', 'Barcelona', 'Munich', 'Kharkiv',
    'Milan', 'Belgrade', 'Prague', 'NizhnyNovgorod', 'Kazan', 'Sofia',
    'Brussels', 'Stockholm', 'Oslo', 'Dublin', 'Lisbon', 'Vilnius',
    'Copenhagen', 'Helsinki', 'Athens', 'Amsterdam', 'Riga', 'Zagreb',
]

#Variables to process
variables = ['TN', 'TX']

#Read the city coordinates from the YAML file in the scripts directory
fname_coords = f'{dir_scripts}City_coordinates.yml'
with open(fname_coords, 'r') as file:
    city_coords = yaml.safe_load(file)

#Station counts, filled by the cells below (keys: '<start>-<stop>_<city>_<variable>')
N_all = {}


## Calculate JJA average

In [None]:
time_sel = slice('1981', '2010')

#Thresholds for station selection
days_JJA       = 92   # days per summer season (June + July + August)
min_years      = 20   # stations with fewer JJA seasons than this in the period are skipped
max_NaN_years  = 10   # stations with at least this many seasons' worth of NaNs are skipped
good_NaN_years = 5    # fewer NaNs than this many seasons' worth -> flag 1, otherwise flag 2

#Initialize array for counting NaNs
check_NaN = dict()

#Loop over variables
for variab in variables:

    print(" -" + variab, end=': ')

    #Initialize array for counting NaNs
    check_NaN[variab] = []

    for city in cities:

        print(city, end=', ')

        #Get file names
        dir_files = dir_variables + variab + '/' + city + '/'
        fnames = sorted(dir_files + file for file in os.listdir(dir_files) if variab in file)

        #Results of the stations kept for THIS city. Starting from an empty list for
        #every city guarantees that data of a previous city can never be written out
        #under the current city's name.
        station_frames = []

        for fname in fnames:

            #Open data set; the file is closed again when the with block is left
            with xr.open_dataset(fname) as data_raw:#, use_cftime=True)

                #Select data in reference period
                data = data_raw.sel(time=time_sel)

                #Select summer
                sel_JJA  = (data.time.dt.month>=6) & (data.time.dt.month<=8)
                data_JJA = data.isel(time=sel_JJA)

                #Calculate sum of NaNs (recorded for every station, also skipped ones)
                NaN_sum = int(np.isnan(data_JJA[variab]).sum())
                check_NaN[variab].append(NaN_sum)

                #Skip station if too many NaNs or too little data in time period
                n_years = len(data_JJA.time) / days_JJA
                if (NaN_sum >= max_NaN_years * days_JJA) or (n_years < min_years):
                    continue

                #Quality flag of a kept station: 1 = few NaNs, 2 = moderate number of NaNs
                flag = 1 if NaN_sum < good_NaN_years * days_JJA else 2

                #Calculate summer mean and convert to data frame
                #(done inside the with block because the data are loaded lazily)
                data_pd = data_JJA.mean('time').to_dataframe()
                data_pd['flag'] = flag

            station_frames.append(data_pd)

        #Number of stations kept for this city (each station counted exactly once)
        n = len(station_frames)

        #Save to file
        if n>0:
            #Concatenate once instead of growing a data frame inside the loop
            data_all = pd.concat(station_frames, axis=0)
            fname_out = dir_JJA + variab + '_Stations_' + city + ".csv"
            data_all.to_csv(fname_out)
        else:
            print(' NO DATA!!!')

        N_all[time_sel.start + '-' + time_sel.stop + '_' + city + '_' + variab] = n

    print('')
    

In [None]:
#Quick check on the station left over from the loop above:
#does its JJA selection cover fewer than 25 seasons of 92 days each?
len(data_JJA.time) < 25 * 92

In [None]:
#Compare the number of stations between two reference periods.
#N_all only holds a period after the 'Calculate JJA average' cell has been run
#with time_sel set to that period, so both runs must have happened before this cell.
period_long  = '1981-2010'
period_short = '1981-2005'

#Loop over variables
for variab in variables:

    for city in cities:

        print(city, end=', ')

        key_long  = period_long + '_' + city + '_' + variab
        key_short = period_short + '_' + city + '_' + variab

        #Fail with an actionable message instead of a bare KeyError
        missing = [key for key in (key_long, key_short) if key not in N_all]
        if missing:
            raise KeyError(
                f"N_all has no entry for {missing}: run the 'Calculate JJA average' cell "
                f"once with time_sel covering {period_long} and once covering {period_short}"
            )

        N1 = N_all[key_long]
        N2 = N_all[key_short]

        #Printed per city: count short period, count long period, difference, ratio
        print(N2, end=', ')
        print(N1, end=',                         ')
        print(N2 - N1, end=',                      ')
        if N2!=0:
            #Ratio is undefined (and not printed) when the short period has no stations
            print(N1/N2, end=', ')

        print('')
        