In [None]:
import os
import sys
import time as t_util
import numpy as np
import pandas as pd
import yaml
import cftime
import xarray as xr


In [None]:
#Read main path (strip surrounding whitespace so that a trailing newline in
#the text file cannot end up inside the directory names built below)
with open('../path_main.txt', 'r') as file:
    path_main = file.read().strip()

#Build the directories used in this notebook from the main path
dir_scripts   = f'{path_main}Scripts/'
dir_variables = f'{path_main}Data/GSOD/Stations_all_years/'
dir_JJA       = f'{path_main}Data/GSOD/JJA/'

#Create the JJA directory (including missing parents); no error if it already exists
os.makedirs(dir_JJA, exist_ok=True)


## Prepare variables and parameters

In [None]:
#Cities for which station data are collected
cities = [
    'Istanbul', 'Moscow', 'London', 'SaintPetersburg', 'Berlin', 'Madrid',
    'Kyiv', 'Rome', 'Paris', 'Bucharest', 'Minsk', 'Vienna',
    'Hamburg', 'Warsaw', 'Budapest', 'Barcelona', 'Munich', 'Kharkiv',
    'Milan', 'Belgrade', 'Prague', 'NizhnyNovgorod', 'Kazan', 'Sofia',
    'Brussels', 'Stockholm', 'Oslo', 'Dublin', 'Lisbon', 'Vilnius',
    'Copenhagen', 'Helsinki', 'Athens', 'Amsterdam', 'Riga', 'Zagreb',
]

#Variables to process
variables = ['TN', 'TX']

#Reference period used for the time selection
time_sel = slice('1981', '2010')

#Read the city coordinates from the YAML file in the scripts directory
fname_coords = f'{dir_scripts}City_coordinates.yml'
with open(fname_coords, 'r') as file:
    city_coords = yaml.safe_load(file)

#Split the mapping into city names and a coordinate array
#(column 0 is used as latitude, column 1 as longitude)
city_names  = [name for name in city_coords]
city_coords = np.asarray([city_coords[name] for name in city_names])
lat_city, lon_city = city_coords[:, 0], city_coords[:, 1]


## Calculate JJA average

In [None]:
#Number of days in one JJA season (June 30 + July 31 + August 31)
n_days_JJA = 92

#Loop over variables
for variab in variables:

    print(" -" + variab, end=': ')

    #Get all files of this variable (sorted, so that the row order of the
    #output files does not depend on the arbitrary order of os.listdir)
    files = [dir_variables + file for file in sorted(os.listdir(dir_variables)) if variab + '_' in file]

    #Initialize dictionary to store data (one list of station records per city)
    data_coll = {city: [] for city in cities}

    #Loop over files
    for file in files:

        #Open the station file in a context manager so that the file handle
        #is released again; all values are extracted inside the with-block
        with xr.open_dataset(file) as data:

            #Read lat, lon of station
            lat = data.lat.values.item()
            lon = data.lon.values.item()

            #Identify city that is closest to station
            #(distance is computed in plain lat/lon degrees, not as great-circle distance)
            dist_to_cities = np.sqrt((lat_city - lat)**2 + (lon_city - lon)**2)
            city_sel = city_names[np.argmin(dist_to_cities)]

            #Select data in reference period
            data_ref = data.sel(time=time_sel)

            #Select summer (June to August)
            month    = data_ref.time.dt.month
            sel_JJA  = (month>=6) & (month<=8)
            data_JJA = data_ref.isel(time=sel_JJA)

            #Number of NaN values and record length expressed in JJA seasons
            NaN_sum   = int(np.isnan(data_JJA[variab]).sum())
            n_seasons = len(data_JJA.time) / n_days_JJA

            #Skip station if too many NaNs (10 seasons' worth or more)
            #or if fewer than 20 seasons of data lie in the time period
            if (NaN_sum >= 10*n_days_JJA) or (n_seasons < 20):
                continue

            #Quality flag: 1 if fewer than 5 seasons' worth of NaNs, otherwise 2
            #NOTE(review): the original code additionally tested for 20-25 seasons
            #of record when assigning flag 2, but that test never changed the
            #result (every station reaching this point already has fewer than
            #10 seasons' worth of NaNs, and a low NaN count overrides the flag
            #to 1). Confirm whether short records were meant to keep flag 2.
            flag = 1 if NaN_sum < 5*n_days_JJA else 2

            #Calculate summer average
            values = data_JJA.mean('time')[variab].values.item()

        #Save data in dict
        #NOTE(review): raises KeyError if the coordinates file contains a city
        #that is not listed in `cities` - confirm that both lists agree
        data_coll[city_sel].append([lat, lon, values, flag])

    #Loop over cities
    for city in cities:

        #Extract data
        data_sel = data_coll[city]

        #Report cities without any usable station and move on
        if not data_sel:
            print(city)
            continue

        #Convert to dataframe
        data_out = pd.DataFrame(data_sel, columns=['lat', 'lon', variab, 'flag'])

        #Save to file
        fname_out = dir_JJA + variab + '_GSOD-stations_' + city + ".csv"
        data_out.to_csv(fname_out, index=False)

    print('')
    