In [1]:
import os
import sys
import time as t_util
import numpy as np
import pandas as pd
import yaml
import xarray as xr


## Define folders

In [2]:
#Read main paths
# The content of ../path_main.txt is used as the prefix of every path below.
# The prefix is concatenated directly with 'Data/...' and 'Scripts/...', so it
# has to end with a path separator.
with open('../path_main.txt', 'r') as file:
    # Strip surrounding whitespace so that a trailing newline in the text file
    # does not end up in the middle of every derived path
    path_main = file.read().strip()

dir_stations = f'{path_main}Data/Europe_station_data/'
dir_scripts  = f'{path_main}Scripts/PROJECT_HeatWavesCities/'
dir_STA_out  = f'{path_main}Data/Stations/'

# makedirs(exist_ok=True) also creates missing parent folders and does not
# fail when the folder already exists
os.makedirs(dir_STA_out, exist_ok=True)


## Select cities

In [3]:
# Cities to process; each name is used further below as a key into
# city_coords and as the name of the city's output folder
cities = [
    'Istanbul', 'Moscow', 'London', 'SaintPetersburg',
    'Berlin', 'Madrid', 'Kyiv', 'Rome',
    'Paris', 'Bucharest', 'Minsk', 'Vienna',
    'Hamburg', 'Warsaw', 'Budapest', 'Barcelona',
    'Munich', 'Kharkiv', 'Milan', 'Belgrade',
    'Prague', 'NizhnyNovgorod', 'Kazan', 'Sofia',
    'Brussels', 'Stockholm', 'Oslo', 'Dublin',
    'Lisbon', 'Vilnius', 'Copenhagen', 'Helsinki',
    'Athens', 'Amsterdam', 'Riga', 'Zagreb',
]

# Read the city coordinates from the YAML file in the scripts folder
fname_coords = f'{dir_scripts}City_coordinates.yml'
with open(fname_coords, 'r') as coords_file:
    city_coords = yaml.safe_load(coords_file)


## Get station information

In [4]:
#Define variables
variables = ['TN']  # other candidates (currently disabled): 'TG', 'TX', 'HU', 'PP'


def _dms_to_decimal(dms):
    """Convert a '[sign]DD:MM:SS' coordinate string to signed decimal degrees.

    The sign is taken from the leading character instead of from the parsed
    degree value, so that a coordinate such as '-00:30:00' (whose degree part
    parses to -0.0) stays negative. Surrounding whitespace is ignored and a
    missing sign is treated as positive.

    Parameters
    ----------
    dms : str
        Coordinate with degrees, minutes and seconds separated by ':'.

    Returns
    -------
    float
        Coordinate in decimal degrees.
    """
    dms = dms.strip()
    deg, minutes, seconds = (float(part) for part in dms.split(':')[:3])
    sign = -1.0 if dms.startswith('-') else 1.0
    return sign * (np.abs(deg) + minutes / 60 + seconds / 3600)


#Initialize dictionaries
# all_stations maps variable name -> list of [ID, name, lat, lon, height]
all_stations  = dict()

#Loop over variables
for variab in variables:

    #Read file with information about all weather stations
    # (the first 21 lines are skipped -- presumably the file header; confirm
    #  against the station list files)
    fname = dir_stations + 'stations_' + variab + '.txt'
    with open(fname, 'r') as f:
        lines = f.readlines()[21:]

    #Split station information (blank lines, e.g. at the end of the file, are skipped)
    stations = [line.split(',') for line in lines if line.strip()]

    #Loop over all stations
    stations_new = []
    for station in stations:

        #Get ID, name and height of station (column 2 is not used)
        staID     = int(station[0])
        staNAME   = station[1].replace(" ", "")
        staHEIGHT = int(station[5])

        #Convert lat and lon from DMS to decimal degrees
        lat = _dms_to_decimal(station[3])
        lon = _dms_to_decimal(station[4])

        #Save selected information in new station list
        stations_new.append([staID, staNAME, lat, lon, staHEIGHT])

    #Save in dict
    all_stations[variab] = stations_new
    

## Select stations near each city and save them as NetCDF

In [6]:
# For every variable and city: select all stations within dist_max degrees of
# the city coordinates, read each station's data file, mask values whose
# quality flag is non-zero and write one NetCDF file per station.

#Maximum distance between city coordinates and station (unit: degrees)
dist_max = 1

#Loop over variables
for variab in variables:

    #Define output folder of variable (missing parent folders are created as well)
    dir_variab = dir_STA_out + variab + '/'
    os.makedirs(dir_variab, exist_ok=True)

    #Station coordinates as arrays, so the distance to a city can be computed
    #for all stations at once
    sta_lat = np.array([station[2] for station in all_stations[variab]], dtype=float)
    sta_lon = np.array([station[3] for station in all_stations[variab]], dtype=float)

    #Loop over cities
    for city in cities:

        print(" - " + city , end=': ')

        #Define output folder of city
        dir_city = dir_variab + city + '/'
        os.makedirs(dir_city, exist_ok=True)

        #Get lat and lon of city
        lat_sel, lon_sel = city_coords[city]

        #Calculate euclidean distance (in degrees) to all stations
        # NOTE: dist has to be a NumPy array; comparing a plain Python list
        # with a number (list <= int) raises a TypeError in Python 3.
        dist = np.sqrt((sta_lat - lat_sel)**2 + (sta_lon - lon_sel)**2)

        #Get indices of stations within a certain distance to city coordinates
        indizes = np.where(dist <= dist_max)[0]

        #Loop over selected stations
        for ind in indizes:

            station_info = all_stations[variab][ind]
            STA_id = station_info[0]

            #Print name of selected station
            print(station_info[1], end=', ')

            #Open data file of selected station
            # (the first 21 lines are skipped -- presumably the file header;
            #  blank lines are dropped)
            fname = dir_stations + variab + '/' + variab + '_STAID' + str(STA_id).zfill(6) + '.txt'
            with open(fname, 'r') as f:
                lines = [line for line in f.readlines()[21:] if line.strip()]

            #Skip stations without any data record (dates[0] below would fail)
            if not lines:
                continue

            #Read dates, values and value quality flag
            records = [line.split(',') for line in lines]
            dates   = [record[2].strip() for record in records]
            val     = np.array([float(record[3]) for record in records])
            val_QF  = np.array([float(record[4]) for record in records])

            #Convert temperature from 0.1 deg C to deg C
            if variab in ['TX', 'TG', 'TN']:  val = val / 10

            #Convert pressure from 0.1 hPa to Pa
            if variab=='PP':  val = val * 10

            #Mask values according to quality flag (only flag 0 is kept)
            # np.nan instead of np.NaN: the latter alias was removed in NumPy 2.0
            val[val_QF != 0] = np.nan

            #Convert dates (YYYYMMDD strings) to a DatetimeIndex
            dates = pd.to_datetime(dates, format='%Y%m%d')

            #Create empty dataset with time axis and station position
            data_out = xr.Dataset(coords={'time': ('time', dates),
                                           'lat':  ('lat', [station_info[2]]),
                                           'lon':  ('lon', [station_info[3]])})

            #Add variable to dataset
            data_out[variab] = ('time', val)
            data_out.attrs['station_name'] = station_info[1]

            #Define name of output file (includes first and last year of the record)
            time_str  = str(dates[0].year) + '-' + str(dates[-1].year)
            fname_out = dir_city + variab + '_' + str(STA_id) + '_' + time_str + '.nc'

            #Save in NetCDF file (an existing file is replaced)
            if os.path.exists(fname_out): os.remove(fname_out)
            data_out.to_netcdf(fname_out)

        print('')
        

 - Istanbul: ISTANBUL, SCUTARI, 
 - Moscow: MOSKVAVDNX, MOSCOWAGROACAD., 
 - London: GOUDHURST, CAMBRIDGE(B.GDNS), ROTHAMSTED, WRITTLE, HAMPSTEAD, HEATHROW, BOGNORREGIS, EASTBOURNE, HASTINGS, WALLINGFORD, ALICEHOLTLODGE, HUNTINGTON, WETHERSFIELD, MONKSWOOD, WISLEY, WOBURN, 
 - SaintPetersburg: SANKT-PETERBURG, BELOGORKA, 
 - Berlin: BERLIN-DAHLEM, POTSDAM, LINDENBERG, NEURUPPIN, BERLIN-TEMPELHOF, ANGERMUNDE, BERLIN-TEGEL, DOBERLUG-KIRCHHAIN, EBERSWALDE, FELDBERG, GRUNOW, JESSEN/ELSTER, JUTERBOG, KLEINMACHNOW, LANGENLIPSDORF, BARUTH, LUBBEN-BLUMENFELDE, MUNCHEBERG, MENZ, NEUSTRELITZ, PETKUS, BERLIN-BUCH, BERLIN-DAHLEM(LFAG), BERLIN-KANISWALL, BERLIN-LICHTENRADE, BERLIN-LICHTERFELDE(SUD), BERLIN-MARZAHN, BERLIN-MITTE, BERLIN-RUDOW, BERLIN-SCHONEFELD, BERLIN-SPANDAU, BERLIN-TEGELERFLIESSTAL, BERLIN-TREPTOW, BERLIN-ZEHLENDORF, WITTENBERG, WOLDEGK, WUSTRAU-ZIETHENHORST, ZEHDENICK, BERGE, MELCHOW-SCHONHOLZ, BRANDENBURG-GORDEN, FELDBERG/MECKLENBURG, HECKELBERG, BERLIN-ADLERSHOF, BERLIN-FRIEDR