# LookUp Climate Data from EU xr.DataSets for Austrian Locations

### Presettings

In [1]:
import os
import regex as re
import datetime as dt
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt


In [2]:
# All directories
root       = "C:/Users/freiste/OneDrive - Ilmatieteen laitos/Documents/IIASA YSSP 2023"
this_dir   =  os.getcwd()

input_dir  = f"{root}/02 - Data/EU/Copernicus_E-OBS_Weather_Postprocessed"
output_dir = f"{root}/02 - Data/AUT"

### Methods

In [3]:
# get all certain-type files inside a specific folder

def show_all_files(input_dir, typ='.csv'):

    regexp = re.compile(fr"{typ}")
    files  = [path.name for path in os.scandir(input_dir) if path.is_file() if regexp.search(path.name)]
    
    return files


In [4]:
def years_in_data(yearfiles):
    
    all_years=[]

    for yf in yearfiles:
        try:
            f = pd.read_excel(f'{output_dir}/{yf}')
        except:
            f = pd.read_csv(f'{output_dir}/{yf}')
            
        f.drop(index=0, inplace=True)
        f = f.Year.to_list()

        all_years.extend(f)

    return list(set(all_years))

In [5]:
# Fill in missing years
def resample_years(df, firstyear, lastyear):
    mux = pd.MultiIndex.from_product([df.ForestryDistrict.unique() , 
                                      range(firstyear,lastyear + 1)], 
                                     names=['ForestryDistrict', 'Year'])

    return df.set_index(['ForestryDistrict', 'Year']).reindex(mux, method='ffill').reset_index()

# Postprocessing

### Create skeleton

In [25]:
# Begin from Geographical Details

Clim = pd.read_excel(f'{output_dir}/Data_Geography.xlsx')

droplist = ['Area', 'Population','PopulationDensity', 'NumberOfMunicipalities',
            'lat_center1','lon_center1', 'elev_center1', 'lat_center2', 'lon_center2',
            'elev_center2', 'elev_center']
Clim.drop(columns=droplist, index=[0, 85], inplace=True)
Clim

Unnamed: 0,ForestryDistrict,FederalState,lat_center,lon_center
1,Eisenstadt+Rust,Burgenland,47.82915,16.67885
2,Güssing,Burgenland,47.113836,16.320419
3,Jennersdorf,Burgenland,46.98195,16.197311
4,Mattersburg,Burgenland,47.728369,16.418644
5,Neusiedl/See,Burgenland,47.900129,16.951993
...,...,...,...,...
80,Bregenz,Vorarlberg,47.411097,9.937755
81,Dornbirn,Vorarlberg,47.416538,9.733014
82,Feldkirch,Vorarlberg,47.257412,9.664555
83,Vorarlberg,Vorarlberg_total,47.25,9.916667


In [26]:
# Get list of years
yearfiles = ['Data_BMLRT_AnnualLoggingReports.xlsx', 'Data_BWF_DocumentationOfForestdamagefactors.xlsx', 'Data_BWF_ForestInventory_Postprocessed.csv']

YiD = years_in_data(yearfiles)
del yearfiles

In [27]:
# All geographical locations in all years:
Clim['Year']=YiD[0]
Clim = resample_years(Clim, YiD[0], YiD[-1])

In [28]:
# AUT Climate Table Skeleton
skeleton_order = ['ForestryDistrict', 'FederalState', 'Year', 'lat_center', 'lon_center']
Clim = Clim[skeleton_order]
Clim

Unnamed: 0,ForestryDistrict,FederalState,Year,lat_center,lon_center
0,Eisenstadt+Rust,Burgenland,1988,47.82915,16.67885
1,Eisenstadt+Rust,Burgenland,1989,47.82915,16.67885
2,Eisenstadt+Rust,Burgenland,1990,47.82915,16.67885
3,Eisenstadt+Rust,Burgenland,1991,47.82915,16.67885
4,Eisenstadt+Rust,Burgenland,1992,47.82915,16.67885
...,...,...,...,...,...
2935,Wien,Kärnten,2018,46.599096,14.691885
2936,Wien,Kärnten,2019,46.599096,14.691885
2937,Wien,Kärnten,2020,46.599096,14.691885
2938,Wien,Kärnten,2021,46.599096,14.691885


## Look up data (lat, lon, year) and be-flesh Skeleton

In [7]:
postpr_clim_files = show_all_files(input_dir, typ='.nc')
postpr_clim_files

['cumulative_degreedays_europe_1980-2022_0.25deg.nc',
 'cumulative_relevant_degreedays_europe_1980-2022_0.25deg.nc',
 'daylength_daily_europe_1980-2022_0.25deg.nc',
 'daylight_daily_europe_1980-2022_0.25deg.nc',
 'daylight_monthlyavg_europe_1980-2022_0.25deg.nc',
 'humidity_monthlymean_europe_1980-2022_0.25deg.nc',
 'max_bb_generations_europe_1980-2022_0.25deg.nc',
 'overwintering_mortality_europe_1980-2022_0.25deg.nc',
 'precipitation_monthlymean_europe_1980-2022_0.25deg.nc',
 'tempavg_monthlydegreedays_europe_1980-2022_0.25deg.nc',
 'tempavg_monthlymean_europe_1980-2022_0.25deg.nc',
 'tempmax_monthlydegreedays_europe_1980-2022_0.25deg.nc',
 'tempmax_monthlymean_europe_1980-2022_0.25deg.nc',
 'tempmin_monthlydegreedays_europe_1980-2022_0.25deg.nc',
 'tempmin_monthlymean_europe_1980-2022_0.25deg.nc',
 'windspeed_monthlymean_europe_1980-2022_0.25deg.nc']

In [40]:
def add_data_vars(nc_file):
    
    ds = xr.open_dataset(f'{input_dir}/{nc_file}')

    for ad in list(ds.keys()):
        
        # Didn't figure out a better way to do it, but it works...
        def lookup_climate_val(i, par=ad):

            val =\
            ds.sel(longitude = Clim.lon_center[i],
                   latitude  = Clim.lat_center[i],
                   year      = Clim.Year[i],
                   method    = 'nearest')[par].values

            return val


        def add_looked_up_column(par=ad):

            Clim[par] = Clim.index.map(lookup_climate_val)

        add_looked_up_column()


In [41]:
relevant_files = [
    'cumulative_degreedays_europe_1980-2022_0.25deg.nc',
    'cumulative_relevant_degreedays_europe_1980-2022_0.25deg.nc',
    'max_bb_generations_europe_1980-2022_0.25deg.nc',
    'overwintering_mortality_europe_1980-2022_0.25deg.nc'
]

for nc_file in relevant_files:
    print(nc_file)
    add_data_vars(nc_file)
Clim

cumulative_degreedays_europe_1980-2022_0.25deg.nc
cumulative_relevant_degreedays_europe_1980-2022_0.25deg.nc


  flat_num_dates_ns_int = (flat_num_dates * _NS_PER_TIME_DELTA[delta]).astype(
  flat_num_dates_ns_int = (flat_num_dates * _NS_PER_TIME_DELTA[delta]).astype(


max_bb_generations_europe_1980-2022_0.25deg.nc
overwintering_mortality_europe_1980-2022_0.25deg.nc


Unnamed: 0,ForestryDistrict,FederalState,Year,lat_center,lon_center,degreedays,relevant_degreedays,max_generations,season_start,season_end,season_length,frostdays,overkilldays
0,Eisenstadt+Rust,Burgenland,1988,47.829150,16.678850,3454.059570,1760.250122,3.160233,1988-05-06,1988-08-07,93,,
1,Eisenstadt+Rust,Burgenland,1989,47.829150,16.678850,3352.829346,1651.180054,2.964417,1989-05-07,1989-08-08,93,,
2,Eisenstadt+Rust,Burgenland,1990,47.829150,16.678850,3309.578369,1700.519775,3.052998,1990-05-07,1990-08-08,93,,
3,Eisenstadt+Rust,Burgenland,1991,47.829150,16.678850,3241.460205,1646.650024,2.956284,1991-05-07,1991-08-08,93,14.0,
4,Eisenstadt+Rust,Burgenland,1992,47.829150,16.678850,3546.810059,1799.090210,3.229964,1992-05-06,1992-08-07,93,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2935,Wien,Wien,2018,48.208354,16.372504,4067.920410,2022.809937,3.631616,2018-05-06,2018-08-08,94,,
2936,Wien,Wien,2019,48.208354,16.372504,3783.009521,1925.109863,3.456212,2019-05-06,2019-08-08,94,,
2937,Wien,Wien,2020,48.208354,16.372504,3619.449463,1775.670166,3.187918,2020-05-05,2020-08-07,94,,
2938,Wien,Wien,2021,48.208354,16.372504,3536.899658,1884.549927,3.383393,2021-05-06,2021-08-08,94,,


In [44]:
Clim= Clim.astype({'season_start': 'datetime64[ns]',
             'season_end'  : 'datetime64[ns]'})
Clim['season_length'] = (Clim['season_end'] - Clim['season_start']).dt.days

In [48]:
Clim['frostdays'] = Clim.frostdays.fillna(0)
Clim['overkilldays'] = Clim.overkilldays.fillna(0)

In [49]:
Clim

Unnamed: 0,ForestryDistrict,FederalState,Year,lat_center,lon_center,degreedays,relevant_degreedays,max_generations,season_start,season_end,season_length,frostdays,overkilldays
0,Eisenstadt+Rust,Burgenland,1988,47.829150,16.678850,3454.059570,1760.250122,3.160233,1988-05-06,1988-08-07,93,0.0,0.0
1,Eisenstadt+Rust,Burgenland,1989,47.829150,16.678850,3352.829346,1651.180054,2.964417,1989-05-07,1989-08-08,93,0.0,0.0
2,Eisenstadt+Rust,Burgenland,1990,47.829150,16.678850,3309.578369,1700.519775,3.052998,1990-05-07,1990-08-08,93,0.0,0.0
3,Eisenstadt+Rust,Burgenland,1991,47.829150,16.678850,3241.460205,1646.650024,2.956284,1991-05-07,1991-08-08,93,14.0,0.0
4,Eisenstadt+Rust,Burgenland,1992,47.829150,16.678850,3546.810059,1799.090210,3.229964,1992-05-06,1992-08-07,93,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2935,Wien,Wien,2018,48.208354,16.372504,4067.920410,2022.809937,3.631616,2018-05-06,2018-08-08,94,0.0,0.0
2936,Wien,Wien,2019,48.208354,16.372504,3783.009521,1925.109863,3.456212,2019-05-06,2019-08-08,94,0.0,0.0
2937,Wien,Wien,2020,48.208354,16.372504,3619.449463,1775.670166,3.187918,2020-05-05,2020-08-07,94,0.0,0.0
2938,Wien,Wien,2021,48.208354,16.372504,3536.899658,1884.549927,3.383393,2021-05-06,2021-08-08,94,0.0,0.0


### Intermediate storage

In [150]:
Clim = pd.read_csv(f'{output_dir}/degreedays_season_frostdays.csv')
Clim.drop(columns='Unnamed: 0', inplace=True)
Clim= Clim.astype({'season_start': 'datetime64[ns]',
             'season_end'  : 'datetime64[ns]'})
Clim

Unnamed: 0,ForestryDistrict,FederalState,Year,lat_center,lon_center,degreedays,relevant_degreedays,max_generations,season_start,season_end,season_length,frostdays,overkilldays
0,Eisenstadt+Rust,Burgenland,1988,47.829150,16.678850,3454.059570,1760.250122,3.160233,1988-05-06,1988-08-07,93,0.0,0.0
1,Eisenstadt+Rust,Burgenland,1989,47.829150,16.678850,3352.829346,1651.180054,2.964417,1989-05-07,1989-08-08,93,0.0,0.0
2,Eisenstadt+Rust,Burgenland,1990,47.829150,16.678850,3309.578369,1700.519775,3.052998,1990-05-07,1990-08-08,93,0.0,0.0
3,Eisenstadt+Rust,Burgenland,1991,47.829150,16.678850,3241.460205,1646.650024,2.956284,1991-05-07,1991-08-08,93,14.0,0.0
4,Eisenstadt+Rust,Burgenland,1992,47.829150,16.678850,3546.810059,1799.090210,3.229964,1992-05-06,1992-08-07,93,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2935,Wien,Wien,2018,48.208354,16.372504,4067.920410,2022.809937,3.631616,2018-05-06,2018-08-08,94,0.0,0.0
2936,Wien,Wien,2019,48.208354,16.372504,3783.009521,1925.109863,3.456212,2019-05-06,2019-08-08,94,0.0,0.0
2937,Wien,Wien,2020,48.208354,16.372504,3619.449463,1775.670166,3.187918,2020-05-05,2020-08-07,94,0.0,0.0
2938,Wien,Wien,2021,48.208354,16.372504,3536.899658,1884.549927,3.383393,2021-05-06,2021-08-08,94,0.0,0.0


In [151]:
monthly_values = [
    #'daylight_monthlyavg_europe_1980-2022_0.25deg.nc',   # needs it's own function, because doesn't have longitude coordinates
    'humidity_monthlymean_europe_1980-2022_0.25deg.nc',
    'precipitation_monthlymean_europe_1980-2022_0.25deg.nc',
    #'tempavg_monthlydegreedays_europe_1980-2022_0.25deg.nc',
    'tempavg_monthlymean_europe_1980-2022_0.25deg.nc',
    #'tempmax_monthlydegreedays_europe_1980-2022_0.25deg.nc',
    'tempmax_monthlymean_europe_1980-2022_0.25deg.nc',
    #'tempmin_monthlydegreedays_europe_1980-2022_0.25deg.nc',
    'tempmin_monthlymean_europe_1980-2022_0.25deg.nc',
    'windspeed_monthlymean_europe_1980-2022_0.25deg.nc']

In [152]:
# Add monthly averages

for f, file in enumerate(monthly_values):
    print(file[:10])

    T = xr.open_dataset(f'{input_dir}/{file}')
    
    nom = ['Humavg', 'Precipavg', 'Tavg', 'Tmaxavg', 'Tminavg', 'Windspavg'][f]

    def lookup_monthyl_climate_val(i):
        
        seas_mos = range(Clim.season_start[i].month, Clim.season_end[i].month +1 )
        
        v = [i for i in T.data_vars][0]

        for mo in seas_mos:
            Clim.at[i, f'{nom}_{mo}'] = T.sel(latitude = Clim.lat_center[i],
                                            longitude = Clim.lon_center[i],
                                            time = f"{Clim.Year[i]}-{mo}",
                                            method='nearest')[v].values
            
    Clim.index.map(lookup_monthyl_climate_val)


humidity_m
precipitat
tempavg_mo
tempmax_mo
tempmin_mo
windspeed_


In [145]:
Clim=\
Clim.rename(columns={
    'Tmixavg_5' : 'Tminavg_5',
    'Tmixavg_6' : 'Tminavg_6',
    'Tmixavg_7' : 'Tminavg_7',
    'Tmixavg_8' : 'Tminavg_8'
})

In [149]:
Clim.columns.unique()

Index(['ForestryDistrict', 'FederalState', 'Year', 'lat_center', 'lon_center',
       'degreedays', 'relevant_degreedays', 'max_generations', 'season_start',
       'season_end', 'season_length', 'frostdays', 'overkilldays',
       'Lightavg_5', 'Lightavg_6', 'Lightavg_7', 'Lightavg_8', 'Humavg_5',
       'Humavg_6', 'Humavg_7', 'Humavg_8', 'Precipavg_5', 'Precipavg_6',
       'Precipavg_7', 'Precipavg_8', 'Tavg_5', 'Tavg_6', 'Tavg_7', 'Tavg_8',
       'Tmaxavg_5', 'Tmaxavg_6', 'Tmaxavg_7', 'Tmaxavg_8', 'Tminavg_5',
       'Tminavg_6', 'Tminavg_7', 'Tminavg_8', 'Lightavg_JJA', 'Humavg_JJA',
       'Precipavg_JJA', 'Tavg_JJA', 'Tmaxavg_JJA', 'Tminavg_JJA'],
      dtype='object')

In [154]:
# Make Season averages
nom = ['Humavg', 'Precipavg', 'Tavg', 'Tmaxavg', 'Tminavg', 'Windspavg']

for n in nom: 
    Clim[f'{n}_JJA'] = Clim[[f'{n}_6',f'{n}_7',f'{n}_8']].mean(axis=1)


In [155]:
Clim

Unnamed: 0,ForestryDistrict,FederalState,Year,lat_center,lon_center,degreedays,relevant_degreedays,max_generations,season_start,season_end,...,Windspavg_5,Windspavg_6,Windspavg_7,Windspavg_8,Humavg_JJA,Precipavg_JJA,Tavg_JJA,Tmaxavg_JJA,Tminavg_JJA,Windspavg_JJA
0,Eisenstadt+Rust,Burgenland,1988,47.829150,16.678850,3454.059570,1760.250122,3.160233,1988-05-06,1988-08-07,...,2.714000,2.815161,2.710333,2.889355,58.461648,1.310968,18.493773,24.467051,12.539398,2.804950
1,Eisenstadt+Rust,Burgenland,1989,47.829150,16.678850,3352.829346,1651.180054,2.964417,1989-05-07,1989-08-08,...,4.035333,3.070968,2.726333,2.362258,66.873007,2.265341,17.173635,22.522856,11.934115,2.719853
2,Eisenstadt+Rust,Burgenland,1990,47.829150,16.678850,3309.578369,1700.519775,3.052998,1990-05-07,1990-08-08,...,3.332666,2.374193,2.482333,2.685484,60.960215,1.428065,17.652146,23.601348,11.728927,2.514003
3,Eisenstadt+Rust,Burgenland,1991,47.829150,16.678850,3241.460205,1646.650024,2.956284,1991-05-07,1991-08-08,...,3.248333,4.737096,3.411999,3.276774,65.655444,3.905699,16.788432,21.979372,11.557678,3.808623
4,Eisenstadt+Rust,Burgenland,1992,47.829150,16.678850,3546.810059,1799.090210,3.229964,1992-05-06,1992-08-07,...,4.045667,3.004839,2.935667,3.388387,61.459356,1.530072,18.438225,24.163195,12.769395,3.109631
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2935,Wien,Wien,2018,48.208354,16.372504,4067.920410,2022.809937,3.631616,2018-05-06,2018-08-08,...,3.306667,2.672903,3.073334,2.845484,60.620725,2.357061,20.729293,26.267427,15.140590,2.863907
2936,Wien,Wien,2019,48.208354,16.372504,3783.009521,1925.109863,3.456212,2019-05-06,2019-08-08,...,3.574666,3.583548,2.926334,2.902258,61.259747,2.171792,19.686796,25.105243,14.207316,3.137380
2937,Wien,Wien,2020,48.208354,16.372504,3619.449463,1775.670166,3.187918,2020-05-05,2020-08-07,...,2.318667,3.061935,3.496000,2.692581,62.455594,2.667742,18.309544,23.876347,12.681067,3.083505
2938,Wien,Wien,2021,48.208354,16.372504,3536.899658,1884.549927,3.383393,2021-05-06,2021-08-08,...,3.584334,3.934193,2.796666,3.169355,57.924838,2.116165,19.492581,25.301587,13.617634,3.300071


In [137]:
Clim[['Lightavg_6', 'Lightavg_7', 'Lightavg_8']].mean(axis=1)

0       58.461648
1       66.873007
2       60.960215
3       65.655444
4       61.459356
          ...    
2935    60.620725
2936    61.259747
2937    62.455594
2938    57.924838
2939    64.596166
Length: 2940, dtype: float64

# Export

In [131]:
Clim.to_csv(f'{output_dir}/almost_all_climatedata.csv')