# Creating a Climatological Ensemble from Ground Truth

Version 18 January 2024, Selina Kiefer

### Input: csv-files
continuous timeseries of ground truth temperature in csv-format
### Output: csv-file and png-file
continuous timeseries of ground truth temperature for a winter, whereby each year serves as a member of the climatological ensemble, saved in csv-format and plotted in png-format

#### Set the paths to the defined functions, the style sheet for plotting and the configuration file, and set the name of the configuration file

In [None]:
# Set the path to the directory containing the defined functions. This directory is added to
# the module search path before the defined functions are imported.
PATH_defined_functions = './Defined_Functions/'

In [None]:
# Set the path and name of the matplotlib style file which is used for plotting.
style_file_for_plotting = './Style_File_Matplotlib.mplstyle'

In [None]:
# Set the path (directory) and the file name of the YAML configuration file. Both parts are
# concatenated when the configuration file is opened.
PATH_configurations = './Configuration_Files/'
ifile_configurations = 'Configurations_Climatological_Ensemble.yaml'

#### Import the necessary python packages and functions

In [None]:
# Import the necessary python packages.
import calendar
from datetime import datetime, timedelta

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import yaml

In [None]:
# Read in the necessary defined functions. The directory containing them is added to the module
# search path first. Only the functions which are actually called in this notebook are imported
# by name (instead of 'import *'), so that it stays visible where each name comes from and no
# other names are pulled into the namespace of the notebook by accident.
import sys
sys.path.insert(1, PATH_defined_functions)
from read_in_csv_data import read_in_csv_data
from truncate_data_by_date import truncate_data_by_date
from create_auxiliary_date import create_auxiliary_date

#### Read in the style sheet for plotting

In [None]:
import warnings

# Apply the style sheet to matplotlib. It updates the plotting parameters, e.g. the font, the
# font size and the figure size. The figure size is matched to the textwidth of the
# LaTeX-document, so that the plot does not have to be re-scaled (which would change the font
# size again).
plt.style.use(style_file_for_plotting)

# From here on, all Python warnings are suppressed.
warnings.simplefilter(action='ignore')

#### Read in the configuration file and the data specified in it

In [None]:
# Load the settings of this notebook from the YAML configuration file into the dictionary
# 'config' (nothing needs to be changed here).
configuration_file = PATH_configurations+ifile_configurations
with open(configuration_file) as configuration_stream:
    config = yaml.safe_load(configuration_stream)

In [None]:
# Read in the ground truth (nothing needs to be changed here). Afterwards, the columns which
# are only artefacts of saving a dataframe are removed: all columns whose name starts with
# 'Unnamed' as well as the columns 'index' and 'level_0'. The latter two are only dropped if
# they exist (errors='ignore'), so that a file without them can be read in as well.
df_ground_truth = read_in_csv_data(config['PATH_ground_truth'], config['ifile_ground_truth'])
is_unnamed_column = df_ground_truth.columns.str.contains('^Unnamed')
df_ground_truth = df_ground_truth.loc[:, ~is_unnamed_column]
df_ground_truth = df_ground_truth.drop(columns=['index', 'level_0'], errors='ignore')

In [None]:
# The first column of the ground truth is taken as the time column, the second one as the
# column containing the variable.
time_column_name_ground_truth, var_column_name_ground_truth = df_ground_truth.columns[:2]

In [None]:
# Check that everything is selected correctly (nothing needs to be changed here). The name of
# the variable column, the name of the time column and the first rows of the ground truth are
# shown.
print('Ground truth to create the climatological ensemble from: ')
print(var_column_name_ground_truth)
print('Name of the column containing the time: ')
print(time_column_name_ground_truth)
print('Dataframe containing the ground truth: ')
df_ground_truth.head()

#### Extract the winters to create the climatological ensemble from

In [None]:
# The ground truth is cut to the period used for the climatological ensemble. This period starts
# at the first day of the first winter and ends at the last day of the last winter. Since the
# end date is built from the start year of the last winter plus one, the last winter ends in
# the calendar year after it started.
date_format = '%Y_%m_%d'
start_winter = datetime(
    year=config['start_year_of_first_winter'],
    month=config['start_month_winter'],
    day=config['start_day_winter'],
)
end_winter = datetime(
    year=config['start_year_of_last_winter']+1,
    month=config['end_month_winter'],
    day=config['end_day_winter'],
)

df_ground_truth_truncated = truncate_data_by_date(
    df_ground_truth,
    time_column_name_ground_truth,
    start_winter.strftime(date_format),
    end_winter.strftime(date_format),
)

#### Sort the data by month and day, so that every used winter becomes one ensemble member

In [None]:
# For the climatological ensemble, the data has to be arranged by month and day. As a
# preparation, the time column (expected in the format YYYY-MM-DD) is parsed into datetime
# objects, which are then used as the index of the dataframe.
parsed_dates = pd.to_datetime(df_ground_truth_truncated[time_column_name_ground_truth], format='%Y-%m-%d')
df_ground_truth_truncated = (
    df_ground_truth_truncated
    .assign(datetime=parsed_dates)
    .set_index('datetime')
)

In [None]:
# Year, month and day of the datetime index are written into one column each (named 'year',
# 'month' and 'day'). Afterwards, the datetime index is turned back into a regular column.
for date_component in ('year', 'month', 'day'):
    df_ground_truth_truncated[date_component] = getattr(df_ground_truth_truncated.index, date_component)
df_ground_truth_truncated = df_ground_truth_truncated.reset_index()

In [None]:
# Reshape the dataframe with pivot_table: every combination of month and day becomes one row
# and every year one column, filled with the values of the ground truth variable. The columns
# (years) are thereby the members of the climatological ensemble and the rows are the dates.
# Finally, month and day are turned from index levels back into regular columns.
df_climatological_ensemble = (
    df_ground_truth_truncated
    .pivot_table(index=['month', 'day'], columns='year', values=var_column_name_ground_truth)
    .reset_index()
)

#### Add a new time column for easier handling of the dataframe later on

In [None]:
# An "auxiliary date" is created from the start and end (month and day) of the winter to make
# the handling of the data easier later on.
# NOTE(review): according to the original description, create_auxiliary_date returns the dates
# of a leap year winter (2003/2004), ordered by month from January to December - confirm
# against the helper. The year itself is not relevant, since only month and day matter for the
# climatological ensemble and only these two are shown on the plot.
auxiliary_time = create_auxiliary_date(
    config['start_month_winter'],
    config['start_day_winter'],
    config['end_month_winter'],
    config['end_day_winter'],
)

In [None]:
# The auxiliary dates are converted to datetime objects and stored in the new column
# 'auxiliary_date' of the climatological ensemble.
# NOTE(review): this assumes that there is exactly one auxiliary date per row and that the
# auxiliary dates are in the same (month, day) order as the rows - confirm against
# create_auxiliary_date.
auxiliary_dates = pd.to_datetime(np.array(auxiliary_time))
df_climatological_ensemble['auxiliary_date'] = auxiliary_dates

In [None]:
# The rows are brought into the order of a winter (instead of the order of ascending months):
# the auxiliary date is used as the index, the dataframe is sorted by this index and then the
# auxiliary date is turned back into a regular column.
df_climatological_ensemble = (
    df_climatological_ensemble
    .set_index('auxiliary_date')
    .sort_index()
    .reset_index()
)

In [None]:
# Clean up the dataframe: the name of the column axis ('year', left over from the pivot_table)
# is removed and the columns 'month' and 'day' are dropped, since the auxiliary date carries
# this information. The row index is renewed with drop=True, so that no additional 'index'
# column is inserted which would have to be deleted again afterwards.
df_climatological_ensemble = (
    df_climatological_ensemble
    .rename_axis(None, axis=1)
    .drop(columns=['month', 'day'])
    .reset_index(drop=True)
)

#### Save the climatological ensemble in csv-format

In [None]:
# Save the winter-sorted climatological ensemble as a csv-file. The file name contains the name
# of the variable, the start year of the first winter and the year in which the last winter
# ends.
first_year = config['start_year_of_first_winter']
last_year = config['start_year_of_last_winter']+1
output_file = f"{config['PATH_output_files']}daily_climatological_ensemble_{var_column_name_ground_truth}_{first_year}_{last_year}.csv"
df_climatological_ensemble.to_csv(output_file)

#### Visualize the climatological ensemble for a plausibility check

In [None]:
# The climatological ensemble is plotted to check its plausibility and the plot is saved in
# png-format.
first_year = config['start_year_of_first_winter']
last_year = config['start_year_of_last_winter']+1

fig, ax = plt.subplots()
# Every column except the auxiliary date is a member of the ensemble. The members are taken
# directly from the columns (instead of counting up from the first year), so that the last year
# is plotted as well and a missing year in the data does not lead to a KeyError.
member_columns = df_climatological_ensemble.columns.drop('auxiliary_date')
for member in member_columns:
    ax.plot(df_climatological_ensemble['auxiliary_date'], df_climatological_ensemble[member], marker='o', markersize=2, linestyle='--')
# Only the abbreviated month name is shown on the time axis.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
ax.xaxis.set_minor_formatter(mdates.DateFormatter('%b'))
ax.set_title('Daily Climatological Ensemble ('+str(first_year)+'-'+str(last_year)+')')
ax.set_xlabel(time_column_name_ground_truth)
ax.set_ylabel(var_column_name_ground_truth+' in '+config['unit_of_ground_truth_and_predictions'])
fig.savefig(config['PATH_plots']+'daily_climatological_ensemble_'+var_column_name_ground_truth+'_'+str(first_year)+'_'+str(last_year)+'.png', bbox_inches='tight')

In [None]:
# End of Program