# Calculating Threshold for Cold Waves
Version 17 January 2024, Selina Kiefer

### Input: csv-file
continuous timeseries of ground truth temperature in csv-format
### Output: csv-file and png-file
threshold temperatures for cold waves in csv-format and plotted in png-format

### Cold wave definition: Smid et al. (2019)

2 criteria have to be fulfilled for the classification of cold waves:
1. the temperature of a day has to be below the 10th percentile of a 30-year average of the 31-day running mean around this day
2. the criterion above has to be fulfilled for at least 3 continuous days

Here, there are 2 modifications to the original approach:
1. the 30-year average is set to the last 30 years before the evaluation period (winter 1970/1971 - 1999/2000) instead of 1981 - 2010
2. the daily mean temperature is used instead of the daily minimum temperature

Reference: Smid, M., Russo, S., Costa, A., Granell, C., & Pebesma, E. (2019). Ranking European capitals by
exposure to heat waves and cold waves. Urban Climate, 27, 388–402. doi: https://doi.org/10.1016/j.uclim.2018.12.010

#### Set the paths to the defined functions, the style sheet for plotting and the configuration file and set its name

In [None]:
# Directory in which the project's own helper modules are stored.
PATH_defined_functions = "./Defined_Functions/"

In [None]:
# Matplotlib style sheet (path and file name) to be used for plotting.
style_file_for_plotting = "./Style_File_Matplotlib.mplstyle"

In [None]:
# Name of the YAML configuration file and the directory it is stored in.
ifile_configurations = "Configurations_Thresholds_Cold_Waves.yaml"
PATH_configurations = "./Configurations/"

#### Import the necessary python packages and functions

In [None]:
# Import the necessary python packages.
import calendar
from datetime import datetime, timedelta

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import yaml

In [None]:
# Read in the necessary functions.
# The directory holding the helper modules is put on the module search path first, so that the
# modules below can be found by name.
import sys
sys.path.insert(1, PATH_defined_functions)
# NOTE(review): the wildcard imports are presumably the source of read_in_csv_data(),
# truncate_data_by_date() and create_auxiliary_date(), which are called further down - confirm
# which module defines which function.
from read_in_csv_data import *
from truncate_data_by_date import*
from create_auxiliary_date import *

#### Read in the style sheet for plotting

In [None]:
# Load the style sheet to be used by matplotlib for plotting. This updates the plotting
# parameters, e.g. to have the right font, font size and figure size. The figure size is adjusted
# to the textwidth of the LaTeX-document in order to avoid re-scaling the plot and thereby
# changing the font size again.
plt.style.use(style_file_for_plotting)

#### Read in the configuration file and the data specified in it

In [None]:
# Read in the configuration file (nothing needs to be changed here).
# The encoding is stated explicitly so that the file is decoded in the same way on every
# platform instead of with the locale-dependent default of open().
# yaml.safe_load is used, which only constructs plain Python objects from the file content.
with open(PATH_configurations+ifile_configurations, encoding='utf-8') as f:
    config = yaml.safe_load(f)

In [None]:
# Load the ground truth from the csv-file named in the configuration (nothing needs to be
# changed here). All columns whose name starts with "Unnamed" and the column "index" are removed.
df_ground_truth = read_in_csv_data(config['PATH_ground_truth'], config['ifile_ground_truth'])
is_unnamed_column = df_ground_truth.columns.str.contains('^Unnamed')
df_ground_truth = df_ground_truth.loc[:, ~is_unnamed_column].drop(columns=['index'])

In [None]:
# The first column of the ground truth is taken as the time column, the second one as the
# column containing the variable.
ground_truth_columns = df_ground_truth.columns
time_column_name_ground_truth = ground_truth_columns[0]
var_column_name_ground_truth = ground_truth_columns[1]

In [None]:
# Print the selected column names and show the first rows of the ground truth in order to check
# that everything is selected correctly (nothing needs to be changed here).
print('Ground truth to calculate the thresholds for the cold wave definition from: ',
      var_column_name_ground_truth, sep='\n')
print('Name of the column containing the time: ',
      time_column_name_ground_truth, sep='\n')
print('Dataframe containing the ground truth: ')
df_ground_truth.head()

In [None]:
# Load the climatological ensemble from the csv-file named in the configuration. All columns
# whose name starts with "Unnamed" and the column "index" are removed.
df_climatological_ensemble = read_in_csv_data(config['PATH_climatological_ensemble'], config['ifile_climatological_ensemble'])
is_unnamed_ensemble_column = df_climatological_ensemble.columns.str.contains('^Unnamed')
df_climatological_ensemble = df_climatological_ensemble.loc[:, ~is_unnamed_ensemble_column].drop(columns=['index'])

In [None]:
# Set the name of the column containing the time of the climatological ensemble (its first
# column).
time_column_name_climatological_ensemble = df_climatological_ensemble.columns[0]

In [None]:
# Check that everything is selected correctly. The last label names the climatological ensemble,
# since this is the dataframe whose first rows are shown.
print('Name of the column containing the time: ')
print(time_column_name_climatological_ensemble)
print('Dataframe containing the climatological ensemble: ')
df_climatological_ensemble.head()

#### Extract the winters to be used for calculating the thresholds of the cold wave definition

In [None]:
# The period used for the calculation of the cold wave threshold is extracted from the ground
# truth. It starts at the configured start date of the first winter and ends at the configured
# end date in the year after the start year of the last winter.
first_winter_start_year = config['start_year_of_first_winter']
last_winter_end_year = config['start_year_of_last_winter'] + 1
start_winter = datetime(first_winter_start_year, config['start_month_winter'], config['start_day_winter'])
end_winter = datetime(last_winter_end_year, config['end_month_winter'], config['end_day_winter'])

# The start and end dates are passed on as strings of the form YYYY_MM_DD.
truncation_date_format = '%Y_%m_%d'
df_ground_truth_truncated = truncate_data_by_date(
    df_ground_truth,
    time_column_name_ground_truth,
    start_winter.strftime(truncation_date_format),
    end_winter.strftime(truncation_date_format),
)

In [None]:
# Here, the minimum, median and maximum of the ground truth are calculated for every calendar
# day (month, day) in order to have a visual check of the cold wave criterion later on.
# A copy of the truncated ground truth is used, so that the 'datetime' column added here is not
# written into df_ground_truth_truncated, which is processed further below.
# NOTE(review): the aggregations run over all columns, including the original time column -
# confirm that its dtype supports min/median/max in the pandas version in use.
df_ground_truth_for_plotting = df_ground_truth_truncated.copy()
df_ground_truth_for_plotting['datetime'] = pd.to_datetime(df_ground_truth_for_plotting[time_column_name_ground_truth], format='%Y-%m-%d')
df_ground_truth_for_plotting = df_ground_truth_for_plotting.set_index('datetime')

# The data is grouped once by (month, day) and the same grouping is used for all three statistics.
calendar_day_for_plotting = [df_ground_truth_for_plotting.index.month, df_ground_truth_for_plotting.index.day]
ground_truth_by_calendar_day = df_ground_truth_for_plotting.groupby(calendar_day_for_plotting)
ds_ground_truth_for_plotting_min = ground_truth_by_calendar_day.min()
ds_ground_truth_for_plotting_median = ground_truth_by_calendar_day.median()
ds_ground_truth_for_plotting_max = ground_truth_by_calendar_day.max()

#### Apply a running mean to the data according to the cold wave definition


In [None]:
# In a next step, a centred 31-day rolling mean of the ground truth timeseries is calculated and
# stored in the new column 'rolling_mean'.
rolling_window_31_days = df_ground_truth_truncated[var_column_name_ground_truth].rolling(window=31, center=True)
df_ground_truth_truncated['rolling_mean'] = rolling_window_31_days.mean()

In [None]:
# The column with the original temperature values is not needed anymore and is removed.
df_ground_truth_truncated = df_ground_truth_truncated.drop(columns=[var_column_name_ground_truth])

#### Calculate the daily threshold values according to the cold wave definition

In [None]:
# To calculate daily thresholds for the cold waves from the ground truth, the data is grouped by
# month and day. To be able to do so, the time column is parsed (format YYYY-MM-DD) and used as
# the index of the pandas dataframe.
parsed_time = pd.to_datetime(df_ground_truth_truncated[time_column_name_ground_truth], format='%Y-%m-%d')
df_ground_truth_truncated = df_ground_truth_truncated.assign(datetime=parsed_time).set_index('datetime')

In [None]:
# Calculation of the daily thresholds (=10th percentile of the 31-day rolling mean temperature).
# The data is grouped by calendar day (month, day) and the percentile is calculated for the
# column 'rolling_mean' only. This is the only column used afterwards, and restricting the
# calculation to it avoids applying the quantile to the time column, for which it is not
# meaningful and which raises an error for non-numeric dtypes.
calendar_day = [df_ground_truth_truncated.index.month, df_ground_truth_truncated.index.day]
ds_threshold_cold_waves = df_ground_truth_truncated.groupby(calendar_day)[['rolling_mean']].quantile(0.1)

In [None]:
# The thresholds calculated this way are stored in a new pandas dataframe with the single column
# 'threshold_cold_wave'.
df_thresholds_cold_waves = pd.DataFrame({'threshold_cold_wave': ds_threshold_cold_waves['rolling_mean']})

#### Add a new time column for easier handling of the dataframe later on

In [None]:
# For a nice representation of the date on a plot and easier handling of the data, an "auxiliary date" is created.
# This is simply a timeseries of dates of a leap year winter (here 2003/2004), which is afterwards sorted
# chronologically by month (Jan-Dec). The exact year itself does not matter since only the month and day are relevant.
# The start and end (month and day) of the winter are taken from the configuration file.
# NOTE(review): the description above is not verifiable from this notebook - confirm it against
# create_auxiliary_date().
auxiliary_time = create_auxiliary_date(config['start_month_winter'], config['start_day_winter'], config['end_month_winter'], config['end_day_winter'])

In [None]:
# The auxiliary date is converted to pandas datetimes and appended to the dataframe containing
# the thresholds. Afterwards, the (month, day) index is replaced by a plain running index.
auxiliary_dates_for_thresholds = pd.to_datetime(np.array(auxiliary_time))
df_thresholds_cold_waves['auxiliary_date'] = auxiliary_dates_for_thresholds
df_thresholds_cold_waves = df_thresholds_cold_waves.reset_index(drop=True)

In [None]:
# The auxiliary date is also appended to the minimum, median and maximum of the ground truth,
# which are meant for the visual plausibility check of the cold wave criterion.
def _with_auxiliary_date(ground_truth_statistic):
    # Wrap the statistic in a dataframe and add the auxiliary date as a new column.
    df_statistic = pd.DataFrame(ground_truth_statistic)
    df_statistic['auxiliary_date'] = pd.to_datetime(np.array(auxiliary_time))
    return df_statistic


df_ground_truth_for_plotting_min = _with_auxiliary_date(ds_ground_truth_for_plotting_min)
df_ground_truth_for_plotting_median = _with_auxiliary_date(ds_ground_truth_for_plotting_median)
df_ground_truth_for_plotting_max = _with_auxiliary_date(ds_ground_truth_for_plotting_max)

In [None]:
# In a next step, the thresholds are ordered in time: the auxiliary date is set as index, the
# dataframe is sorted by it and the auxiliary date is turned back into a column. This yields
# "winter-sorted" (Nov-Apr) values.
df_thresholds_cold_waves = (
    df_thresholds_cold_waves
    .set_index('auxiliary_date')
    .sort_index()
    .reset_index()
)

In [None]:
# The minimum, median and maximum of the ground truth (for the visual plausibility check of the
# cold wave criterion) are ordered by the auxiliary date in the same way.
def _sort_by_auxiliary_date(df_statistic):
    # Sort the rows by the auxiliary date and keep the auxiliary date as a column.
    return df_statistic.set_index('auxiliary_date').sort_index().reset_index()


df_ground_truth_for_plotting_min = _sort_by_auxiliary_date(df_ground_truth_for_plotting_min)
df_ground_truth_for_plotting_median = _sort_by_auxiliary_date(df_ground_truth_for_plotting_median)
df_ground_truth_for_plotting_max = _sort_by_auxiliary_date(df_ground_truth_for_plotting_max)

#### Save the threshold values of the cold wave definition in csv-format

In [None]:
# The dataframe containing the cold wave thresholds is saved in csv-format. The file name
# contains the start year of the first winter and the year after the start year of the last
# winter.
first_year_in_file_name = config['start_year_of_first_winter']
last_year_in_file_name = config['start_year_of_last_winter'] + 1
ofile_thresholds = f"{config['PATH_output_files']}cold_wave_thresholds_Smid_et_al_2019_for_{first_year_in_file_name}_{last_year_in_file_name}.csv"
df_thresholds_cold_waves.to_csv(ofile_thresholds)

#### Visualize the climatological ensemble for a plausibility check

In [None]:
# The members of the climatological ensemble (grey) and the thresholds for the cold waves (blue)
# are plotted against the auxiliary date to check their plausibility. The plot is saved in
# png-format.
first_year = config['start_year_of_first_winter']
last_year = config['start_year_of_last_winter'] + 1
plot_dates = df_thresholds_cold_waves['auxiliary_date']

# NOTE(review): two columns of the climatological ensemble are not plotted and the remaining
# ones are addressed by consecutive years starting at the first winter - confirm against the
# layout of the ensemble file.
number_of_plotted_members = len(df_climatological_ensemble.columns) - 2

fig, ax = plt.subplots()
for year in range(first_year, first_year + number_of_plotted_members):
    ax.plot(plot_dates, df_climatological_ensemble[str(year)], marker='o', markersize=1, color='grey', linestyle='--')
ax.plot(plot_dates, df_thresholds_cold_waves['threshold_cold_wave'], color='mediumblue', linewidth=2, linestyle='-', label='Cold Wave Threshold')

# The ticks of the time axis are labelled with the abbreviated month name.
month_format = '%b'
ax.xaxis.set_major_formatter(mdates.DateFormatter(month_format))
ax.xaxis.set_minor_formatter(mdates.DateFormatter(month_format))

ax.legend(bbox_to_anchor=(0, -0.40), loc='lower left', ncol=2)
ax.set_title(f'Climatological Ensemble, Winters {first_year}-{last_year}')
ax.set_xlabel(time_column_name_ground_truth)
ax.set_ylabel(f"{var_column_name_ground_truth} in {config['unit_of_ground_truth_and_predictions']}")

ofile_plot = f"{config['PATH_plots']}cold_wave_thresholds_smid_et_al_2019_{var_column_name_ground_truth}_for_{first_year}_{last_year}.png"
fig.savefig(ofile_plot, bbox_inches='tight')
plt.show()


In [None]:
# End of Program