# Plotting the CRPS-Difference of the Climatological Ensemble and the QRF Models and the CRPSS
Version 21 December, Selina Kiefer

### Input: csv-files
continuous timeseries of daily CRPS values for every model in csv-format (including the climatological ensemble), continuous timeseries of ground truth temperature in csv-format
### Output: png-files
winterwise plots of CRPS values in png-format, winterwise plots of CRPS-difference in png-format as well as winterwise and multi-winter mean bar plots of CRPSS values in png-format

#### Set the paths to the defined functions, the style sheet for plotting and the configuration file, and set the name of the configuration file

In [None]:
# Set the path to the directory containing the defined functions. This directory is added to
# the module search path in the import cell below, so that the helper modules in it can be
# imported.
PATH_defined_functions = './Defined_Functions/'

In [None]:
# Set the path and name of the matplotlib style file which should be used for plotting. It is
# passed to plt.style.use() further below.
style_file_for_plotting = './Style_File_Matplotlib.mplstyle'

In [None]:
# Set the path and name of the configuration file. Both strings are concatenated when the file
# is opened, so the path has to end with a path separator.
PATH_configurations = './Configuration_Files/'
ifile_configurations = 'Configurations_Plotting_Difference_CRPS_and_CRPSS.yaml'

#### Import the necessary python packages and functions
Nothing needs to be changed here.

In [None]:
# Import the necessary python packages.
import yaml
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import mpl_axes_aligner
import seaborn as sns
from datetime import timedelta

In [None]:
# Import the necessary defined functions. The directory with the defined functions is inserted
# into the module search path first, so that the modules in it can be found by the imports.
# The functions read_in_csv_data() and truncate_data_by_date() called in the cells below are
# presumably provided by these two modules.
# NOTE(review): the wildcard imports may bring further names into the notebook namespace.
# Replace them by explicit imports only after checking which names the later cells rely on.
import sys
sys.path.insert(1, PATH_defined_functions)
from read_in_csv_data import *
from truncate_data_by_date import *

#### Read in the style sheet for plotting

In [None]:
# Load the style sheet to be used by matplotlib for plotting. This will update the plotting
# parameters to e.g. have the right font, font size and figure size. The latter is adjusted to
# the textwidth of the LaTeX-document in order to avoid re-scaling the plot and thereby
# changing the font size again. The style applies to all figures created after this cell.
plt.style.use(style_file_for_plotting)

#### Read in the configuration file and the data specified in it

In [None]:
# Parse the YAML configuration file into the dictionary `config`, which is used by all
# following cells (nothing needs to be changed here).
configuration_file = PATH_configurations + ifile_configurations
with open(configuration_file) as configuration_stream:
    config = yaml.safe_load(configuration_stream)

##### Read in the ground truth

In [None]:
# Load the continuous ground truth from its csv-file. Afterwards, all columns whose name
# starts with 'Unnamed' and the column named 'index' are removed (nothing needs to be changed
# here).
df_ground_truth = read_in_csv_data(config['PATH_ground_truth'], config['ifile_ground_truth'])
is_unnamed_column = df_ground_truth.columns.str.contains('^Unnamed')
df_ground_truth = df_ground_truth.loc[:, ~is_unnamed_column].drop(columns=['index'])

In [None]:
# The first remaining column of the ground truth is taken as the time column, the second one
# as the column containing the variable.
ground_truth_columns = df_ground_truth.columns
time_column_name_ground_truth = ground_truth_columns[0]
var_column_name_ground_truth = ground_truth_columns[1]

In [None]:
# Print the selected column names and show the first rows of the ground truth in order to
# check that everything is selected correctly (nothing needs to be changed here).
print('Continuous ground truth: ', var_column_name_ground_truth, sep='\n')
print('Name of the column containing the time: ', time_column_name_ground_truth, sep='\n')
print('Dataframe containing the ground truth: ')
df_ground_truth.head()

##### Read in the CRPS values of the climatological ensemble

In [None]:
# Load the timeseries of the climatological ensemble's CRPS from its csv-file. Afterwards, all
# columns whose name starts with 'Unnamed' and the column named 'index' are removed (nothing
# needs to be changed here).
df_climatological_ensemble_crps = read_in_csv_data(config['PATH_climatological_ensemble_crps'], config['ifile_climatological_ensemble_crps'])
is_unnamed_column = df_climatological_ensemble_crps.columns.str.contains('^Unnamed')
df_climatological_ensemble_crps = df_climatological_ensemble_crps.loc[:, ~is_unnamed_column].drop(columns=['index'])

In [None]:
# The first remaining column of the climatological ensemble's dataframe is taken as the time
# column, the second one as the column containing the CRPS values.
climatological_ensemble_columns = df_climatological_ensemble_crps.columns
time_column_name_climatological_ensemble_crps = climatological_ensemble_columns[0]
var_column_name_climatological_ensemble_crps = climatological_ensemble_columns[1]

In [None]:
# Print the selected column names and show the first rows of the climatological ensemble's
# dataframe in order to check that everything is selected correctly (nothing needs to be
# changed here).
print('Name of skill measure of the climatological ensemble: ', var_column_name_climatological_ensemble_crps, sep='\n')
print('Name of the column containing the time: ', time_column_name_climatological_ensemble_crps, sep='\n')
print('Dataframe containing the timeseries of the skill measure of the climatological ensemble: ')
df_climatological_ensemble_crps.head()

##### Read in the CRPS values of the QRF predictions 

In [None]:
# Load the timeseries of the CRPS of the first prediction listed in the configuration file.
# Afterwards, all columns whose name starts with 'Unnamed' and the column named 'index' are
# removed (nothing needs to be changed here).
df_prediction_crps = read_in_csv_data(config['PATHs_prediction_crps'][0], config['ifiles_prediction_crps'][0])
is_unnamed_column = df_prediction_crps.columns.str.contains('^Unnamed')
df_prediction_crps = df_prediction_crps.loc[:, ~is_unnamed_column].drop(columns=['index'])

In [None]:
# The first remaining column of the prediction's dataframe is taken as the time column, the
# second one as the column containing the CRPS values. These names are used for all prediction
# files in the cells below.
prediction_columns = df_prediction_crps.columns
time_column_name_prediction_crps = prediction_columns[0]
var_column_name_prediction_crps = prediction_columns[1]

In [None]:
# Print the selected column names and show the first rows of the prediction's dataframe in
# order to check that everything is selected correctly (nothing needs to be changed here).
print('Name of skill measure of the prediction: ', var_column_name_prediction_crps, sep='\n')
print('Name of the column containing the time: ', time_column_name_prediction_crps, sep='\n')
print('Dataframe containing the timeseries of the skill measure of the predictions: ')
df_prediction_crps.head()

#### Prepare details for plotting (e.g. a nice representation of the time)
From here on, nothing needs to be changed.

In [None]:
# An array with the start years of all winters in the evaluation period is created. The start
# year of the last winter is included.
first_start_year = config['start_year_of_first_winter']
last_start_year = config['start_year_of_last_winter']
start_years_of_winter = np.arange(first_start_year, last_start_year+1)

In [None]:
# The time column of the climatological ensemble provides the time information for the
# timeseries of the skill measures. It is converted to datetime-objects.
time_column_climatological_ensemble = df_climatological_ensemble_crps[time_column_name_climatological_ensemble_crps]
time_timeseries = pd.to_datetime(time_column_climatological_ensemble)

In [None]:
# The labels of the models shown in the plots' legends are taken from the configuration file.
# Every '|' in a label marks a line-break and is replaced by a newline character.
str_input_info_for_plot_label_benchmark = config['input_data_label_climatological_ensemble'].replace('|', '\n')

list_str_input_info_for_plot_label_prediction_crps = [
    label_prediction_crps.replace('|', '\n')
    for label_prediction_crps in config['input_data_labels_prediction_crps']
]

#### Preparing the CRPS of the climatological ensemble and the QRF model for plotting

In [None]:
# The timeseries of the CRPS is separated by winter and saved into lists for the
# climatological ensemble and a representative QRF model. In addition, the CRPSS of every QRF
# model with respect to the climatological ensemble is calculated for every winter as
# 1 - (winter mean CRPS of the model / winter mean CRPS of the climatological ensemble).
# The result `crpss_timeseries` is a list with one entry per winter, each entry being a list
# with one CRPSS value per QRF model.

# `datetime` is imported explicitly here since only `timedelta` is imported by name in the
# import cell of this notebook.
from datetime import datetime

crps_climatological_ensemble = []
crps_representative_qrf_model = []
index_representative_qrf_model = config['index_representative_qrf_model']
crpss_timeseries = []

# The CRPS files of the predictions do not depend on the winter. They are therefore read in
# and cleaned only once instead of once per winter.
list_df_prediction_crps = []
for k, ifile_prediction_crps in enumerate(config['ifiles_prediction_crps']):
    df_prediction_crps_of_model = read_in_csv_data(config['PATHs_prediction_crps'][k], ifile_prediction_crps)
    df_prediction_crps_of_model = df_prediction_crps_of_model.loc[:, ~df_prediction_crps_of_model.columns.str.contains('^Unnamed')]
    df_prediction_crps_of_model = df_prediction_crps_of_model.drop(['index'], axis=1)
    list_df_prediction_crps.append(df_prediction_crps_of_model)

for start_year_of_winter in start_years_of_winter:

    # The winter starts in the start year and ends in the following year.
    start_winter = datetime(start_year_of_winter, config['start_month_winter'], config['start_day_winter'])
    end_winter = datetime(start_year_of_winter+1, config['end_month_winter'], config['end_day_winter'])
    str_start_winter = start_winter.strftime('%Y_%m_%d')
    str_end_winter = end_winter.strftime('%Y_%m_%d')

    df_climatological_ensemble_respective_winter = truncate_data_by_date(df_climatological_ensemble_crps, time_column_name_climatological_ensemble_crps, str_start_winter, str_end_winter)
    # Missing values are ignored when calculating the winter mean.
    df_climatological_ensemble_respective_winter_mean = np.nanmean(df_climatological_ensemble_respective_winter[var_column_name_climatological_ensemble_crps])

    crps_climatological_ensemble.append(df_climatological_ensemble_respective_winter[var_column_name_climatological_ensemble_crps])

    time_winter = df_climatological_ensemble_respective_winter[time_column_name_climatological_ensemble_crps]

    # A fresh list per winter collects the CRPSS of all QRF models for this winter.
    crpss_respective_winter = []
    for k, df_prediction_crps_of_model in enumerate(list_df_prediction_crps):
        df_prediction_respective_winter = truncate_data_by_date(df_prediction_crps_of_model, time_column_name_prediction_crps, str_start_winter, str_end_winter)
        if k == index_representative_qrf_model:
            crps_representative_qrf_model.append(df_prediction_respective_winter[var_column_name_prediction_crps])
        df_prediction_respective_winter_mean = np.nanmean(df_prediction_respective_winter[var_column_name_prediction_crps])
        crpss_respective_winter.append(1-(df_prediction_respective_winter_mean/df_climatological_ensemble_respective_winter_mean))
    crpss_timeseries.append(crpss_respective_winter)


#### Visualizing the CRPS difference between the climatological ensemble and the QRF models

In [None]:
# The difference in CRPS values between the climatological ensemble and the QRF models is
# calculated and plotted for each winter separately. The difference is the CRPS of the
# climatological ensemble minus the CRPS of the QRF model, i.e. positive values mean that the
# QRF model has the lower CRPS. The ground truth is shown on a second y-axis.

# `datetime` is imported explicitly here since only `timedelta` is imported by name in the
# import cell of this notebook.
from datetime import datetime

color_list=['purple', 'm', 'darkblue', 'cornflowerblue', 'green', 'yellowgreen']

# The CRPS files of the predictions do not depend on the winter. They are therefore read in
# and cleaned only once instead of once per winter.
list_df_prediction_crps = []
for k, ifile_prediction_crps in enumerate(config['ifiles_prediction_crps']):
    df_prediction_crps_of_model = read_in_csv_data(config['PATHs_prediction_crps'][k], ifile_prediction_crps)
    df_prediction_crps_of_model = df_prediction_crps_of_model.loc[:, ~df_prediction_crps_of_model.columns.str.contains('^Unnamed')]
    df_prediction_crps_of_model = df_prediction_crps_of_model.drop(['index'], axis=1)
    list_df_prediction_crps.append(df_prediction_crps_of_model)

for start_year_of_winter in start_years_of_winter:

    # The winter starts in the start year and ends in the following year.
    start_winter = datetime(start_year_of_winter, config['start_month_winter'], config['start_day_winter'])
    end_winter = datetime(start_year_of_winter+1, config['end_month_winter'], config['end_day_winter'])
    str_start_winter = start_winter.strftime('%Y_%m_%d')
    str_end_winter = end_winter.strftime('%Y_%m_%d')

    df_ground_truth_respective_winter = truncate_data_by_date(df_ground_truth, time_column_name_ground_truth, str_start_winter, str_end_winter)

    df_climatological_ensemble_respective_winter = truncate_data_by_date(df_climatological_ensemble_crps, time_column_name_climatological_ensemble_crps, str_start_winter, str_end_winter)
    time_winter = df_climatological_ensemble_respective_winter[time_column_name_climatological_ensemble_crps]

    # ax shows the CRPS difference, ax2 shares the x-axis and shows the ground truth.
    fig, ax = plt.subplots()
    ax2 = ax.twinx()

    for k, df_prediction_crps_of_model in enumerate(list_df_prediction_crps):
        df_prediction_respective_winter = truncate_data_by_date(df_prediction_crps_of_model, time_column_name_prediction_crps, str_start_winter, str_end_winter)

        # Each dataframe is indexed with its own column name. Previously, the two column
        # names were swapped, which only worked if both columns had the same name.
        difference_timeseries = df_climatological_ensemble_respective_winter[var_column_name_climatological_ensemble_crps]-df_prediction_respective_winter[var_column_name_prediction_crps]

        ax.plot(time_winter, difference_timeseries, linestyle='-', color=color_list[k], alpha=0.5, label=(list_str_input_info_for_plot_label_prediction_crps[k]))

    ax2.plot(time_winter, df_ground_truth_respective_winter[var_column_name_ground_truth], color='k', linestyle='--', label='Ground truth temperature')
    ax2.set_ylim(260, 290)
    ax.set_ylim(-4, 4)
    ax.axhline(y=0, color='grey', linestyle='-')
    # The value 0 of the left y-axis and the value 273.15 of the right y-axis are both placed
    # at the relative position 0.5 of the plot.
    mpl_axes_aligner.align.yaxes(ax, 0, ax2, 273.15, 0.5)
    #ax2.legend(bbox_to_anchor=(0, -0.45), loc='upper left') # left out for standalone legend in paper
    #ax.legend(bbox_to_anchor=(0, -0.55), loc='upper left')    # left out for standalone legend in paper
    # The x-label is set on ax since the x-axis of the twin axes ax2 (the current axes after
    # calling twinx) is invisible and a label set there would not be drawn.
    ax.set_xlabel(time_column_name_climatological_ensemble_crps)
    plt.setp(ax.get_xticklabels(), ha="center", rotation=45)
    ax.set_ylabel(var_column_name_prediction_crps+' Difference in '+config['unit_continuous_ground_truth'])
    ax2.set_ylabel(var_column_name_ground_truth+' in '+config['unit_continuous_ground_truth'])
    plt.title(str(config['lead_time'])+'d lead', ha='right', x=1)
    plt.savefig(config['PATH_plots']+var_column_name_prediction_crps+'_difference_'+config['continuous_ground_truth']+config['qrf_model_names']+'_lead_'+str(config['lead_time'])+'d_'+str(start_year_of_winter)+'_'+str(start_year_of_winter+1)+'.png', bbox_inches='tight')
    #plt.show() # not used since the size of the jupyter notebook is really big
    plt.close() # only used since the size of the jupyter notebook is really big

In [None]:
# Creating a standalone legend for the plot visualizing the CRPS difference of the QRF
# predictions and the climatological ensemble. One line handle is created per QRF model, so
# that the cell also works if fewer than six models are configured.
model_lines = [
    plt.Line2D([], [], color=color_of_model, alpha=0.5, label=label_of_model)
    for color_of_model, label_of_model in zip(color_list, list_str_input_info_for_plot_label_prediction_crps)
]

black_dashed_line = plt.Line2D([], [], color='k', linestyle='--', label='Ground Truth Temperature (E-OBS)')

# The handle of the ground truth is placed after the first two models, as in the original
# arrangement of the legend.
legend_handles = model_lines[:2] + [black_dashed_line] + model_lines[2:]

# The legend is drawn on its own empty figure, so that it cannot end up on a figure which is
# still open from a previous cell.
fig_legend, ax_legend = plt.subplots()
ax_legend.legend(handles=legend_handles, ncol=3)
ax_legend.axis(False)
plt.savefig(config['PATH_plots']+'Standalone_colorbar_for_crps_difference_plot.png', bbox_inches='tight')
plt.show()

#### Calculating and visualizing the CRPSS of the QRF models with respect to the climatological ensemble

In [None]:
# For every winter, the CRPSS values of the QRF models with respect to the climatological
# ensemble (calculated above) are shown as a bar plot with one bar per model. The bars are
# labelled with the number of the model, starting at 1.
color_list=['purple', 'm', 'darkblue', 'cornflowerblue', 'green', 'yellowgreen']
number_of_models = len(config['ifiles_prediction_crps'])

for l, start_year_of_current_winter in enumerate(start_years_of_winter):
    fig,ax = plt.subplots()
    crpss_timeseries_current_winter = crpss_timeseries[l]
    crpss_values_current_winter = np.array(crpss_timeseries_current_winter)

    wintermean_crpss = [np.sum(crpss_values_current_winter[n]) for n in range(number_of_models)]
    list_str_input_info = [list_str_input_info_for_plot_label_prediction_crps[n] for n in range(number_of_models)]
    list_str_number_of_model = [str(n+1) for n in range(number_of_models)]

    # Grey helper lines: the zero line and two dashed separators between the bars.
    ax.axhline(y=0, color='grey', alpha=0.5)
    for x_separator in (1.5, 3.5):
        ax.axvline(x=x_separator, color='grey',linestyle='--', alpha=0.5)

    ax.bar(np.arange(number_of_models), np.array(wintermean_crpss), color=color_list, alpha=0.8, tick_label=list_str_number_of_model)

    ax.set_ylim(-0.3, 0.3)
    plt.setp(ax.xaxis.get_majorticklabels(), ha='center')
    ax.set_ylabel('CRPSS')

    str_winter = str(start_year_of_current_winter)+'/'+str(start_year_of_current_winter+1)
    ax.set_title('Winter '+str_winter+', '+str(config['lead_time'])+'d lead', ha='left', x=-0)

    ofile_plot = config['PATH_plots']+'CRPSS_'+config['continuous_ground_truth']+'_'+config['qrf_model_names']+'_and_climatological_ensemble_'+'_lead_'+str(config['lead_time'])+'d_'+str(start_year_of_current_winter)+'_'+str(start_year_of_current_winter+1)+'.png'
    plt.savefig(ofile_plot, bbox_inches='tight')
    #plt.show() # not used since the size of the jupyter notebook is really big
    plt.close() # only used since the size of the jupyter notebook is really big

In [None]:
# The mean CRPSS values over all winters are plotted for the QRF models with respect to the
# climatological ensemble in a bar plot. The error bars show the standard deviation of the
# winterwise CRPSS values of the respective model.
color_list=['purple', 'm', 'darkblue', 'cornflowerblue', 'green', 'yellowgreen']

# The winterwise CRPSS values are converted once to an array with one row per winter and one
# column per QRF model.
crpss_all_winters = np.array(crpss_timeseries)
number_of_winters = crpss_all_winters.shape[0]
number_of_models = len(config['ifiles_prediction_crps'])

# `longterm_crpss` holds the sum over all winters per model. It is divided by the number of
# winters when plotting in order to get the mean.
longterm_crpss = [np.sum(crpss_all_winters[:, m]) for m in range(number_of_models)]
longterm_std_crpss = [np.std(crpss_all_winters[:, m]) for m in range(number_of_models)]
list_str_input_info = [list_str_input_info_for_plot_label_prediction_crps[m] for m in range(number_of_models)]
list_str_number_of_model = [str(m+1) for m in range(number_of_models)]

fig,ax = plt.subplots()
plt.axhline(y=0, color='grey', alpha=0.5)
plt.axvline(x=1.5, color='grey',linestyle='--', alpha=0.5)
plt.axvline(x=3.5, color='grey',linestyle='--', alpha=0.5)

# The number of winters is taken from the shape of the array instead of from a column
# selected with the loop variable which was left over from the loop above.
plt.bar(np.arange(number_of_models), np.array(longterm_crpss)/number_of_winters, yerr=longterm_std_crpss, ecolor='grey', capsize=5, color=color_list, alpha=0.8, tick_label=list_str_number_of_model)

plt.ylim(-0.2, 0.2)
plt.setp(ax.xaxis.get_majorticklabels(), ha='center')
plt.ylabel('CRPSS')
ax.set_title(str(config['lead_time'])+'d lead', ha='right', x=1) # Short title for in paper
#ax.set_title('Winter '+str(config['start_year_of_first_winter'])+'/'+str(config['start_year_of_first_winter']+1)+' - '+str(config['start_year_of_last_winter'])+'/'+str(config['start_year_of_last_winter']+1)+'\n'+str(config['lead_time'])+'d lead', ha='left', x=-0)
plt.savefig(config['PATH_plots']+'CRPSS_'+config['continuous_ground_truth']+'_'+config['qrf_model_names']+'_and_climatological_ensemble_'+'_lead_'+str(config['lead_time'])+'d_'+str(config['start_year_of_first_winter'])+'_'+str(config['start_year_of_last_winter']+1)+'.png', bbox_inches='tight')
plt.show()

In [None]:
# Creating a compact standalone legend (one column) for the bar plot visualizing the CRPSS.
# One coloured patch is created per QRF model, labelled with the number of the model and its
# input information, so that the cell also works if fewer than six models are configured.
legend_patches = [
    mpatches.Patch(color=color_of_model, label=number_of_model+': '+input_info_of_model)
    for color_of_model, number_of_model, input_info_of_model in zip(color_list, list_str_number_of_model, list_str_input_info)
]

# The legend is drawn on its own empty figure, so that it cannot end up on a figure which is
# still open from a previous cell.
fig_legend, ax_legend = plt.subplots()
ax_legend.legend(handles=legend_patches, ncol=1)
ax_legend.axis(False)
plt.savefig(config['PATH_plots']+'Standalone_legend_for_crpss_bar_plot_half_textwidth_long.png', bbox_inches='tight')
plt.show()

In [None]:
# Creating a wide standalone legend (three columns) for the bar plot visualizing the CRPSS.
# One coloured patch is created per QRF model, labelled with the number of the model and its
# input information, so that the cell also works if fewer than six models are configured.
legend_patches = [
    mpatches.Patch(color=color_of_model, label=number_of_model+': '+input_info_of_model)
    for color_of_model, number_of_model, input_info_of_model in zip(color_list, list_str_number_of_model, list_str_input_info)
]

# The legend is drawn on its own empty figure, so that it cannot end up on a figure which is
# still open from a previous cell.
fig_legend, ax_legend = plt.subplots()
ax_legend.legend(handles=legend_patches, ncol=3)
ax_legend.axis(False)
plt.savefig(config['PATH_plots']+'Standalone_legend_for_crpss_bar_plot_whole_textwidth_long.png', bbox_inches='tight')
plt.show()

In [None]:
# End of Program