# Plotting the BS Difference between the Climatological Ensemble and the RFC Models as well as the BSS
Version 21 December, Selina Kiefer

### Input: csv-files
continuous timeseries of daily BS values for every model in csv-format (including the climatological ensemble), binary timeseries of cold wave days in csv-format
### Output: png-files
winterwise plots of BS values in png-format, winterwise plots of BS-difference in png-format as well as winterwise and multi-winter mean bar plots of BSS values in png-format

#### Set the paths to the defined functions, the style sheet for plotting and the configuration file, and set the name of the configuration file

In [None]:
# Directory containing the project's own helper modules (it is put on sys.path further below).
PATH_defined_functions = "./Defined_Functions/"

In [None]:
# Matplotlib style sheet (path and file name) that is applied to the plots of this notebook.
style_file_for_plotting = "./Style_File_Matplotlib.mplstyle"

In [None]:
# Directory and file name of the YAML configuration file. Both strings are concatenated
# when the configuration file is opened.
ifile_configurations = "Configurations_Plotting_Difference_BS_and_BSS.yaml"
PATH_configurations = "./Configuration_Files/"

#### Import the necessary python packages and functions
Nothing needs to be changed here.

In [None]:
# Import the necessary python packages.
from datetime import datetime
from datetime import timedelta

import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import mpl_axes_aligner
import numpy as np
import pandas as pd
import seaborn as sns
import yaml

In [None]:
# Import the necessary defined functions.
import sys
sys.path.insert(1, PATH_defined_functions)
from read_in_csv_data import *
from truncate_data_by_date import *

#### Read in the style sheet for plotting

In [None]:
# Apply the style sheet set above to matplotlib. It updates the plotting parameters, e.g. the
# font, the font size and the figure size. The figure size is matched to the textwidth of the
# LaTeX-document so that the plots do not have to be re-scaled afterwards, which would change
# the font size again.
plt.style.use(style_file_for_plotting)

#### Read in the configuration file and the data specified in it

In [None]:
# Parse the YAML configuration file into the dictionary `config` (nothing needs to be
# changed here). The file location is assembled from the directory and file name set above.
config_file_location = PATH_configurations + ifile_configurations
with open(config_file_location) as config_stream:
    config = yaml.safe_load(config_stream)

##### Read in the ground truth

In [None]:
# Load the binary ground truth (cold wave days) from the csv-file named in the configuration
# file. Afterwards, every column whose name starts with 'Unnamed' and the column 'index' are
# discarded (nothing needs to be changed here).
df_cold_waves = read_in_csv_data(config['PATH_ground_truth'], config['ifile_ground_truth_cold_waves'])
is_unnamed_column_cold_waves = df_cold_waves.columns.str.contains('^Unnamed')
df_cold_waves = df_cold_waves.loc[:, ~is_unnamed_column_cold_waves].drop(columns=['index'])

In [None]:
# The first remaining column of the ground truth is taken as the time column, the second one
# as the column holding the binary cold wave variable.
time_column_name_cold_waves, var_column_name_cold_waves = (
    df_cold_waves.columns[0],
    df_cold_waves.columns[1],
)

In [None]:
# Print the selected column names and show the first rows of the ground truth to verify the
# selection (nothing needs to be changed here).
print('Binary ground truth: ', var_column_name_cold_waves, sep='\n')
print('Name of the column containing the time: ', time_column_name_cold_waves, sep='\n')
print('Dataframe containing the ground truth: ')
df_cold_waves.head()

##### Read in the BS values of the climatological ensemble

In [None]:
# Load the BS timeseries of the climatological ensemble from the csv-file named in the
# configuration file. Afterwards, every column whose name starts with 'Unnamed' and the column
# 'index' are discarded (nothing needs to be changed here).
df_climatological_ensemble_bs = read_in_csv_data(config['PATH_climatological_ensemble_bs'], config['ifile_climatological_ensemble_bs'])
is_unnamed_column_climatological_ensemble = df_climatological_ensemble_bs.columns.str.contains('^Unnamed')
df_climatological_ensemble_bs = df_climatological_ensemble_bs.loc[:, ~is_unnamed_column_climatological_ensemble].drop(columns=['index'])

In [None]:
# The first remaining column of the climatological ensemble's dataframe is taken as the time
# column, the second one as the column holding its BS values.
time_column_name_climatological_ensemble_bs, var_column_name_climatological_ensemble_bs = (
    df_climatological_ensemble_bs.columns[0],
    df_climatological_ensemble_bs.columns[1],
)

In [None]:
# Print the selected column names and show the first rows of the benchmark model's dataframe
# to verify the selection (nothing needs to be changed here).
print('Name of skill measure of the benchmark model: ', var_column_name_climatological_ensemble_bs, sep='\n')
print('Name of the column containing the time: ', time_column_name_climatological_ensemble_bs, sep='\n')
print('Dataframe containing the timeseries of the skill measure of the benchmark model: ')
df_climatological_ensemble_bs.head()

##### Read in the BS values of the RFC predictions

In [None]:
# Load the BS timeseries of the first RFC prediction listed in the configuration file (list
# index 0). Afterwards, every column whose name starts with 'Unnamed' and the column 'index'
# are discarded (nothing needs to be changed here).
df_prediction_bs = read_in_csv_data(config['PATHs_prediction_bs'][0], config['ifiles_prediction_bs'][0])
is_unnamed_column_prediction = df_prediction_bs.columns.str.contains('^Unnamed')
df_prediction_bs = df_prediction_bs.loc[:, ~is_unnamed_column_prediction].drop(columns=['index'])

In [None]:
# The first remaining column of the prediction's dataframe is taken as the time column, the
# second one as the column holding the BS values. These names are re-used for all RFC models
# further below.
time_column_name_prediction_bs, var_column_name_prediction_bs = (
    df_prediction_bs.columns[0],
    df_prediction_bs.columns[1],
)

In [None]:
# Print the selected column names and show the first rows of the prediction's dataframe to
# verify the selection (nothing needs to be changed here).
print('Name of skill measure of the prediction: ', var_column_name_prediction_bs, sep='\n')
print('Name of the column containing the time: ', time_column_name_prediction_bs, sep='\n')
print('Dataframe containing the timeseries of the skill measure of the predictions: ')
df_prediction_bs.head()

#### Prepare details for plotting (e.g. a nice representation of the time)
From here on, nothing needs to be changed.

In [None]:
# An array with the start years of all winters in the evaluation period is created. The start
# year of the last winter is included.
first_start_year_of_winter = config['start_year_of_first_winter']
last_start_year_of_winter = config['start_year_of_last_winter']
start_years_of_winter = np.arange(first_start_year_of_winter, last_start_year_of_winter + 1)

In [None]:
# The time column of the climatological ensemble provides the time information for plotting
# the timeseries of the skill measures. It is converted to datetime-objects here.
time_values_climatological_ensemble = df_climatological_ensemble_bs[time_column_name_climatological_ensemble_bs]
time_timeseries = pd.to_datetime(time_values_climatological_ensemble)

In [None]:
# The model descriptions shown in the plots' legends are taken from the configuration file.
# Every '|' in these descriptions marks a line-break and is replaced by a newline character
# to obtain a nice-looking string.
str_input_info_for_plot_label_benchmark = config['input_data_label_climatological_ensemble'].replace('|', '\n')

list_str_input_info_for_plot_label_prediction_bs = [
    label_prediction_bs.replace('|', '\n')
    for label_prediction_bs in config['input_data_labels_prediction_bs']
]

#### Preparing the BS of the climatological ensemble and the RFC model for plotting

In [None]:
# The timeseries of the BS is separated by winter and saved into lists for the
# climatological ensemble and a representative RFC model. In the same pass, the BSS of every
# RFC model with respect to the climatological ensemble is calculated for every winter from
# the winter-mean BS values (NaNs are ignored by np.nanmean):
#     BSS = 1 - mean(BS of the RFC model) / mean(BS of the climatological ensemble)
bs_climatological_ensemble = []
bs_representative_rfc_model = []
index_representative_rfc_model = config['index_representative_rfc_model']
bss_timeseries = []  # one list per winter, each holding one BSS value per RFC model

# The BS files of the RFC models are the same for every winter. They are therefore read in
# (and freed from unnamed columns and the index column) once here instead of once per winter.
list_df_prediction_bs = []
for k, ifile_prediction_bs in enumerate(config['ifiles_prediction_bs']):
    df_prediction_bs = read_in_csv_data(config['PATHs_prediction_bs'][k], ifile_prediction_bs)
    df_prediction_bs = df_prediction_bs.loc[:, ~df_prediction_bs.columns.str.contains('^Unnamed')]
    df_prediction_bs = df_prediction_bs.drop(['index'], axis=1)
    list_df_prediction_bs.append(df_prediction_bs)

for start_year_of_winter in start_years_of_winter:

    # A winter starts in start_year_of_winter and ends in the following year. The dates are
    # handed over to truncate_data_by_date as strings of the format YYYY_MM_DD.
    start_winter = datetime(start_year_of_winter, config['start_month_winter'], config['start_day_winter'])
    end_winter = datetime(start_year_of_winter+1, config['end_month_winter'], config['end_day_winter'])
    str_start_winter = start_winter.strftime('%Y_%m_%d')
    str_end_winter = end_winter.strftime('%Y_%m_%d')

    # BS of the climatological ensemble (the reference) in the respective winter.
    df_climatological_ensemble_respective_winter = truncate_data_by_date(df_climatological_ensemble_bs, time_column_name_climatological_ensemble_bs, str_start_winter, str_end_winter)
    bs_climatological_ensemble_respective_winter = df_climatological_ensemble_respective_winter[var_column_name_climatological_ensemble_bs]
    df_climatological_ensemble_respective_winter_mean = np.nanmean(bs_climatological_ensemble_respective_winter)

    bs_climatological_ensemble.append(bs_climatological_ensemble_respective_winter)

    # BS and BSS of every RFC model in the respective winter. The column names determined
    # from the first RFC model are used for all RFC models.
    bss_respective_winter = []
    for k, df_prediction_bs in enumerate(list_df_prediction_bs):
        df_prediction_respective_winter = truncate_data_by_date(df_prediction_bs, time_column_name_prediction_bs, str_start_winter, str_end_winter)
        bs_prediction_respective_winter = df_prediction_respective_winter[var_column_name_prediction_bs]

        if k == index_representative_rfc_model:
            bs_representative_rfc_model.append(bs_prediction_respective_winter)

        df_prediction_respective_winter_mean = np.nanmean(bs_prediction_respective_winter)
        bss_respective_winter.append(1-(df_prediction_respective_winter_mean/df_climatological_ensemble_respective_winter_mean))

    bss_timeseries.append(bss_respective_winter)


#### Visualizing the BS difference between the climatological ensemble and the RFC models

In [None]:
# The difference in BS values between the climatological ensemble and the RFC models
# (BS of the climatological ensemble minus BS of the RFC model) is calculated and plotted as
# one figure per winter. The cold wave days of the ground truth are shaded in grey.
color_list=['purple', 'm', 'darkblue', 'cornflowerblue', 'green', 'yellowgreen']

# The BS files of the RFC models are the same for every winter. They are therefore read in
# (and freed from unnamed columns and the index column) once here instead of once per winter.
list_df_prediction_bs = []
for k, ifile_prediction_bs in enumerate(config['ifiles_prediction_bs']):
    df_prediction_bs = read_in_csv_data(config['PATHs_prediction_bs'][k], ifile_prediction_bs)
    df_prediction_bs = df_prediction_bs.loc[:, ~df_prediction_bs.columns.str.contains('^Unnamed')]
    df_prediction_bs = df_prediction_bs.drop(['index'], axis=1)
    list_df_prediction_bs.append(df_prediction_bs)

for start_year_of_winter in start_years_of_winter:

    # A winter starts in start_year_of_winter and ends in the following year. The dates are
    # handed over to truncate_data_by_date as strings of the format YYYY_MM_DD.
    start_winter = datetime(start_year_of_winter, config['start_month_winter'], config['start_day_winter'])
    end_winter = datetime(start_year_of_winter+1, config['end_month_winter'], config['end_day_winter'])
    str_start_winter = start_winter.strftime('%Y_%m_%d')
    str_end_winter = end_winter.strftime('%Y_%m_%d')

    df_cold_waves_respective_winter = truncate_data_by_date(df_cold_waves, time_column_name_cold_waves, str_start_winter, str_end_winter)

    df_climatological_ensemble_respective_winter = truncate_data_by_date(df_climatological_ensemble_bs, time_column_name_climatological_ensemble_bs, str_start_winter, str_end_winter)
    time_winter = df_climatological_ensemble_respective_winter[time_column_name_climatological_ensemble_bs]

    # Positions of the cold wave days within the winter. A cold wave day at position p is
    # shaded from time step p to time step p+1. Only a day at the very last time step has no
    # successor and is left out; all other cold wave days are shaded.
    # NOTE(review): this assumes that the ground truth and the climatological ensemble cover
    # the same days in the same order - confirm against truncate_data_by_date.
    indices_cold_wave_days = np.where(df_cold_waves_respective_winter[var_column_name_cold_waves]==1)[0]
    indices_cold_wave_days = indices_cold_wave_days[indices_cold_wave_days < len(time_winter)-1]

    fig, ax = plt.subplots()
    ax2 = ax.twinx()

    for k, df_prediction_bs in enumerate(list_df_prediction_bs):
        df_prediction_respective_winter = truncate_data_by_date(df_prediction_bs, time_column_name_prediction_bs, str_start_winter, str_end_winter)

        # Each dataframe is indexed with its own variable column name.
        difference_timeseries = df_climatological_ensemble_respective_winter[var_column_name_climatological_ensemble_bs]-df_prediction_respective_winter[var_column_name_prediction_bs]

        # The shading is drawn once per RFC model, so the very transparent spans (alpha=0.05)
        # of all models are stacked on top of each other. The positions stem from np.where
        # and are therefore looked up by position (iloc), not by index label.
        for p in indices_cold_wave_days:
            ax2.axvspan(time_winter.iloc[p], time_winter.iloc[p+1], facecolor='grey', alpha=0.05)

        ax.plot(time_winter, difference_timeseries, linestyle='-', color=color_list[k], alpha=0.5, label=(list_str_input_info_for_plot_label_prediction_bs[k]))

    ax.set_ylim(-0.56, 0.56)
    ax.axhline(y=0, color='grey', linestyle='-')
    # Align the zero lines of both y-axes at half of the axes' height.
    mpl_axes_aligner.align.yaxes(ax, 0, ax2, 0, 0.5)
    ax2.tick_params(labelright=False, right=False)
    plt.setp(ax.get_xticklabels(), ha="center", rotation=45)
    # ax.legend(bbox_to_anchor=(0, -0.15), loc='upper left') # left out for standalone legend in paper
    plt.xlabel(time_column_name_climatological_ensemble_bs)
    ax.set_ylabel(var_column_name_prediction_bs+' Difference')
    plt.title(str(config['lead_time'])+'d lead', ha='left', x=-0)
    plt.savefig(config['PATH_plots']+var_column_name_prediction_bs+'_difference_'+config['binary_ground_truth']+'_'+config['rfc_model_names']+'_lead_'+str(config['lead_time'])+'d_'+str(start_year_of_winter)+'_'+str(start_year_of_winter+1)+'.png', bbox_inches='tight')
    #plt.show() # not used since the size of the jupyter notebook is really big
    plt.close() # only used since the size of the jupyter notebook is really big

In [None]:
# Creating a standalone legend for the plot visualizing the BS difference of the RFC
# predictions and the climatological ensemble. One line handle is created per RFC model
# (limited by the number of available colors), so the cell also works when fewer than six
# models are configured.
line_handles = [
    plt.Line2D([], [], color=color, alpha=0.5, label=label)
    for color, label in zip(color_list, list_str_input_info_for_plot_label_prediction_bs)
]

grey_filling = mpatches.Patch(color='grey', alpha=0.2, label='Ground Truth Cold Waves (E-OBS)')

# The entry of the ground truth is inserted after the entries of the first two RFC models.
plt.legend(handles=line_handles[:2] + [grey_filling] + line_handles[2:], ncol=3)
plt.axis(False)
plt.savefig(config['PATH_plots']+'Standalone_colorbar_for_bs_difference_plot.png', bbox_inches='tight')
#plt.show()

#### Calculating and visualizing the BSS of the RFC models in respect to the climatological ensemble

In [None]:
# For every winter, the BSS values of the RFC models with respect to the climatological
# ensemble are shown in a bar plot (one bar per RFC model) and saved as png-file.
color_list=['purple', 'm', 'darkblue', 'cornflowerblue', 'green', 'yellowgreen']

number_of_models = len(config['ifiles_prediction_bs'])

for winter_index, start_year in enumerate(start_years_of_winter):
    fig,ax = plt.subplots()
    bss_timeseries_current_winter = bss_timeseries[winter_index]
    bss_values_current_winter = np.array(bss_timeseries_current_winter)

    # BSS value, legend text and running number (starting at 1) of every RFC model.
    wintermean_bss = [np.sum(bss_values_current_winter[n]) for n in range(number_of_models)]
    list_str_input_info = [list_str_input_info_for_plot_label_prediction_bs[n] for n in range(number_of_models)]
    list_str_number_of_model = [str(n+1) for n in range(number_of_models)]

    # Zero line and dashed separators after the second and the fourth bar.
    plt.axhline(y=0, color='grey', alpha=0.5)
    for x_separator in (1.5, 3.5):
        plt.axvline(x=x_separator, color='grey',linestyle='--', alpha=0.5)

    plt.bar(np.arange(number_of_models), np.array(wintermean_bss), color=color_list, alpha=0.8, tick_label=list_str_number_of_model)

    plt.ylim(-0.75, 0.35)
    plt.setp(ax.xaxis.get_majorticklabels(), ha='center')
    plt.ylabel('BSS')

    str_lead_time = str(config['lead_time'])
    str_start_year = str(start_year)
    str_end_year = str(start_year+1)
    plt.title('Winter '+str_start_year+'/'+str_end_year+', '+str_lead_time+'d lead', ha='left', x=-0)
    plt.savefig(config['PATH_plots']+'BSS_'+config['binary_ground_truth']+'_'+config['rfc_model_names']+'_and_climatological_ensemble_'+'_lead_'+str_lead_time+'d_'+str_start_year+'_'+str_end_year+'.png', bbox_inches='tight')
    #plt.show() # not used since the size of the jupyter notebook is really big
    plt.close() # only used since the size of the jupyter notebook is really big

In [None]:
# The mean BSS values over all winters are plotted for the RFC models with respect to the
# climatological ensemble in a bar plot. The error bars show the standard deviation of the
# winterwise BSS values (np.std with its default settings).
fig,ax = plt.subplots()

color_list=['purple', 'm', 'darkblue', 'cornflowerblue', 'green', 'yellowgreen']

# Rows of this array are the winters, columns are the RFC models.
bss_per_winter_and_model = np.array(bss_timeseries)
number_of_models = len(config['ifiles_prediction_bs'])

longterm_bss = []       # mean BSS over all winters, one value per RFC model
longterm_std_bss = []   # standard deviation of the BSS over all winters, one value per RFC model
list_str_input_info = []
list_str_number_of_model = []

for n in range(number_of_models):
    bss_all_winters_current_model = bss_per_winter_and_model[:,n]
    # The mean is taken directly per model instead of dividing a sum by a length that is
    # determined with the loop variable after the loop has finished.
    longterm_bss.append(np.mean(bss_all_winters_current_model))
    longterm_std_bss.append(np.std(bss_all_winters_current_model))
    list_str_input_info.append(list_str_input_info_for_plot_label_prediction_bs[n])
    list_str_number_of_model.append(str(n+1))

# Zero line and dashed separators after the second and the fourth bar.
plt.axhline(y=0, color='grey', alpha=0.5)
plt.axvline(x=1.5, color='grey',linestyle='--', alpha=0.5)
plt.axvline(x=3.5, color='grey',linestyle='--', alpha=0.5)

plt.bar(np.arange(number_of_models), np.array(longterm_bss), yerr=longterm_std_bss, ecolor='grey', capsize=5, color=color_list, alpha=0.8, tick_label=list_str_number_of_model)

plt.ylim(-0.35, 0.35)
plt.setp(ax.xaxis.get_majorticklabels(), ha='center')
plt.ylabel('BSS')
ax.set_title(str(config['lead_time'])+'d lead', ha='right', x=1) # short title for paper
#ax.set_title('Winter '+str(config['start_year_of_first_winter'])+'/'+str(config['start_year_of_first_winter']+1)+' - '+str(config['start_year_of_last_winter'])+'/'+str(config['start_year_of_last_winter']+1)+'\n'+str(config['lead_time'])+'d lead', ha='left', x=-0)
plt.savefig(config['PATH_plots']+'BSS_'+config['binary_ground_truth']+'_'+config['rfc_model_names']+'_and_climatological_ensemble_'+'_lead_'+str(config['lead_time'])+'d_'+str(config['start_year_of_first_winter'])+'_'+str(config['start_year_of_last_winter']+1)+'.png', bbox_inches='tight')
#plt.show()

In [None]:
# Creating a standalone legend with a single column for the bar plots visualizing the BSS.
# One colored patch is created per RFC model (limited by the number of available colors), so
# the cell also works when fewer than six models are configured. Every entry is labelled
# with the number of the model followed by its description.
legend_fillings = [
    mpatches.Patch(color=color, label=str_number_of_model+': '+str_input_info)
    for color, str_number_of_model, str_input_info in zip(color_list, list_str_number_of_model, list_str_input_info)
]

plt.legend(handles=legend_fillings, ncol=1)
plt.axis(False)
plt.savefig(config['PATH_plots']+'Standalone_legend_for_bs_bar_plot_half_textwidth_long.png', bbox_inches='tight')
#plt.show()

In [None]:
# Creating a standalone legend with three columns for the bar plots visualizing the BSS.
# One colored patch is created per RFC model (limited by the number of available colors), so
# the cell also works when fewer than six models are configured. Every entry is labelled
# with the number of the model followed by its description.
legend_fillings = [
    mpatches.Patch(color=color, label=str_number_of_model+': '+str_input_info)
    for color, str_number_of_model, str_input_info in zip(color_list, list_str_number_of_model, list_str_input_info)
]

plt.legend(handles=legend_fillings, ncol=3)
plt.axis(False)
plt.savefig(config['PATH_plots']+'Standalone_legend_for_bs_bar_plot_textwidth_long.png', bbox_inches='tight')
#plt.show()

In [None]:
# End of Program