# Plotting the Ground Truth and the Predictions of the Climatological Ensemble and an RFC Model
Version 21 December, Selina Kiefer

### Input: csv-files
binary timeseries of cold wave days of the climatological ensemble in csv-format, predictions of a representative Random Forest Classifier (RFC) model as binary timeseries of cold wave days in csv-format, binary ground truth timeseries of cold wave days in csv-format
### Output: png-files
winterwise plots of predicted and ground truth binary cold wave day timeseries in png-format  

#### Set the paths to the defined functions, the style sheet for plotting and the configuration file, and set the configuration file's name

In [None]:
# Set the path to the directory containing the defined functions. This directory is added to
# the module search path further below, before the helper modules (read_in_csv_data,
# truncate_data_by_date, create_auxiliary_date) are imported from it.
PATH_defined_functions = './Defined_Functions/'

In [None]:
# Set the path and name of the matplotlib style file which should be used for plotting. It is
# passed to plt.style.use() further below.
style_file_for_plotting = './Style_File_Matplotlib.mplstyle'

In [None]:
# Set the path and name of the configuration file. Both strings are concatenated directly when
# the file is opened, so the path has to end with a path separator.
PATH_configurations = './Configuration_Files/'
ifile_configurations = 'Configurations_Visualization_Binary_Prediction_Ensembles.yaml'

#### Import the necessary python packages and functions
Nothing needs to be changed here.

In [None]:
# Import the necessary python packages.
import calendar
from datetime import datetime, timedelta

import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import yaml

In [None]:
# Read in the necessary defined functions. The directory holding them is put on the module
# search path first so that the imports below can be resolved.
# NOTE(review): the star imports bring in every public name of the helper modules; which names
# these are cannot be seen from this notebook.
import sys
sys.path.insert(1, PATH_defined_functions)
from read_in_csv_data import *
from truncate_data_by_date import*
from create_auxiliary_date import *

#### Read in the style sheet for plotting

In [None]:
# Load the style sheet to be used by matplotlib for plotting. This updates the plotting
# parameters to e.g. have the right font, font size and figure size. The latter is adjusted to
# the textwidth of the LaTeX-document in order to avoid re-scaling the plot and thereby
# changing the font size again.
plt.style.use(style_file_for_plotting)

#### Read in the configuration file and the data specified in it

In [None]:
# Read in the configuration file (nothing needs to be changed here). The file is opened with an
# explicit UTF-8 encoding so that its content is decoded identically on every platform instead
# of depending on the locale's default encoding. safe_load only builds plain Python objects.
with open(PATH_configurations + ifile_configurations, encoding='utf-8') as config_file:
    config = yaml.safe_load(config_file)

##### Read in the ground truth

In [None]:
# Load the binary ground truth from the csv-file named in the configuration, then discard all
# columns whose name starts with 'Unnamed' and the 'index' column (nothing needs to be changed
# here).
df_cold_waves = read_in_csv_data(
    config['PATH_ground_truth'],
    config['ifile_ground_truth_cold_waves'],
)
df_cold_waves = (
    df_cold_waves
    .loc[:, ~df_cold_waves.columns.str.contains('^Unnamed')]
    .drop(columns=['index'])
)

In [None]:
# The first column of the ground truth holds the time, the second one the binary variable.
time_column_name_cold_waves, var_column_name_cold_waves = (
    df_cold_waves.columns[0],
    df_cold_waves.columns[1],
)

In [None]:
# Print the selected column names and show the first rows of the ground truth to check that
# everything is selected correctly (nothing needs to be changed here).
print(
    'Binary ground truth: ',
    var_column_name_cold_waves,
    'Name of the column containing the time: ',
    time_column_name_cold_waves,
    'Dataframe containing the ground truth: ',
    sep='\n',
)
df_cold_waves.head()

##### Read in the climatological ensemble

In [None]:
# Load the cold wave predictions of the climatological ensemble from the csv-file named in the
# configuration, then discard all columns whose name starts with 'Unnamed' and the 'index'
# column (nothing needs to be changed here).
df_climatological_ensemble_cold_waves = read_in_csv_data(
    config['PATH_climatological_ensemble'],
    config['ifile_climatological_ensemble_cold_waves'],
)
df_climatological_ensemble_cold_waves = (
    df_climatological_ensemble_cold_waves
    .loc[:, ~df_climatological_ensemble_cold_waves.columns.str.contains('^Unnamed')]
    .drop(columns=['index'])
)

In [None]:
# The first column of the climatological ensemble holds the time, the second one the cold wave
# predictions.
(
    time_column_name_climatological_ensemble_cold_waves,
    var_column_name_climatological_ensemble_cold_waves,
) = (
    df_climatological_ensemble_cold_waves.columns[0],
    df_climatological_ensemble_cold_waves.columns[1],
)

In [None]:
# Print the selected column names and show the first rows of the climatological ensemble to
# check that everything is selected correctly (nothing needs to be changed here).
print(
    'Names of cold wave predictions of the climatological ensemble: ',
    var_column_name_climatological_ensemble_cold_waves,
    'Name of the column containing the time: ',
    time_column_name_climatological_ensemble_cold_waves,
    'Dataframe containing the cold wave predictions of the climatological ensemble: ',
    sep='\n',
)
df_climatological_ensemble_cold_waves.head()

##### Read in the RFC predictions

In [None]:
# Load the predictions of the RFC model from the csv-file named in the configuration, then
# discard all columns whose name starts with 'Unnamed' and the 'index' column (nothing needs to
# be changed here).
df_predictions_rfc = read_in_csv_data(
    config['PATH_predictions_rfc'],
    config['ifile_predictions_rfc'],
)
df_predictions_rfc = (
    df_predictions_rfc
    .loc[:, ~df_predictions_rfc.columns.str.contains('^Unnamed')]
    .drop(columns=['index'])
)

In [None]:
# The first column of the RFC predictions holds the time, the second one the predicted
# variable.
time_column_name_predictions_rfc, var_column_name_predictions_rfc = (
    df_predictions_rfc.columns[0],
    df_predictions_rfc.columns[1],
)

In [None]:
# Print the selected column names and show the first rows of the RFC predictions to check that
# everything is selected correctly (nothing needs to be changed here).
print(
    'Names of predictions done by the RFC model: ',
    var_column_name_predictions_rfc,
    'Name of the column containing the time: ',
    time_column_name_predictions_rfc,
    'Dataframe containing the predictions of the RFC model: ',
    sep='\n',
)
df_predictions_rfc.head()

#### Prepare the binary ground truth, the cold wave predictions of the climatological ensemble and the predictions of the RFC model for plotting
From here on, nothing needs to be changed.

In [None]:
# Build an array with the start years of all winters in the validation period. The configured
# start year of the last winter is included.
start_years_of_winter = np.arange(
    config['start_year_of_first_winter'],
    config['start_year_of_last_winter'] + 1,
)

In [None]:
# Two variants of the climatological ensemble are prepared: the original dataframe, which is
# used for winters ending in a leap year, and a copy without the 29 February, which is used for
# all other winters. To find the 29 February, the time column is converted to datetimes and
# temporarily used as index; afterwards it is turned back into a regular column in both
# dataframes.
df_climatological_ensemble_cold_waves[time_column_name_climatological_ensemble_cold_waves] = pd.to_datetime(
    df_climatological_ensemble_cold_waves[time_column_name_climatological_ensemble_cold_waves]
)
df_climatological_ensemble_cold_waves = df_climatological_ensemble_cold_waves.set_index(
    time_column_name_climatological_ensemble_cold_waves
)

# Index labels (dates) of all rows falling on a 29 February.
index_of_february_29 = df_climatological_ensemble_cold_waves.index[
    (df_climatological_ensemble_cold_waves.index.month == 2)
    & (df_climatological_ensemble_cold_waves.index.day == 29)
]

df_climatological_ensemble_cold_waves_without_29_feb = (
    df_climatological_ensemble_cold_waves.drop(index_of_february_29).reset_index()
)
df_climatological_ensemble_cold_waves = df_climatological_ensemble_cold_waves.reset_index()

In [None]:
# For every winter of the validation period, the ground truth, the RFC predictions, the
# climatological ensemble and the forecast dates are collected in one list each (one entry per
# winter).
climatological_ensemble_cold_waves = []
predictions_rfc = []
cold_waves = []
forecast_dates = []

for start_year_of_winter in start_years_of_winter:

    # The winter starts in the start year and ends in the following year; the truncation
    # function is called with the dates formatted as 'YYYY_MM_DD' strings.
    start_winter = datetime(start_year_of_winter, config['start_month_winter'], config['start_day_winter'])
    end_winter = datetime(start_year_of_winter+1, config['end_month_winter'], config['end_day_winter'])
    start_winter_string = start_winter.strftime('%Y_%m_%d')
    end_winter_string = end_winter.strftime('%Y_%m_%d')

    df_cold_waves_respective_winter = truncate_data_by_date(
        df_cold_waves, time_column_name_cold_waves, start_winter_string, end_winter_string
    )
    df_predictions_rfc_respective_winter = truncate_data_by_date(
        df_predictions_rfc, time_column_name_predictions_rfc, start_winter_string, end_winter_string
    )

    # The climatological ensemble is not truncated: the full dataframe is used if the winter
    # ends in a leap year, otherwise the variant without the 29 February.
    df_climatology_respective_winter = (
        df_climatological_ensemble_cold_waves
        if calendar.isleap(start_year_of_winter+1)
        else df_climatological_ensemble_cold_waves_without_29_feb
    )
    climatology_respective_winter = np.array(
        np.squeeze(df_climatology_respective_winter[var_column_name_climatological_ensemble_cold_waves])
    )

    climatological_ensemble_cold_waves.append(climatology_respective_winter)
    predictions_rfc.append(df_predictions_rfc_respective_winter[var_column_name_predictions_rfc])
    cold_waves.append(df_cold_waves_respective_winter[var_column_name_cold_waves])
    forecast_dates.append(pd.to_datetime(df_cold_waves_respective_winter[time_column_name_cold_waves]))

#### Visualizing the predictions of the climatological ensemble and the RFC model for separate winters

In [None]:
# For illustration purposes, one figure per winter is created: the cold wave days of the ground
# truth are shaded in grey and the values of the climatological ensemble (red crosses) and of
# the RFC model (blue dots) are plotted on top. This gives a first impression of the models'
# forecast skill. Each figure is saved as png-file and closed again.
for i, start_year in enumerate(start_years_of_winter):
    fig, ax = plt.subplots()

    # The cold wave indices determined below are positions (from np.where). The dates are
    # therefore put into a list, which is always indexed by position, whereas [] on a pandas
    # Series with an integer index would look up labels.
    dates = list(forecast_dates[i])

    # NOTE(review): for winters which do not end in a leap year, the last 6 entries of the
    # ground truth are excluded from the shading. The reason for this is not visible in this
    # notebook; the behaviour is kept unchanged.
    cold_wave_days = np.array(cold_waves[i])
    if not calendar.isleap(start_year + 1):
        cold_wave_days = cold_wave_days[:len(cold_wave_days) - 6]

    # Each cold wave day is shaded from its own date to the following date. Only a day without
    # a following date (the very last date of the winter) has to be skipped; all other cold
    # wave days, including the last one of the winter, are shaded.
    indices_cold_wave_days = np.where(np.squeeze(cold_wave_days) == 1)[0]
    indices_cold_wave_days = indices_cold_wave_days[indices_cold_wave_days + 1 < len(dates)]
    for k in indices_cold_wave_days:
        ax.axvspan(dates[k], dates[k + 1], facecolor='grey', alpha=.2)

    ax.plot(forecast_dates[i], climatological_ensemble_cold_waves[i], marker='x', markersize=3.5,
            linestyle='', color='r', alpha=0.5, label='Climatological Ensemble')
    ax.plot(forecast_dates[i], predictions_rfc[i], marker='o', markersize=3.5,
            linestyle='', color='b', alpha=0.5,
            label=f"{config['rfc_model_name']}, {config['lead_time']}d lead")
    plt.xticks(rotation=45)
    ax.set_ylim(0, 0.6)
    # The legend is created as a standalone figure for the paper, so no legend is drawn here:
    # plt.legend(bbox_to_anchor=(0, -0.45), loc='upper left')
    # The y-label is shortened for the paper; the full variable name would be:
    # plt.ylabel(var_column_name_predictions_rfc)
    ax.set_ylabel('Fraction of \n Ensemble Members')
    fig.savefig(
        f"{config['PATH_plots']}{config['rfc_model_name']}_{config['binary_ground_truth']}"
        f"_lead_{config['lead_time']}d_{start_year}_{start_year + 1}.png",
        bbox_inches='tight',
    )
    # The figures are not shown but closed right away to keep the size of the jupyter notebook
    # small.
    plt.close(fig)

In [None]:
# Draw the legend belonging to the winterwise plots as a figure of its own: proxy artists with
# the same markers and colours as in the plots are created, the axes are switched off and the
# legend is saved as png-file.
red_crosses = plt.Line2D(
    [], [],
    marker='x', markersize=3.5, linestyle='', color='r', alpha=0.5,
    label='Climatological Ensemble',
)
blue_dots = plt.Line2D(
    [], [],
    marker='o', markersize=3.5, linestyle='', color='b', alpha=0.5,
    label=config['rfc_model_name'] + ', ' + str(config['lead_time']) + 'd lead',
)
grey_filling = mpatches.Patch(
    color='grey', alpha=0.2,
    label='Ground Truth Cold Waves (E-OBS)',
)
plt.legend(ncol=2, handles=[red_crosses, grey_filling, blue_dots])
plt.axis(False)
plt.savefig(
    config['PATH_plots'] + 'Standalone_legend_for_visualization_binary_predictions.png',
    bbox_inches='tight',
)
plt.show()

In [None]:
# End of Program