# Plotting the Ground Truth and the Predictions of the Climatological Ensemble and a QRF Model
Version 21 December, Selina Kiefer

### Input: csv-files
ensemble of continuous timeseries of ground truth temperature for a winter (e.g. climatological ensemble) in csv-format, predictions of a representative Quantile Random Forest model as continuous timeseries of temperature in csv-format, continuous timeseries of ground truth temperature in csv-format
### Output: png-files
winterwise plots of predicted and ground truth continuous temperature timeseries in png-format

#### Set the paths to the defined functions, the style sheet for plotting and the configuration file, and set the name of the configuration file

In [None]:
# Set the path to the directory containing the defined functions. This directory is added to
# the module search path further below so that the helper modules can be imported from it.
PATH_defined_functions = './Defined_Functions/'

In [None]:
# Set the path and name of the style file which should be used for plotting. The value is
# handed to plt.style.use() further below.
style_file_for_plotting = './Style_File_Matplotlib.mplstyle'

In [None]:
# Set the path and name of the configuration file. Path and file name are joined by plain
# string concatenation when the file is opened, so the path has to end with a '/'.
PATH_configurations = './Configuration_Files/'
ifile_configurations = 'Configurations_Visualization_Continuous_Prediction_Ensembles.yaml'

#### Import the necessary python packages and functions
Nothing needs to be changed here.

In [None]:
# Import the necessary python packages.
import calendar
from datetime import datetime, timedelta

import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import yaml

In [None]:
# Read in the necessary defined functions. The directory with the defined functions is put on
# the module search path first, so this cell has to run after PATH_defined_functions is set.
import sys
sys.path.insert(1, PATH_defined_functions)
# The wildcard imports bring every public name of these modules into the notebook's namespace.
# read_in_csv_data() and truncate_data_by_date() are called further below.
# NOTE(review): nothing from create_auxiliary_date appears to be used in this notebook -
# confirm before removing the import.
from read_in_csv_data import *
from truncate_data_by_date import*
from create_auxiliary_date import *

#### Read in the style sheet for plotting

In [None]:
# Load the style sheet to be used by matplotlib for plotting. This will update the plotting
# parameters to e.g. have the right font, font size and figure size. The latter is adjusted to
# the textwidth of the LaTeX-document in order to avoid re-scaling the plot and thereby
# changing the font size again.
plt.style.use(style_file_for_plotting)

#### Read in the configuration file and the data specified in it

In [None]:
# Read in the configuration file (nothing needs to be changed here). The file is opened
# explicitly as UTF-8, the default encoding of YAML, so that reading it does not depend on the
# locale of the machine the notebook runs on. The parsed content is stored in config and is
# accessed by key throughout the rest of the notebook.
with open(PATH_configurations+ifile_configurations, encoding='utf-8') as f:
    config = yaml.safe_load(f)

##### Read in the ground truth

In [None]:
# Read in the continuous ground truth (nothing needs to be changed here). In one chain, the
# csv-file is loaded, every column whose name starts with 'Unnamed' is discarded and the
# column called 'index' is removed.
df_ground_truth = (
    read_in_csv_data(config['PATH_ground_truth'], config['ifile_ground_truth'])
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
    .drop(columns=['index'])
)

In [None]:
# Set the name of the columns containing the time and the variable of the ground truth. The
# columns are picked by position: the first remaining column is taken as the time column and
# the second one as the ground truth variable. Any further columns are not used.
time_column_name_ground_truth = df_ground_truth.columns[0]
var_column_name_ground_truth = df_ground_truth.columns[1]

In [None]:
# Check that everything is selected correctly (nothing needs to be changed here). Each
# heading is printed on its own line followed by the selected name; the last expression shows
# the first rows of the dataframe as cell output.
print('Continuous ground truth: ', var_column_name_ground_truth, sep='\n')
print('Name of the column containing the time: ', time_column_name_ground_truth, sep='\n')
print('Dataframe containing the ground truth: ')
df_ground_truth.head()

##### Read in the climatological ensemble

In [None]:
# Read in the continuous predictions of the climatological ensemble (nothing needs to be
# changed here). In one chain, the csv-file is loaded, every column whose name starts with
# 'Unnamed' is discarded and the column called 'index' is removed.
df_climatological_ensemble = (
    read_in_csv_data(config['PATH_climatological_ensemble'], config['ifile_climatological_ensemble'])
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
    .drop(columns=['index'])
)

In [None]:
# Set the name of the columns containing the time and the variables of the continuous
# predictions of the climatological ensemble. The columns are picked by position: the first
# remaining column is taken as the time column, all columns after it are taken as the
# members of the ensemble.
time_column_name_climatological_ensemble = df_climatological_ensemble.columns[0]
var_column_name_climatological_ensemble = df_climatological_ensemble.columns[1:]

In [None]:
# Check that everything is selected correctly (nothing needs to be changed here). Each
# heading is printed on its own line followed by the selected name(s); the last expression
# shows the first rows of the dataframe as cell output.
print('Names of the member of the climatological ensemble: ', var_column_name_climatological_ensemble, sep='\n')
print('Name of the column containing the time: ', time_column_name_climatological_ensemble, sep='\n')
print('Dataframe containing the climatological ensemble: ')
df_climatological_ensemble.head()

##### Read in the QRF predictions

In [None]:
# Read in the predictions of the QRF model (nothing needs to be changed here). In one chain,
# the csv-file is loaded, every column whose name starts with 'Unnamed' is discarded and the
# column called 'index' is removed.
df_predictions_qrf = (
    read_in_csv_data(config['PATH_predictions_qrf'], config['ifile_predictions_qrf'])
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
    .drop(columns=['index'])
)

In [None]:
# Set the name of the columns containing the time and the variables of the predictions of the
# QRF model. The columns are picked by position: the first remaining column is taken as the
# time column, all columns after it are taken as the predictions of the QRF model.
time_column_name_predictions_qrf = df_predictions_qrf.columns[0]
var_column_name_predictions_qrf = df_predictions_qrf.columns[1:]

In [None]:
# Check that everything is selected correctly (nothing needs to be changed here). Each
# heading is printed on its own line followed by the selected name(s); the last expression
# shows the first rows of the dataframe as cell output.
print('Names of predictions done by the QRF model: ', var_column_name_predictions_qrf, sep='\n')
print('Name of the column containing the time: ', time_column_name_predictions_qrf, sep='\n')
print('Dataframe containing the predictions of the QRF model: ')
df_predictions_qrf.head()

#### Prepare the continuous ground truth, the climatological ensemble and the predictions of the QRF model for plotting
From here on, nothing needs to be changed.

In [None]:
# A NumPy array with all the start years of the winters in the evaluation period is created.
# Since np.arange excludes its stop value, 1 is added so that the start year of the last
# winter is part of the array.
start_years_of_winter = np.arange(config['start_year_of_first_winter'], config['start_year_of_last_winter']+1)

In [None]:
# Then, two versions of the climatological ensemble are prepared: one for winters containing
# a 29 February (leap years) and one for regular years. For this, the time column is converted
# to datetimes and temporarily used as index so that the rows belonging to the 29 February can
# be identified and dropped. Afterwards, the time is turned back into a regular column in both
# dataframes. The complete dataframe is used for leap years, the one without the 29 February
# for regular years.
df_climatological_ensemble[time_column_name_climatological_ensemble] = pd.to_datetime(
    df_climatological_ensemble[time_column_name_climatological_ensemble]
)
ensemble_indexed_by_time = df_climatological_ensemble.set_index(time_column_name_climatological_ensemble)

is_february_29 = (ensemble_indexed_by_time.index.month == 2) & (ensemble_indexed_by_time.index.day == 29)
index_of_february_29 = ensemble_indexed_by_time.index[is_february_29]

df_climatological_ensemble_without_29_feb = ensemble_indexed_by_time.drop(index_of_february_29).reset_index()
df_climatological_ensemble = ensemble_indexed_by_time.reset_index()

In [None]:
# In a next step, the data of each winter are extracted and saved to lists holding one entry
# per winter: the members of the climatological ensemble, the predictions of the QRF model,
# the ground truth and the respective forecast dates (taken from the time column of the
# ground truth).
climatological_ensemble = []
predictions_qrf = []
ground_truth = []
forecast_dates = []

for start_year_of_winter in start_years_of_winter:

    # Each winter starts in start_year_of_winter and ends in the following year. The dates are
    # handed over to truncate_data_by_date as strings in the format 'YYYY_MM_DD'.
    start_winter = datetime(start_year_of_winter, config['start_month_winter'], config['start_day_winter'])
    end_winter = datetime(start_year_of_winter+1, config['end_month_winter'], config['end_day_winter'])
    start_date_winter = start_winter.strftime('%Y_%m_%d')
    end_date_winter = end_winter.strftime('%Y_%m_%d')

    df_ground_truth_respective_winter = truncate_data_by_date(df_ground_truth, time_column_name_ground_truth, start_date_winter, end_date_winter)

    df_predictions_qrf_respective_winter = truncate_data_by_date(df_predictions_qrf, time_column_name_predictions_qrf, start_date_winter, end_date_winter)

    # The climatological ensemble is not truncated by date. Only the version with or without
    # the 29 February is chosen, depending on whether the year in which the winter ends is a
    # leap year.
    # NOTE(review): presumably the ensemble file covers exactly one winter, so that its rows
    # line up with the forecast dates of every winter - confirm against the input data.
    if calendar.isleap(start_year_of_winter+1):
        df_predictions_respective_winter = df_climatological_ensemble
    else:
        df_predictions_respective_winter = df_climatological_ensemble_without_29_feb

    # to_numpy() always returns a two-dimensional array (rows of the dataframe x ensemble
    # members). Squeezing the dataframe instead would collapse an ensemble with a single
    # member to one dimension and break the percentile computation along axis 1 when plotting.
    predictions_respective_winter = df_predictions_respective_winter.drop(columns=[time_column_name_climatological_ensemble]).to_numpy()

    climatological_ensemble.append(predictions_respective_winter)
    predictions_qrf.append(df_predictions_qrf_respective_winter[var_column_name_predictions_qrf])

    ground_truth.append(df_ground_truth_respective_winter[var_column_name_ground_truth])
    forecast_dates.append(pd.to_datetime(df_ground_truth_respective_winter[time_column_name_ground_truth]))

#### Visualizing the predictions of the climatological ensemble and the QRF model for separate winters

In [None]:
# For illustration purposes, the range between the two percentiles defined in the
# configuration file is plotted as a shaded band for the climatological ensemble and for the
# QRF predictions, together with the ground truth as a dashed line. This gives a first
# impression about the models' forecast skill. One png-file is written per winter.
upper_percentile = config['upper_quantile']*100
lower_percentile = config['lower_quantile']*100
label_qrf = config['qrf_model_name']+', '+str(config['lead_time'])+'d lead'

for k, first_year_of_winter in enumerate(start_years_of_winter):
    dates_of_winter = forecast_dates[k]
    ensemble_of_winter = climatological_ensemble[k]
    qrf_of_winter = predictions_qrf[k]

    fig, ax = plt.subplots()
    ax.fill_between(
        x=dates_of_winter,
        y1=np.nanpercentile(ensemble_of_winter, upper_percentile, axis=1),
        y2=np.nanpercentile(ensemble_of_winter, lower_percentile, axis=1),
        color='r', alpha=0.25, label='Climatological Ensemble',
    )
    ax.fill_between(
        x=dates_of_winter,
        y1=np.nanpercentile(qrf_of_winter, upper_percentile, axis=1),
        y2=np.nanpercentile(qrf_of_winter, lower_percentile, axis=1),
        color='b', hatch='.', alpha=0.2, label=label_qrf,
    )
    ax.plot(dates_of_winter, np.array(np.squeeze(ground_truth[k])), color='k', linestyle='--', label='Ground Truth')

    plt.xticks(rotation=45)
    # NOTE(review): the y-range is hard-coded; presumably the values are in Kelvin - confirm
    # against config['unit_continuous_ground_truth'].
    ax.set_ylim(260, 290)
    ax.set_ylabel(var_column_name_ground_truth+' in '+config['unit_continuous_ground_truth'])

    # No legend is drawn here; a standalone legend is created in a separate cell below.
    ofile_plot = (
        config['PATH_plots']+config['qrf_model_name']+'_predictions_'+config['continuous_ground_truth']
        +'_lead_'+str(config['lead_time'])+'d_'+str(first_year_of_winter)+'_'+str(first_year_of_winter+1)+'.png'
    )
    fig.savefig(ofile_plot, bbox_inches='tight')

    # The figure is closed instead of shown since the size of the jupyter notebook is
    # really big otherwise.
    plt.close(fig)

In [None]:
# Creating a standalone legend for the plot visualizing the climatological ensemble, the QRF
# predictions and the ground truth. Proxy artists with the same colors, hatching and line
# style as in the plots are handed to the legend, the axes are switched off and the result
# is saved as its own png-file.
legend_handles = [
    mpatches.Patch(color='r', alpha=0.25, label='Climatological Ensemble'),
    plt.Line2D([], [], color='k', linestyle='--', label='Ground Truth Temperature (E-OBS)'),
    mpatches.Patch(color='b', hatch='.', alpha=0.2, label=config['qrf_model_name']+', '+str(config['lead_time'])+'d lead'),
]
plt.legend(handles=legend_handles, ncol=2)
plt.axis(False)
ofile_legend = config['PATH_plots']+'Standalone_legend_for_visualization_continuous_predictions.png'
plt.savefig(ofile_legend, bbox_inches='tight')
plt.show()

In [None]:
# End of Program