# Function Setup

### 3.) Data Visualization

In [None]:
## Plot Feature Values w.r.t. Target (i.e. Policy) Values for a fixed point in time (position)
## Aim: Observe Non-linear Effect of Age, Duration and Age of Contract
## Note: For zero-cost assumption, target is linear w.r.t. Sum Insured

def plot_feature_structure(x, y,feature_name = 'Age', pos = 10):
    """Scatter a single feature against the target at one fixed time index.

    Parameters
    ----------
    x : feature values, plotted on the x-axis.
    y : two-dimensional target array; only column ``pos`` is displayed.
    feature_name : text used as x-axis label.
    pos : column index of ``y`` that is plotted on the y-axis.

    The figure is shown immediately; nothing is returned.
    """
    # Select the target values at the requested (fixed) point in time.
    target_at_pos = y[:, pos]

    plt.plot(x, target_at_pos, 'o')
    plt.xlabel(feature_name, fontsize='large')
    plt.ylabel('Policy Value (time fixed)', fontsize='large')
    plt.show()

In [None]:
## Aim here is the same as for plot_feature_structure
## This function presents the target value (at some fixed point in time) w.r.t. the feature component's value
## for all feature components

def plot_all_features_structure(x_lst, y_lst, names_lst = ['Age', 'Sum Insured','Duration', 'Age of Contract'],
                                pos_lst = [10,10,1,0], fig_size = (12,8)):
    """Plot, for every feature, its values against the target at a fixed time index.

    Parameters
    ----------
    x_lst : sequence of feature-value arrays, one entry per feature.
    y_lst : sequence of two-dimensional target arrays, aligned with ``x_lst``.
    names_lst : x-axis label per feature (indexed like ``x_lst``).
    pos_lst : per feature, the column of the matching ``y_lst`` entry to plot.
    fig_size : size passed to ``plt.subplots``.

    The panels are arranged in two columns. Up to four features use a 2x2
    grid; additional features add rows instead of raising an IndexError.
    The default lists are only read, never mutated. Nothing is returned and
    ``plt.show()`` is left to the caller.
    """
    n_features = len(x_lst)
    n_cols = 2
    # Ceil-divide to get the required rows, but never go below the 2x2 grid
    # that was used so far (keeps existing figures unchanged).
    n_rows = max(2, -(-n_features // n_cols))

    fig, ax = plt.subplots(n_rows, n_cols, figsize=fig_size)
    ax = ax.flatten()

    for i in range(n_features):
        ax[i].plot(x_lst[i], y_lst[i][:, pos_lst[i]], '.')
        ax[i].set_xlabel(names_lst[i], fontsize='large')
        # Only the left-hand column carries the shared y-axis label.
        if i % n_cols == 0:
            ax[i].set_ylabel('Policy Value')

In [10]:
# Given a Training History, visualize MSE and MAE (incl. their log-versions)
def training_progress_visual(history, option_validation=True, option_relate = True, y = None,
                             fig_size = (10,6), model_name = '', option_simple_fig = False):
    """Visualize a training history as MSE/MAE curves (plain and log-scaled).

    Parameters
    ----------
    history : mapping with the keys ``'loss'`` and ``'mean_absolute_error'``
        and, when ``option_validation`` is set, ``'val_loss'`` and
        ``'val_mean_absolute_error'``. The MAE keys are only read for the
        2x2 figure.
    option_validation : also draw the validation curves.
    option_relate : draw horizontal reference levels for q=0.05 and q=0.01.
        They are obtained from ``relate_loss`` (defined outside this
        function) and require ``y``.
    y : target data handed (flattened) to ``relate_loss``. If ``None`` the
        reference levels are skipped instead of raising an AttributeError.
    fig_size : size passed to ``plt.subplots``.
    model_name : appended to the figure title of the 2x2 figure.
    option_simple_fig : if true, draw a single log(MSE) panel instead of the
        2x2 overview.

    The figure is shown immediately; nothing is returned.
    """
    # Reference levels per measure as (level, color, legend label).
    references = {'mse': [], 'mae': []}
    if option_relate and y is not None:
        y_flat = y.flatten()
        styles = ((0.05, 'black', '$q=0.05$'), (0.01, 'green', '$q=0.01$'))
        for measure in ('mse', 'mae'):
            for q, color, label in styles:
                level = relate_loss(data=y_flat, discrepancy=q, measure=measure)
                references[measure].append((level, color, label))

    def _identity(values):
        return values

    def _label(text, labelled):
        # Only pass ``label`` where a legend entry is wanted.
        return {'label': text} if labelled else {}

    def _draw_panel(axis, key, measure, use_log, labelled):
        """Draw training curve, optional validation curve and reference levels."""
        transform = np.log if use_log else _identity
        axis.plot(transform(history[key]), **_label('Training Set', labelled))
        if option_validation:
            axis.plot(transform(history['val_' + key]),
                      **_label('Validation Set', labelled))
        for level, color, label in references[measure]:
            # axhline spans the full axes width by default; its xmin/xmax are
            # axes fractions, so no epoch count must be passed here.
            axis.axhline(transform(level), color=color, linestyle='-.',
                         **_label(label, labelled))

    if option_simple_fig:
        fig, ax = plt.subplots(1, 1, figsize=fig_size)
        _draw_panel(ax, 'loss', 'mse', use_log=True, labelled=True)
        ax.set_ylabel('log(MSE)', fontsize='large')
        ax.set_xlabel('Epoch', fontsize='large')
        ax.legend()
        plt.show()
        return

    fig, ax = plt.subplots(2, 2, figsize=fig_size)

    # Top row: MSE (plain / log); only the first panel feeds the legend.
    _draw_panel(ax[0, 0], 'loss', 'mse', use_log=False, labelled=True)
    ax[0, 0].set_ylabel('MSE')

    _draw_panel(ax[0, 1], 'loss', 'mse', use_log=True, labelled=False)
    ax[0, 1].set_ylabel('log(MSE)')

    # Bottom row: MAE (plain / log).
    _draw_panel(ax[1, 0], 'mean_absolute_error', 'mae', use_log=False, labelled=False)
    ax[1, 0].set_ylabel('MAE')
    ax[1, 0].set_xlabel('Epoch')

    _draw_panel(ax[1, 1], 'mean_absolute_error', 'mae', use_log=True, labelled=False)
    ax[1, 1].set_ylabel('log(MAE)')
    ax[1, 1].set_xlabel('Epoch')

    # Display Legend for Training (and optional Validation) Data
    ax[0, 0].legend()

    fig.suptitle('Training Progress of Model '+ model_name)
    # Leave room at the top for the suptitle.
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()

In [6]:
## For a single, selected Contract plot predictions of all single-models used for ensemble, as well as
## Ensemble-Model's prediction and target Values
## Aim: Observe Benefit of averaging Single Models by creating an Ensemble

def ensemble_plot(models, model_ens, data, y, contract_display = 0, display_wo_qualitative_part = True,
                  display_qualitative_part = True, model_ens_wo_qual = None, fig_size = (10,6)):
    """Plot single-model, ensemble and target values for one selected contract.

    Parameters
    ----------
    models : sequence of models offering ``predict(x=...)``; each prediction
        is drawn as a dotted black line.
    model_ens : ensemble model drawn with label 'EAQ'.
    data : three-dimensional input array; row ``contract_display`` is
        predicted (kept as a batch of size one).
    y : two-dimensional target array; row ``contract_display`` is plotted.
    contract_display : index of the contract to display.
    display_wo_qualitative_part : draw the prediction of ``model_ens_wo_qual``
        (label 'EA'). Skipped when that model is ``None`` (the default), which
        previously raised an AttributeError.
    display_qualitative_part : draw the prediction of ``model_ens``.
    model_ens_wo_qual : optional ensemble model drawn with label 'EA'.
    fig_size : size passed to ``plt.subplots``.

    The figure is shown immediately; nothing is returned.
    """
    fig, ax = plt.subplots(1, 1, figsize=fig_size)

    # Slice once; the one-row batch is shared by all models.
    contract = data[contract_display:contract_display + 1, :, :]

    # Single models: only the first line gets a legend entry so that the
    # legend lists 'Single Model(s)' exactly once.
    for i, model in enumerate(models):
        legend_kwargs = {'label': 'Single Model(s)'} if i == 0 else {}
        ax.plot(model.predict(x=contract).flatten(), 'black', linestyle=':',
                **legend_kwargs)

    # Ensemble, without qualitative Component
    if display_wo_qualitative_part and model_ens_wo_qual is not None:
        ax.plot(model_ens_wo_qual.predict(x=contract).flatten(),
                color='magenta', linestyle='-', label='EA')

    # Ensemble, with qualitative Component
    if display_qualitative_part:
        ax.plot(model_ens.predict(x=contract).flatten(),
                color='blue', linestyle=':', marker='o', label='EAQ')

    # Display Targets
    ax.plot(y[contract_display, :], '*r', label='Target')
    ax.legend()
    ax.set_xlabel('Time, t', fontsize='large')
    ax.set_ylabel('Policy Value, V_t', fontsize='large')
    plt.show()

In [7]:
## Compare multiple rnns w.r.t. their training history

def rnn_single_dim_config_plots(hist,  scale,ref5, ref1,dictionary_lambda= [False,False,True,True],
                                measure = 'loss', show_val = True, fig_size = (10,6)):
    """Compare several models by their log-loss training curves.

    Parameters
    ----------
    hist : sequence of objects exposing a ``history`` mapping with the keys
        ``'loss'`` and (if ``show_val``) ``'val_loss'``.
    scale : losses of model ``i`` are multiplied by ``scale**2`` when
        ``dictionary_lambda[i]`` is false.
    ref5, ref1 : reference loss levels drawn as horizontal lines labelled
        q=0.05 and q=0.01 (their logarithm is plotted).
    dictionary_lambda : per-model booleans, indexed like ``hist``; must hold
        at least ``len(hist)`` entries. The default list is only read.
    measure : currently unused; the curves always show ``'loss'``.
    show_val : additionally draw the validation loss as a dashed line.
    fig_size : size passed to ``plt.subplots``.

    Nothing is returned and ``plt.show()`` is left to the caller.
    """
    color_lst = ['blue', 'green', 'red', 'c', 'purple', 'brown', 'yellow']
    fig, ax = plt.subplots(1, 1, figsize=fig_size)

    for i, run in enumerate(hist):
        # Rescale the loss unless the model is flagged in dictionary_lambda.
        factor = 1 if dictionary_lambda[i] else scale**2
        # Cycle through the palette; the modulus must be the palette length,
        # otherwise the 8th model indexes past the end of the list.
        color = color_lst[i % len(color_lst)]

        loss = np.array(run.history['loss']) * factor
        ax.plot(range(1, len(loss) + 1), np.log(loss), linestyle='-',
                color=color, label='Model {}'.format(i))

        if show_val:
            val_loss = np.array(run.history['val_loss']) * factor
            # Same color as the training curve, no separate legend entry.
            ax.plot(range(1, len(val_loss) + 1), np.log(val_loss),
                    linestyle='--', color=color)

    # Horizontal reference levels; axhline spans the full axes width by
    # default (its xmin/xmax arguments are axes fractions, not epochs).
    ax.axhline(np.log(ref5), color='black', linestyle='-.', label='$q=0.05$')
    ax.axhline(np.log(ref1), color='grey', linestyle='-.', label='$q=0.01$')

    ax.legend()
    ax.set_ylabel('log(MSE)', fontsize='large')
    ax.set_xlabel('Epoch', fontsize='large')

In [18]:
## Visualize the model's prediction in comparison to target values for a selected contract.


def visualize_prediction_quality(model, x, y, position = 0, model_name = '', fig_size = (8,4),
                                 additional_plot = False, add_y = None, normalize_add_y = True,
                                plot_on_ax = False, ax = None, fig = None):
    """Plot model predictions against targets for one or several contracts.

    Parameters
    ----------
    model : object offering ``predict``.
    x : two- or three-dimensional input array; rows are selected by
        ``position`` and passed to ``model.predict`` as batches of size one.
    y : two-dimensional target array, indexed by the same row positions.
    position : a single integer (Python or numpy) or a list / numpy array of
        integers. For any other type a message is printed and no
        prediction is drawn.
    model_name : used in the figure title.
    fig_size : size of the figure created when no axes are supplied.
    additional_plot : additionally draw ``add_y`` as a dotted green line.
    add_y : data for the additional line.
    normalize_add_y : if true, ``add_y[position, :]`` is divided by its
        maximum before plotting.
    plot_on_ax : draw on the supplied ``ax`` instead of creating a figure.
        If no ``ax`` is supplied a new figure is created nevertheless.
    ax, fig : existing axes / figure; a title is only set when a figure is
        available.

    Raises
    ------
    ValueError
        If ``x`` is neither two- nor three-dimensional (this used to call
        ``exit()`` and terminate the interpreter).
    """
    if not plot_on_ax or ax is None:
        fig, ax = plt.subplots(1, 1, figsize=fig_size)

    def _predict_contract(idx):
        """Return the flattened prediction for the single contract ``idx``."""
        n_dims = len(x.shape)
        if n_dims == 2:
            return model.predict(x[idx:idx + 1, :]).flatten()
        if n_dims == 3:
            return model.predict(x[idx:idx + 1, :, :]).flatten()
        raise ValueError('Unknown Data Input')

    # Case I: Single Contract Prediction
    if isinstance(position, (int, np.integer)):
        ax.plot(_predict_contract(position), '-.', label='Prediction')
        ax.plot(y[position, :], '*r', label='Target')

    # Case II: Multiple Contract Prediction
    elif isinstance(position, (list, np.ndarray)):
        for k, idx in enumerate(position):
            # Each contract gets its own prediction line; only the first
            # pair of lines contributes legend entries.
            pred = _predict_contract(idx)
            if k == 0:
                ax.plot(pred, '-.', label='Prediction')
                ax.plot(y[idx, :], '*', label='Target')
            else:
                ax.plot(pred, '-.')
                ax.plot(y[idx, :], '*')

    else:
        print('Unknown Input Type position.')

    ax.set_xlabel('Time, t', fontsize='large')
    ax.set_ylabel('Value', fontsize='large')

    if additional_plot:
        if normalize_add_y:
            selected = add_y[position, :]
            add_y = selected / selected.max()
        ax.plot(add_y, ':g', label='Policy Value \n (scaled)')

    ax.legend(loc=1)

    if fig is not None:
        fig.suptitle('Visualization of Model '+ model_name + ' for selected, single contract.')

In [10]:
## For a given model calculate prediction values.
## Subsequently, calculate the absolute error and display it relative
## to the target value.
## Visually, we present the relative error.

def plot_accuracy_cum(model_lst, x, y, model_plain_lst = [None], x_plain = [None], fig_size = (10,6)):
    """Plot the relative error of the cumulative predictions per time step.

    For every model the predictions and the targets are summed over axis 0;
    the relative error ``(pred_cum - y_cum) / y_cum`` is evaluated where the
    cumulative target is positive, plotted, and stored in a DataFrame.

    Parameters
    ----------
    model_lst : sequence of models offering ``predict``; evaluated on ``x``.
        Plotted as 'EAQ 0', 'EAQ 1', ...
    x : input for the models in ``model_lst``.
    y : targets; summed over axis 0.
    model_plain_lst : optional further models evaluated on ``x_plain`` and
        plotted as 'EP 0', 'EP 1', ... Ignored when empty or when the first
        entry is ``None`` (the default). The default list is only read.
    x_plain : input for the models in ``model_plain_lst``.
    fig_size : size passed to ``plt.subplots``.

    Returns
    -------
    pandas.DataFrame with one column of relative errors per model. Column
    names are 'EAQ' for the first model, 'EAQ i' for the following ones and
    'EP i' for the plain models.

    Notes
    -----
    ``dur_max`` is read from module scope and fixes the length of the dashed
    red tolerance lines (0.05 / 0.1 / 0.2, switching after 15 and 30 steps).
    """
    # Targets are identical for all models, so aggregate them only once.
    y_cum = y.sum(axis=0)
    index_pos = y_cum > 0

    def _relative_error(model, inputs):
        """Relative error of the cumulative prediction where y_cum > 0."""
        pred_cum = model.predict(inputs).sum(axis=0)
        return (pred_cum[index_pos] - y_cum[index_pos]) / y_cum[index_pos]

    # save precisions in dataframe
    stat_columns = list(range(len(y_cum[index_pos])))
    df = pd.DataFrame(data=None, index=stat_columns, columns=None)

    fig, ax = plt.subplots(1, 1, figsize=fig_size)

    # Include 2nd y-axis for absolute policy value
    # plot first for better visibility
    ax2 = ax.twinx()
    ax2.set_ylabel('Cumulative Policy Value', color='grey', fontsize='large')
    ax2.bar(range(len(y_cum)), y_cum, color='grey', alpha=.2)
    ax2.tick_params(axis='y')

    # Tolerance band: concatenate the three segments directly. Wrapping the
    # unequal-length segments in np.array first would build a ragged array.
    limits = np.hstack([np.repeat(0.05, 15),
                        np.repeat(0.1, 15),
                        np.repeat(0.2, max(dur_max - 30, 0))])[:dur_max]
    ax.plot(range(dur_max), limits, '--r')
    ax.plot(range(dur_max), -limits, '--r')

    # Plot models' accuracy. The first model keeps the column name 'EAQ'
    # (without index) that callers of the returned frame rely on.
    for i, model in enumerate(model_lst):
        acc = _relative_error(model, x)
        ax.plot(acc, label='EAQ ' + str(i))
        column = 'EAQ' if i == 0 else 'EAQ ' + str(i)
        df.loc[:, column] = list(acc)

    # optional: Plot models with plain input
    if model_plain_lst and model_plain_lst[0] is not None:
        for i, model in enumerate(model_plain_lst):
            acc = _relative_error(model, x_plain)
            ax.plot(acc, label='EP ' + str(i))
            df.loc[:, 'EP ' + str(i)] = list(acc)

    ax.tick_params(axis='y')
    ax.set_ylabel('rce${}_t$', fontsize='large')
    ax.set_xlabel('Time, $t$', fontsize='large')
    ax.legend()

    plt.tight_layout()
    plt.show()

    return df