# General Analysis

In [7]:
## Check whether the train and test split share samples and determine the share of test samples that also occur in the training data
def _as_hashable(item):
    """Recursively convert the nested lists produced by ``ndarray.tolist()`` into tuples."""
    if isinstance(item, list):
        return tuple(_as_hashable(sub_item) for sub_item in item)
    return item


def evaluate_split_congruence(x_train, x_test):
    """Return the share of test samples that also appear in the training data.

    Samples are the entries along the first axis of each array; two samples
    match when all of their values compare equal.

    Parameters
    ----------
    x_train : array-like with ``tolist()``
        Training samples.
    x_test : array-like with ``shape`` and ``tolist()``
        Test samples.

    Returns
    -------
    float
        Number of test samples found in ``x_train`` divided by the number of
        test samples. An empty test set yields ``0.0`` instead of a division
        by zero.
    """
    n_test = x_test.shape[0]
    if n_test == 0:
        return 0.0

    # A set lookup replaces the list scan, so the check is linear in the
    # number of samples instead of quadratic.
    train_samples = {_as_hashable(sample) for sample in x_train.tolist()}
    n_match = sum(_as_hashable(sample) in train_samples for sample in x_test.tolist())

    return n_match/n_test

In [9]:
# Get reference values for the MSE or MAE,
# i.e. translate a constant relative discrepancy (given in %) per time point
# into the corresponding MSE or MAE value

def relate_loss(data, discrepancy, measure = 'mse'):
    """Translate a constant relative discrepancy into an MSE or MAE reference value.

    Every entry of ``data`` is multiplied by ``discrepancy``; the result is
    the mean of the squared ('mse') or absolute ('mae') products.

    Parameters
    ----------
    data : array with ``flatten()``
        Values the discrepancy is applied to.
    discrepancy : float
        Relative discrepancy factor, e.g. 0.05.
    measure : str
        'mse' or 'mae'. Any other value yields 0.

    Returns
    -------
    The mean squared / mean absolute scaled value, or 0 for an unknown measure.
    """
    if measure not in ('mse', 'mae'):
        return 0

    deviation = discrepancy*data.flatten()
    transformed = np.square(deviation) if measure == 'mse' else np.abs(deviation)
    return transformed.mean()

# Topic: Compare different Settings for Models, Ensembles etc.

In [2]:
# Given a prediction, a target and information about the underlying model's configuration, i.e. Usage of Dropout or
# a Lambda-Scaling Layer, this function returns a list of properties for SE or AE
# This list will eventually be added to a table, to compare models with different configurations

def calc_row_df(prediction, target, measure_type = 'absolute_error',lambda_layer = True, dropout_layer = True, 
                option_relative = False,row_name = None):
    """Summarise the element-wise error of a prediction as one table row.

    Parameters
    ----------
    prediction, target : arrays of matching (broadcastable) shape
    measure_type : str
        'absolute_error' or 'squared_error'; anything else prints a message
        and returns None.
    lambda_layer, dropout_layer : bool
        Model configuration flags, reported as 'yes' / 'no'.
    option_relative : unused
    row_name : optional
        When given, the statistics are returned as a list together with the
        row name.

    Returns
    -------
    tuple or None
        ``(dropout, lambda, min, max, mean, variance)`` without ``row_name``,
        ``(dropout, lambda, [min, max, mean, variance], row_name)`` with it.
        The statistics come from ``describe`` (presumably
        ``scipy.stats.describe`` -- confirm against the file's imports).
    """
    if measure_type == 'absolute_error':
        error_fn = np.abs
    elif measure_type == 'squared_error':
        error_fn = np.square
    else:
        print('Measure_type unknown')
        return

    summary = describe(error_fn(prediction-target).flatten())
    # summary[1] holds (min, max), summary[2] the mean, summary[3] the variance
    lowest, highest = summary[1][0], summary[1][1]
    average, variance = summary[2], summary[3]

    # The explicit comparison with True is kept: only True (or 1) maps to 'yes'
    dropout_flag = 'yes' if dropout_layer == True else 'no'
    lambda_flag = 'yes' if lambda_layer == True else 'no'

    if row_name == None:
        return dropout_flag, lambda_flag, lowest, highest, average, variance
    return dropout_flag, lambda_flag, [lowest, highest, average, variance], row_name

In [1]:
# This function computes and trains rnn models of different configurations, i.e. Dropout (yes/no) and 
# Scaling (yes/no)
# The data analysis relies on the 'calc_row_df'-function.
# It eventually returns a dataframe with 4 rows (for the 4 model types) and 
# 6 columns (recording the model configuration and Min/Max/Mean/Var of the absolute error per time point)

def rnn_single_dim_config_eval(x, y, x_test,y_test, scale, epochs, batch_size, nodes_hidden, 
                               final_dense_layer=True, dense_act_fct = 'linear', act_fct_special = False,
                               dropout_val = [0.2,0.2],val_share = 0.25, measure_type = 'absolute_error'):
    """Train four RNN variants and tabulate their test-set error statistics.

    The variants cover all combinations of dropout (yes/no) and lambda
    scaling layer (yes/no). Each model is built with ``create_rnn_model``,
    fitted on the raw (unscaled) targets ``y`` and evaluated on ``x_test``
    via ``calc_row_df``.

    Parameters
    ----------
    x, y : training inputs and targets; ``x.shape[1:3]`` is the model input
        shape and ``y.shape[1]`` the number of outputs.
    x_test, y_test : test inputs and targets.
    scale : passed as ``lambda_scale`` to the models with a lambda layer.
    epochs, batch_size, val_share : forwarded to ``fit`` (``val_share`` as
        ``validation_split``).
    nodes_hidden : hidden layer sizes; must not be longer than ``dropout_val``.
    final_dense_layer, dense_act_fct, act_fct_special : forwarded to
        ``create_rnn_model``.
    dropout_val : dropout shares, forwarded as ``dropout_share``. The list
        default is not modified by this function.
    measure_type : 'absolute_error' or 'squared_error'.

    Returns
    -------
    tuple or None
        ``(df, models, histories, predictions)`` where ``df`` has one row per
        model ('Model 0' .. 'Model 3') and the three lists are ordered
        no dropout/no lambda, dropout/no lambda, no dropout/lambda,
        dropout/lambda. Returns None (after printing a message) when there
        are fewer dropout values than hidden layers.

    Raises
    ------
    ValueError
        If ``measure_type`` is not supported. This is checked before any
        model is trained; previously the function only failed after all four
        trainings had completed.
    """
    if len(nodes_hidden) > len(dropout_val):
        print('No. of dropouts not sufficient for depth of model!' )
        return

    columns_by_measure = {
        'absolute_error': ['Dropout','Scaling Layer','min AE','max AE','MAE','Var(AE)'],
        'squared_error': ['Dropout','Scaling Layer','min SE','max SE','MSE','Var(SE)'],
    }
    if measure_type not in columns_by_measure:
        raise ValueError("measure_type must be 'absolute_error' or 'squared_error', "
                         "got {!r}".format(measure_type))

    model_input = Input(shape =x.shape[1:3] )

    # (dropout, lambda layer) per model, in the order of the result rows
    configurations = [(False, False), (True, False), (False, True), (True, True)]

    models, histories, predictions = [], [], []
    for use_dropout, use_lambda in configurations:
        # lambda_scale is only handed over when the scaling layer is requested;
        # otherwise create_rnn_model falls back to its own default.
        scaling_kwargs = {'lambda_scale': scale} if use_lambda else {}
        model = create_rnn_model(model_input = model_input, nodes = nodes_hidden, n_output = y.shape[1],
                                 final_dense_layer = final_dense_layer, dense_act_fct = dense_act_fct,
                                 act_fct_special = act_fct_special,
                                 lambda_layer = use_lambda, dropout_option = use_dropout,
                                 dropout_share = dropout_val, **scaling_kwargs)
        # Train on the raw targets and predict directly on the test inputs
        history = model.fit(x, y, batch_size = batch_size, epochs = epochs,
                            validation_split = val_share, verbose = 0)
        models.append(model)
        histories.append(history)
        predictions.append(model.predict(x_test))

    # Summarize results in a table, one row per configuration
    df = pd.DataFrame(data=None, index = None, columns = columns_by_measure[measure_type])
    for i, (use_dropout, use_lambda) in enumerate(configurations):
        df.loc['Model {}'.format(i)] = calc_row_df(predictions[i], y_test, dropout_layer = use_dropout,
                                                   lambda_layer = use_lambda, measure_type = measure_type)

    return df, models, histories, predictions

In [3]:
## For Several Single-, Ensemble- and Ensemble incl. Qual. - Models perform descriptive analysis
## For Non-Zero Target Values (which are optionally above a threshold): Look at Absolute Error per
## timepoint relative to the target value.
## For Zero-Target Values (or optionally target values below threshold): Look at Absolute Error per time point
## Optionally: Also include a Weighted Relative (Absolute) Error (WRAE) where
## WRAE = target/sum(targets at given time) * error for contract at given time

def create_df_model_comparison(model_single_lst,x_test, y_test, model_ens_lst = [None], 
                               model_ens_qual_lst = [None], model_plain_lst = [None], x_test_plain = None,
                               threshold = 0, wre_measure_option = True,
                               discount_option = False, names_number_adj = None, names_loss_adj = None,
                               names_number_plain = None, names_loss_plain = None,
                               discount_val = 1, version = 'new'):
    """Build a table of error statistics for single, ensemble and plain-input models.

    Parameters
    ----------
    model_single_lst : list of models with ``predict``; always evaluated on ``x_test``.
    x_test, y_test : test inputs and targets. ``y_test`` is indexed as a 2-D
        array whose columns are time points.
    model_ens_lst, model_ens_qual_lst, model_plain_lst : optional model lists.
        A list that is empty or whose first element is None is skipped.
        Ensembles are evaluated on ``x_test``, qualitative ensembles on
        ``[x_test]`` and plain models on ``[x_test_plain]``.
    threshold : only used by ``version='old'``; targets below it are treated
        like zero targets (absolute instead of relative error).
    wre_measure_option : unused.
    discount_option, discount_val : when ``discount_option`` is set, the
        weighted relative error of the j-th active time point is divided by
        ``discount_val**j``.
    names_number_adj, names_loss_adj : labels for the ensemble and qualitative
        ensemble rows ('$N_{Ens}$' and 'Loss' columns). If ``names_number_adj``
        is None both are replaced by blanks.
    names_number_plain, names_loss_plain : same for the plain-input rows.
    version : 'new' (weighted relative error table) or 'old' (relative /
        absolute error table, kept for quality control).

    Returns
    -------
    tuple
        ``version='new'``: ``(df, [diff, diff_ens, diff_ens_qual],
        [wre, wre_ens, wre_ens_qual])``. The plain-input models only appear
        as table rows.
        ``version='old'``: ``(df, [pred, pred_ens, pred_ens_qual])``.

    Raises
    ------
    ValueError
        If ``version`` is neither 'new' nor 'old'.
    """
    if version not in ('new', 'old'):
        raise ValueError("Unknown version {!r}; expected 'new' or 'old'.".format(version))

    def _provided(models):
        # [None] (the default) and [] both mean "no models of this kind"
        return len(models) > 0 and models[0] is not None

    # Blank labels if none were supplied. The adjusted names are shared by the
    # ensemble and the qualitative-ensemble rows, so they must cover the longer list.
    n_adj = max(len(model_ens_lst), len(model_ens_qual_lst))
    if names_number_adj is None:
        names_number_adj = ['']*n_adj
        names_loss_adj = ['']*n_adj
    elif names_loss_adj is None:
        names_loss_adj = ['']*n_adj

    n_plain = len(model_plain_lst)
    if names_number_plain is None:
        names_number_plain = ['']*n_plain
        names_loss_plain = ['']*n_plain
    elif names_loss_plain is None:
        names_loss_plain = ['']*n_plain

    # storage for predictions per model family
    pred = []
    pred_ens = []
    pred_ens_qual = []
    pred_plain = []

    if version == 'new': ## Only look at Weighted Relative Error
        # Time points where the summed target is positive, i.e. where the error
        # can be weighted by the sum of targets at that point in time
        index_pv_cum = (y_test.sum(axis=0)>0)
        y_test_cum = y_test.sum(axis=0)[index_pv_cum]

        df = pd.DataFrame(data=None, index = None, columns = ['Loss','$N_{Ens}$','min diff${}_t$','max diff${}_t$',
                                                              r'$\widehat{\text{Bias}}$', 
                                                              r'$\widehat{\text{Var}}(\hat{f})$',
                                                              'min WRE${}_t$','mean WRE${}_t$','max WRE${}_t$'] )
        diff, diff_ens, diff_ens_qual, diff_plain = [], [], [], []
        wre, wre_ens, wre_ens_qual, wre_plain = [], [], [], []

        if discount_option:
            # discount factor of the j-th active time point: discount_val**j.
            # The number of points must be an integer count, not the boolean mask.
            n_active = int(index_pv_cum.sum())
            discount_factors = discount_val**np.linspace(0, n_active-1, n_active)

        def _add_row(row_label, loss_name, n_models, prediction, pred_store, diff_store, wre_store):
            # Store prediction, difference and weighted relative error; write the table row
            difference = prediction-y_test
            weighted = difference[:,index_pv_cum]/y_test_cum
            if discount_option:
                weighted = weighted/discount_factors
            pred_store.append(prediction)
            diff_store.append(difference)
            wre_store.append(weighted)
            df.loc[row_label] = (loss_name, n_models,
                                 difference.min(), difference.max(),
                                 difference.mean(), prediction.var(),
                                 weighted.min(), weighted.mean(), weighted.max())

        # Standard (individual) models
        for i, model in enumerate(model_single_lst):
            _add_row('EA 0{}'.format(i), 'MSE', '1', model.predict(x_test), pred, diff, wre)

        # Option Model Ensemble
        if _provided(model_ens_lst):
            for i, model in enumerate(model_ens_lst):
                _add_row('EA {}'.format(i), names_loss_adj[i], names_number_adj[i],
                         model.predict(x_test), pred_ens, diff_ens, wre_ens)

        # Option Model Ensemble incl. Qualitative Model
        if _provided(model_ens_qual_lst):
            for i, model in enumerate(model_ens_qual_lst):
                _add_row('EAQ {}'.format(i), names_loss_adj[i], names_number_adj[i],
                         model.predict(x = [x_test]), pred_ens_qual, diff_ens_qual, wre_ens_qual)

        # Option Model Ensemble with plain, repetitive input
        if _provided(model_plain_lst):
            for i, model in enumerate(model_plain_lst):
                _add_row('EP {}'.format(i), names_loss_plain[i], names_number_plain[i],
                         model.predict(x = [x_test_plain]), pred_plain, diff_plain, wre_plain)

        return df, [diff, diff_ens, diff_ens_qual], [wre, wre_ens, wre_ens_qual]

    ##### Old Version of Code, for quality control purpose ####
    # Relative error where the target is non-zero and at least `threshold`,
    # plain difference everywhere else.
    index_rel = y_test!=0
    index_abs = y_test==0
    index_th = y_test>=threshold
    mask_relative = index_rel & index_th
    mask_absolute = index_abs | np.logical_not(index_th)

    df = pd.DataFrame(data=None, index = None, columns = ['min RE','MRE','Max. RE',
                                                          'Min. Diff.','Mean Diff.','Max. Diff',
                                                          'Bias', 'Variance'] )

    def _old_row(prediction):
        # Statistics of the relative and the absolute part of the error
        difference = prediction-y_test
        relative = difference[mask_relative]/y_test[mask_relative]
        absolute = difference[mask_absolute]
        return (relative.min(), relative.mean(), relative.max(),
                absolute.min(), absolute.mean(), absolute.max(),
                difference.mean(), prediction.var())

    # Standard (individual) models
    for i, model in enumerate(model_single_lst):
        pred.append(model.predict(x_test))
        df.loc['Model {}'.format(i)] = _old_row(pred[i])

    # Option Model Ensemble
    if _provided(model_ens_lst):
        for i, model in enumerate(model_ens_lst):
            pred_ens.append(model.predict(x_test))
            df.loc['Model Ensemble {}'.format(i)] = _old_row(pred_ens[i])

    # Option Model Ensemble incl. Qualitative Model
    if _provided(model_ens_qual_lst):
        for i, model in enumerate(model_ens_qual_lst):
            pred_ens_qual.append(model.predict(x = [x_test]))
            df.loc['Model Quant.&Qual. {}'.format(i)] = _old_row(pred_ens_qual[i])

    return df, [pred, pred_ens, pred_ens_qual]

In [11]:
## Function to evaluate the fit of a given model w.r.t. the target values
# Aim: We want to examine the observation, that contracts with low reserve values have a worse fit 
# than high reserve value contracts

# Procedure: compute precision (prediction-target)/ target -> per contract
# Calculate those values only for times, where the contract has target reserve > 0 
# i.e. exclude time 0 and time of and after maturity

# Compute average precision per contract
# Compare this to the maximal reserve value (throughout being active) of the contract

# output versions: (1) statistic -> provides table with values (average values for deciles of reserve values)
#                  (2) plot -> plots the results
#                  (3) both


def model_examine_indivual_fit(model, data, targets, output_option = 'plot', PV_max = 1,
                               interval_lst = [0,0.001, 0.005, 0.01,0.2,0.4,0.6,0.8,0.9,1]):
    """Relate each contract's average relative error to its maximal target value.

    For every contract (row) the relative error ``(prediction - target) / target``
    is averaged over the time points with ``target > 0``. Contracts are then
    either grouped into intervals of their maximal target (relative to the
    overall maximum) or shown in a scatter plot.

    Parameters
    ----------
    model : object with ``predict(x=...)``
    data : model input
    targets : 2-D array, one row per contract
    output_option : 'statistic' (table), 'plot' (scatter plot) or 'both'
    PV_max : unused
    interval_lst : increasing fractions of the overall maximal target that
        delimit the intervals. Intervals are closed on the left and open on
        the right, except the last one, which also includes its upper bound
        so that the contract with the overall maximum is counted. The list
        default is not modified.

    Returns
    -------
    pandas.DataFrame or None
        For 'statistic' and 'both': rows 'mean', 'min' and 'max' relative
        error per interval; intervals without contracts hold NaN.
        For 'plot' and for an unknown option (after printing a message): None.
    """
    want_statistic = output_option in ('statistic', 'both')
    want_plot = output_option in ('plot', 'both')
    if not (want_statistic or want_plot):
        print('output_option unknown!')
        return

    prediction = model.predict(x=data)
    n_contracts = prediction.shape[0]
    active = targets>0

    # Average relative error per contract; NaN if the contract never has a positive target
    precision_avg = np.full(shape=(n_contracts,), fill_value=np.nan)
    for i in range(n_contracts):
        mask = active[i,:]
        if mask.any():
            precision_avg[i] = ((prediction[i,mask]-targets[i,mask])/targets[i,mask]).mean()

    # Max target per contract
    targets_max = targets.max(axis=1)

    df = None
    if want_statistic:
        targets_max_overall = targets_max.max()
        n_intervals = max(len(interval_lst)-1, 0)
        stat_columns = ['{}-{}'.format(interval_lst[i], interval_lst[i+1]) for i in range(n_intervals)]

        df = pd.DataFrame(data=None, index = None, columns = stat_columns )

        # Mean / max / min of the average relative errors per interval.
        # Empty intervals stay NaN instead of raising on a zero-size reduction.
        row_avg = np.full(shape=(n_intervals,), fill_value=np.nan)
        row_max = np.full(shape=(n_intervals,), fill_value=np.nan)
        row_min = np.full(shape=(n_intervals,), fill_value=np.nan)
        for i in range(n_intervals):
            lower = targets_max_overall*interval_lst[i]
            upper = targets_max_overall*interval_lst[i+1]
            if i == n_intervals-1:
                below_upper = targets_max <= upper
            else:
                below_upper = targets_max < upper
            members = precision_avg[(targets_max >= lower) & below_upper]
            if members.size > 0:
                row_avg[i] = members.mean()
                row_max[i] = members.max()
                row_min[i] = members.min()

        df.loc['mean re${}_t$'] = row_avg
        df.loc['min re${}_t$'] = row_min # Note: Prediction - Target <0 -> Underestimation
        df.loc['max re${}_t$'] = row_max

    if want_plot:
        plt.scatter(targets_max,precision_avg )
        plt.xlabel('Max. Reserve of Contract', fontsize = 'large')
        plt.ylabel('Average relative Error of Contract', fontsize = 'large')
        plt.show()

    return df

In [22]:
## Relate ensemble models to a q-value (average percentage discrepancy related to MSE or MAE)
def relate_ens_to_q(x, y , x_plain,EA_lst = [None], EAQ_lst = [None], EP_lst = [None] ):
    """Compare the log-losses of ensemble models with q-value reference losses.

    The first two columns hold the reference values obtained from
    ``relate_loss`` for a constant relative discrepancy of 5% and 1% of ``y``.
    They are followed by one column per model in ``EA_lst``, ``EAQ_lst`` and
    ``EP_lst`` (in that order).

    Parameters
    ----------
    x : input for the models in ``EA_lst`` and ``EAQ_lst``
    y : target values
    x_plain : input for the models in ``EP_lst``
    EA_lst, EAQ_lst, EP_lst : lists of models with ``predict``. A list that is
        empty or whose first element is None (the default) contributes no
        columns.

    Returns
    -------
    pandas.DataFrame
        Rows 'log(MSE)' and 'log(MAE)'; columns 'q=0.05', 'q=0.01',
        'EA i', 'EAQ i', 'EP i'.
    """
    def _provided(models):
        # [None] is the "not supplied" default and must not be dereferenced
        if len(models) == 0 or models[0] is None:
            return []
        return list(models)

    stat_columns = ['q=0.05', 'q=0.01']
    row_mse = [np.log(relate_loss(y,0.05, 'mse')), np.log(relate_loss(y,0.01, 'mse'))]
    row_mae = [np.log(relate_loss(y,0.05, 'mae')), np.log(relate_loss(y,0.01, 'mae'))]

    model_groups = [('EA', _provided(EA_lst), x),
                    ('EAQ', _provided(EAQ_lst), x),
                    ('EP', _provided(EP_lst), x_plain)]
    for prefix, models, model_input in model_groups:
        for i, model in enumerate(models):
            # predict once and reuse the error for both measures
            error = model.predict(model_input)-y
            stat_columns.append('{} {}'.format(prefix, i))
            row_mse.append(np.log((error**2).mean()))
            row_mae.append(np.log(np.abs(error).mean()))

    df = pd.DataFrame(data=None, index = None, columns = stat_columns )
    df.loc['log(MSE)'] = row_mse
    df.loc['log(MAE)'] = row_mae

    return df

# Topic: Clustering

In [7]:
## Function to visualize the fit of all clusters of the ANN approach
# We compare: (1) Target value, (2) prediction of the ANN, 
# (3) the policy values obtained by classical actuarial methods for the representatives obtained by ANN
# and (4) the Baseline policy value of K-Means

# Input formats: targets as matrix (cumulative per cluster), cluster_members_count as vector, ann_prediction as matrix
# ann_classical as list (for ceiled and floored representative), 
# kmeans_baseline as list (for ceiled and floored representative)(cumulative per cluster)
# plot_type can be 'single' (each cluster individually) or 'cumulative' (portfolio view)


## This function has been improved upon by 'analyze_clustering' in the collection of functions 'clustering'


def visualize_cluster_fit(targets, cluster_members_count, ann_prediction=None, 
                          ann_classical_up=None, ann_classical_low=None,
                          kmeans_baseline_up=None, kmeans_baseline_low=None,
                          plot_type = 'single',
                          n_columns = 4, figsize = (20,30)):
    """Plot targets against ANN predictions, classical values and the K-Means baseline.

    Parameters
    ----------
    targets : 2-D array, one row per cluster
    cluster_members_count : number of members per cluster; a flat vector or a
        column vector. Used to scale ``ann_prediction`` and the
        ``ann_classical_*`` curves.
    ann_prediction : optional 2-D array, one row per cluster
    ann_classical_up, ann_classical_low : optional 2-D arrays, one row per
        cluster (presumably values for the ceiled / floored representatives --
        confirm with the caller)
    kmeans_baseline_up, kmeans_baseline_low : optional 2-D arrays, plotted
        without scaling
    plot_type : 'single' (one subplot per cluster) or 'cumulative' (sum over
        clusters); anything else prints a message and returns
    n_columns, figsize : subplot grid layout for ``plot_type='single'``

    Curves whose argument is None are left out of the plot. Returns None.
    """
    N_clusters = targets.shape[0]

    if plot_type not in ('single', 'cumulative'):
        print('plot_type unknown!')
        return

    # Column vector so that counts scale each cluster's row, no matter whether
    # a flat or a column vector was passed in.
    counts = np.asarray(cluster_members_count).reshape(-1, 1)

    if plot_type == 'single':
        # squeeze=False keeps a 2-D axes array even for a 1x1 grid
        fig, ax = plt.subplots(nrows = np.ceil(N_clusters/n_columns).astype('int'), 
                               ncols = n_columns, figsize = figsize, squeeze = False)
        ax = ax.flatten()

        for i in range(N_clusters):
            # Actual Targets
            ax[i].plot(targets[i,:], 'r*', label = 'Target')
            # Predicted Reserve by ANN
            if ann_prediction is not None:
                ax[i].plot(ann_prediction[i,:]*counts[i], color = 'blue', label = 'ANN - Prediction')
            # Reserve based on classical calculation using representative contracts of ANN approach
            if ann_classical_up is not None:
                ax[i].plot(ann_classical_up[i,:]*counts[i], color = 'orange', 
                           label = 'ANN - Classical')
            if ann_classical_low is not None:
                ax[i].plot(ann_classical_low[i,:]*counts[i], color = 'orange') 
            # Reserve based on K-Means clustering
            if kmeans_baseline_up is not None:
                ax[i].plot(kmeans_baseline_up[i,:], linestyle = ':', color = 'grey', 
                           label = 'K-Means Baseline')
            if kmeans_baseline_low is not None:
                ax[i].plot(kmeans_baseline_low[i,:], linestyle = ':', color = 'grey')
            if i == 0:
                ax[i].legend()
    else:
        # Look at cumulative cluster fit
        plt.plot(targets.sum(axis=0), 'r*', label = 'Target') # not scaled by numbers
        if ann_prediction is not None:
            plt.plot((ann_prediction*counts).sum(axis=0), label = 'ANN - Prediction')
        if kmeans_baseline_up is not None:
            plt.plot((kmeans_baseline_up).sum(axis=0), color = 'grey', linestyle = ':', 
                     label ='K-Means Baseline' )
        if kmeans_baseline_low is not None:
            plt.plot((kmeans_baseline_low).sum(axis=0), color = 'grey', linestyle = ':')
        if ann_classical_up is not None:
            plt.plot((ann_classical_up*counts).sum(axis = 0), color = 'orange', label = 'ANN - Classical')
        if ann_classical_low is not None:
            plt.plot((ann_classical_low*counts).sum(axis = 0), color = 'orange')
        plt.legend()      

    plt.tight_layout()
    plt.show()
    return 