# Calculate and graph performance summary statistics #

**Author:** Andrew Larkin, Oregon State University College of Public Health and Human Sciences <br>
**Date created:** October 24, 2018

### Summary ###
Part of the hyperparameter tuning process, this script reads performance dictionaries in pickled format, calculates summary statistics, and graphs the Matthews Correlation Coefficient (MCC) and cost as a function of hyperparameter values and epoch number.

### Import libraries and define global variables and constants ###

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import copy
from matplotlib.pyplot import figure

In [None]:
# root folders: everything lives under the training folder on the local drive
parentFolder = "C:/Users/larkinan/Desktop/DBTraining/"
performFolder = parentFolder + "modelTrainingPerformance/"

# where the pickled performance dictionaries are stored for each tuned hyperparameter.
# Each entry below is (lookup key, subfolder name inside performFolder).
performancePickleParams = {"parentFolder": performFolder}
performancePickleParams.update(
    (paramKey, performFolder + subfolderName)
    for paramKey, subfolderName in (
        ("learningRate", "learningRate_Extended"),
        ("batchSizeFolder", "batchSize"),
        ("postLSTMLayerSize", "postLSTMLayerSize"),
        ("postLSTMLayers", "postLSTMLayers"),
        ("keepProb", "keepProb"),
        ("hiddenLayerActivation", "activationType"),
    )
)

In [None]:
# load pickled datasets of training model performance into memory

def readPickledPerformanceDatasets(inFolder):
    """Load every pickled performance dictionary stored in a folder.

    Args:
        inFolder (str): folder to read; every entry returned by os.listdir
            is opened and unpickled, so it should contain only pickle files.

    Returns:
        list: one unpickled object per directory entry, in os.listdir order.
    """
    # NOTE: pickle.load can execute arbitrary code while unpickling; only
    # point this function at files produced by a trusted training run.
    dictArray = []
    for filename in os.listdir(inFolder):
        # the context manager closes the handle even if unpickling fails
        with open(inFolder + "/" + filename, 'rb') as pickleFile:
            dictArray.append(pickle.load(pickleFile))
    return dictArray

### calculate mean and std dev of the cost function for training and dev sets ###
**Inputs:** <br>
- **performanceDictArray** (list of dicts) - one performance dictionary per trained model, each containing performance metrics at set epoch intervals. <br>
- **epochVals** (integer array) - set of epoch steps at which performance metrics are calculated <br>

**Outputs:** <br>
- **meanTrainCost** (np float array) - mean cost for each epoch step in a random sample from the train set across multiple models trained with the same hyperparameters <br>
- **stdDevTrainCost** (np float array) - std dev cost for each epoch step in a random sample from the train set across multiple models trained with the same hyperparameters <br>
- **meanDevCost** (np float array) - mean cost for each epoch step in a random sample from the dev set across multiple models trained with the same hyperparameters <br>
- **stdDevDevCost** (np float array) - std dev cost for each epoch step in a random sample from the dev set across multiple models trained with the same hyperparameters <br>

In [None]:
def calcSummaryStatsCost(performanceDictArray,epochVals):
    """Summarise the cost per epoch step across a set of performance dictionaries.

    Args:
        performanceDictArray: sequence of performance dictionaries; each one
            is read at ['trainDict']['Cost'] and ['devDict']['Cost'].
        epochVals: epoch steps; only its length is used, to decide how many
            leading cost values are read from each dictionary.

    Returns:
        tuple: (meanTrainCost, stdDevTrainCost, meanDevCost, stdDevDevCost),
        each a numpy array with one NaN-ignoring statistic per epoch step.
    """
    rowCount = len(performanceDictArray)
    colCount = len(epochVals)

    # rows are performance dictionaries, columns are epoch steps
    trainCosts = np.zeros((rowCount, colCount))
    devCosts = np.zeros((rowCount, colCount))

    for row, perfDict in enumerate(performanceDictArray):
        trainSeries = perfDict['trainDict']['Cost']
        devSeries = perfDict['devDict']['Cost']
        for col in range(colCount):
            trainCosts[row, col] = trainSeries[col]
            devCosts[row, col] = devSeries[col]

    # collapse the rows: one mean / std dev per epoch step
    trainStats = (np.nanmean(trainCosts, axis=0), np.nanstd(trainCosts, axis=0))
    devStats = (np.nanmean(devCosts, axis=0), np.nanstd(devCosts, axis=0))

    return trainStats + devStats

### calculate mean and std performance score (FScore or MCC) for a set of hyperparameters ###
**Inputs:** <br>
- **performanceDictArray** (list of dicts) - one performance dictionary per trained model, each containing performance metrics at set epoch intervals. <br>
- **epochVals** (integer array) - set of epoch steps at which performance metrics are calculated <br>
- **outcomeIndex** (integer) - column containing the outcome of interest.  MCC Score is column 0, F1 Score is column 1 <br>

**Outputs:** <br>
- **meanTrainVals** (np float array) - mean performance score for each epoch step in a random sample from the train set across multiple models trained with the same hyperparameters <br>
- **stdDevTrainVals** (np float array) - std dev performance score for each epoch step in a random sample from the train set across multiple models trained with the same hyperparameters <br>
- **meanDevVals** (np float array) - mean performance score for each epoch step in a random sample from the dev set across multiple models trained with the same hyperparameters <br>
- **stdDevDevVals** (np float array) - std dev performance score for each epoch step in a random sample from the dev set across multiple models trained with the same hyperparameters <br>

In [None]:
def calcSummaryStatsSingleOutcome(performanceDictArray,epochVals,outcomeIndex):
    """Summarise one performance score per epoch step across performance dictionaries.

    Args:
        performanceDictArray: sequence of performance dictionaries; each one
            is read at ['trainDict']['F1Score'] and ['devDict']['F1Score'].
        epochVals: epoch steps; only its length is used, to decide how many
            leading entries are read from each dictionary.
        outcomeIndex: index applied to each per-epoch 'F1Score' entry; element
            0 of the selected item is the value that gets summarised.

    Returns:
        tuple: (meanTrainVals, stdDevTrainVals, meanDevVals, stdDevDevVals),
        each a numpy array with one NaN-ignoring statistic per epoch step.
    """
    rowCount = len(performanceDictArray)
    colCount = len(epochVals)

    # rows are performance dictionaries, columns are epoch steps
    trainScores = np.zeros((rowCount, colCount))
    devScores = np.zeros((rowCount, colCount))

    for row, perfDict in enumerate(performanceDictArray):
        trainSeries = perfDict['trainDict']['F1Score']
        devSeries = perfDict['devDict']['F1Score']
        for col in range(colCount):
            trainScores[row, col] = trainSeries[col][outcomeIndex][0]
            devScores[row, col] = devSeries[col][outcomeIndex][0]

    # collapse the rows: one mean / std dev per epoch step
    trainStats = (np.nanmean(trainScores, axis=0), np.nanstd(trainScores, axis=0))
    devStats = (np.nanmean(devScores, axis=0), np.nanstd(devScores, axis=0))

    return trainStats + devStats

### calculate summary stats for a single parameter value that's being tuned ### 
**Inputs** <br>
- **summaryStatsDict** (dictionary) - contains performance of training and dev sets for the hyperparameter being tuned
    - 'trainDict' - dictionary with cost and performance metrics based on evaluating a sample of the train set
    - 'devDict' - dictionary with cost and performance metrics based on evaluating a sample of the dev set
    - 'epochVals' - epoch steps at which cost and performance metrics were calculated
- **performanceDictArray** (list of dicts) - one performance dictionary per model trained with the parameter value, each containing performance metrics at set epoch intervals. <br>

No outputs.  Rather, the function appends data in the summaryStatsDict

In [None]:
def _storeSummaryStat(targetDict, key, values, isFirstEntry):
    """Create targetDict[key] on first use, otherwise append to it."""
    # The first entry is stored bare and later entries are wrapped in a
    # one-element list; this keeps the layout the function has always
    # produced, which createSubplot flattens again with a reshape.
    if isFirstEntry:
        targetDict[key] = [values]
    else:
        targetDict[key].append([values])


def calcSummaryStatsSingleParamValue(summaryStatsDict,performanceDictArray):
    """Add summary statistics for one hyperparameter value to summaryStatsDict.

    For every outcome named in summaryStatsDict['trainDict']['outcomeNames'],
    and for the cost, the mean and std dev per epoch step are calculated and
    stored in both summaryStatsDict['trainDict'] and summaryStatsDict['devDict'].

    Args:
        summaryStatsDict (dict): holds 'trainDict', 'devDict' and 'epochVals'.
            'trainDict' must provide 'outcomeNames' (keys for the means) and
            'outcomeDevNames' (keys for the std devs).  Modified in place;
            'epochVals' is also sorted in place.
        performanceDictArray: performance dictionaries of the models trained
            with the hyperparameter value being summarised.

    Returns:
        None.  Results are appended to summaryStatsDict.
    """
    epochVals = summaryStatsDict['epochVals']
    epochVals.sort()

    trainStats = summaryStatsDict['trainDict']
    devStats = summaryStatsDict['devDict']
    outcomeMeanNames = trainStats['outcomeNames']
    outcomeDevNames = trainStats['outcomeDevNames']

    # for each outcome of interest except for cost, calculate mean and std dev for train and dev samples
    # (iterate over the names held in the dict rather than a module-level global)
    for index in range(len(outcomeMeanNames)):
        meanTrain, stdDevTrain, meanDev, stdDevDev = calcSummaryStatsSingleOutcome(performanceDictArray,epochVals,index)
        meanKey = outcomeMeanNames[index]
        devKey = outcomeDevNames[index]

        # presence of the mean key in the train dict decides create-vs-append for all four entries
        isFirstEntry = meanKey not in trainStats
        _storeSummaryStat(trainStats, meanKey, meanTrain, isFirstEntry)
        _storeSummaryStat(trainStats, devKey, stdDevTrain, isFirstEntry)
        _storeSummaryStat(devStats, meanKey, meanDev, isFirstEntry)
        _storeSummaryStat(devStats, devKey, stdDevDev, isFirstEntry)

    # calculate mean and std dev of cost function for train and dev samples
    meanTrain, stdDevTrain, meanDev, stdDevDev = calcSummaryStatsCost(performanceDictArray,epochVals)

    # if cost is not yet in summary stats dict, create a key and instantiate.  Otherwise, append to it.
    isFirstEntry = 'cost' not in trainStats
    _storeSummaryStat(trainStats, 'cost', meanTrain, isFirstEntry)
    _storeSummaryStat(trainStats, 'costErr', stdDevTrain, isFirstEntry)
    _storeSummaryStat(devStats, 'cost', meanDev, isFirstEntry)
    _storeSummaryStat(devStats, 'costErr', stdDevDev, isFirstEntry)

### create subplot to fit within a series of plots.  Each subplot contains multiple values of a single hyperparameter ###
**Inputs:**<br>
- **dataDict** (dict) - dictionary containing data to graph <br>
- **yParam** (string) - name of key that corresponds to y-value to graph <br>
- **errParam** (string) - name of key that corresponds to std err to graph <br>
- **colorVec** (string array) - hexadecimal values for colors to distinguish between parameter values <br>
- **subplotIndex** (int) - which subplot space within the entire plot area to create the graph <br>
- **yLabel** (string) - y axis label <br>
- **xLabel** (string) - x axis label <br>
- **subplotTitle** (string) - title for subplot <br>

In [None]:
def createSubplot(dataDict,yParam,errParam,colorVec,subplotIndex,yLabel,xLabel,subplotTitle,showErrorBars=False):
    """Draw one subplot with a line per hyperparameter value.

    The subplot is placed in an 8-row by 2-column grid on the current figure,
    with the x axis running from 0 to max(epoch) + 10 and the y axis from 0 to 1.1.

    Args:
        dataDict (dict): data to graph.  Read keys: 'epoch' (x values),
            'paramName' (name of the key that holds the hyperparameter
            values), yParam and errParam.
        yParam (str): key of the y values; one series per hyperparameter value.
        errParam (str): key of the std dev values matching yParam.
        colorVec (list of str): line colours, indexed by hyperparameter value.
        subplotIndex (int): position of the subplot within the 8x2 grid.
        yLabel (str or None): y axis label; None hides the y tick labels instead.
        xLabel (str or None): x axis label.
        subplotTitle (str): title for the subplot.
        showErrorBars (bool): when True, also draw errParam as faint error
            bars.  Defaults to False, which draws the lines only.
    """
    xVals = dataDict['epoch']
    yVals = dataDict[yParam]
    errVals = dataDict[errParam]
    paramVals = dataDict[dataDict['paramName']]
    numEpochSteps = len(xVals)
    xMax = max(xVals) + 10

    tempAxis = plt.subplot(8,2,subplotIndex)
    tempAxis.set_xlim([0,xMax])
    tempAxis.set_ylim([0,1.1])
    tempAxis.set_title(subplotTitle)

    # the x label is always applied; hiding x tick labels for a missing label is currently disabled
    plt.xlabel(xLabel)
    if yLabel is None:
        plt.setp(tempAxis.get_yticklabels(), visible=False)
    else:
        plt.ylabel(yLabel)

    for i, paramVal in enumerate(paramVals):
        # stored series may carry an extra wrapping dimension; flatten to one value per epoch step
        currYVals = np.asarray(yVals[i]).reshape((numEpochSteps,))
        currColor = colorVec[i]
        plt.plot( xVals, currYVals, marker='None', markerfacecolor=currColor, label = paramVal, alpha = 0.8, color=currColor, linewidth=1)
        if showErrorBars:
            yErrVals = np.asarray(errVals[i]).reshape((numEpochSteps,))
            # fmt='none' draws the bars only, so the line above is not drawn twice
            plt.errorbar(xVals, currYVals, yerr=yErrVals, fmt='none', ecolor=currColor, alpha=0.2)

In [None]:
# print model params on output figure.  Not used in current version but retained for optional future use.

def printModelParams(dataDict):
    """Write the non-tuned model parameters found in dataDict onto the current figure.

    The parameter named by dataDict['paramName'] is left out, as are any
    known parameters that dataDict has no key for.
    """
    paramUnderTest = dataDict['paramName']
    availableKeys = dataDict.keys()
    knownModelParams = (
        'mini_batch_size',
        'learning_rate',
        'momentum',
        'num_outcomes',
        'postLSTM_layer_size',
        'postLSTM_layers',
        'pre_softmax_layer_size',
        'keep_prob',
        'hidden_layer_activation',
    )
    # one "name: value" line per reported parameter, in the order listed above
    reportLines = [
        name + ": " + str(dataDict[name]) + "\n"
        for name in knownModelParams
        if name != paramUnderTest and name in availableKeys
    ]
    figureText = "Other Tunable Model Params: \n" + "".join(reportLines)
    plt.figtext(0.1,-0.18,figureText,horizontalalignment='left')

### create all subplots for a figure to evaluate model performance of various values for a single hyperparameter ###
**Inputs:**<br>
- **statsDict** (dict) - dictionary containing values to plot <br>
- **outputFilepath** (string) - filepath where output figure file should be saved <br>

In [None]:
def createGraphs(statsDict,outputFilepath):
    """Create and save the figure of train and dev performance for one hyperparameter.

    Each outcome listed in statsDict['trainDict']['outcomeNames'], followed by
    the cost, gets one row of two subplots: train on the left, dev on the right.

    Args:
        statsDict (dict): values to plot; 'trainDict' and 'devDict' are read.
            The dictionary and the lists inside it are not modified.
        outputFilepath (str): filepath where the output figure is saved.
    """
    trainDict = statsDict['trainDict']
    devDict = statsDict['devDict']
    fig = figure(num=None, figsize=(10, 18), dpi=160, facecolor='w', edgecolor='k')
    fig.suptitle(trainDict['paramName'] + " Performance", fontsize=16)
    colorVec = ['#003f5c','#2f4b7c','#665191','#a05195','#d45087','#f95d6a','#ff7c43','#ffa600']
    maxEpoch = max(trainDict['epoch'])
    stepLen = (maxEpoch)/(len(trainDict['epoch']))
    # NOTE(review): this is the length of the first stored mean-cost series
    # (one value per epoch step), not the number of hyperparameter values -
    # confirm which one the legend column count is meant to use.
    numParams = len(trainDict['cost'][0])
    plt.xticks(np.arange(0, maxEpoch + 10, step=stepLen))

    # work on copies: the name lists are shared by the train and dev dicts, so
    # appending in place would grow the caller's lists on every call
    outcomeNames = list(trainDict['outcomeNames']) + ['cost']
    devNames = list(trainDict['outcomeDevNames']) + ['costErr']

    for index, currOutcome in enumerate(outcomeNames):
        currDev = devNames[index]
        # odd subplot positions hold the train graphs, even positions the dev graphs
        createSubplot(trainDict,currOutcome,currDev,colorVec,index*2+1,currOutcome,None,"Training " + currOutcome)
        createSubplot(devDict,currOutcome,currDev,colorVec,(index+1)*2,currOutcome,None,"Dev " + currOutcome)
    fig.legend(bbox_to_anchor=(0.4, 0.18, 0, 0),loc='lower center', ncol=numParams)

    #printModelParams(trainDict) # currently not used.  Retained for optional future functionality

    plt.savefig(outputFilepath, bbox_inches="tight")

    #plt.show() # only use for debugging purposes

### main function ###

In [None]:
# load data from hard drive
datasets = readPickledPerformanceDatasets(performancePickleParams['learningRate'])
if not datasets:
    # fail with a clear message instead of an IndexError on datasets[0] below
    raise ValueError("no performance datasets found in " + performancePickleParams['learningRate'])

# hyperparameter being tuned: selects the value inside each dataset's
# 'modelParams' and names the matching entries in the dictionaries below
modelParam = 'learning_rate'

# distinct hyperparameter values, in order of first appearance
uniqueParams = []
for dataset in datasets:
    paramVal = dataset['modelParams'][modelParam]
    if(paramVal not in uniqueParams):
        uniqueParams.append(paramVal)

# epoch steps are taken from the first dataset
epochVals = sorted(set(datasets[0]['trainDict']['EpochNum']))
outcomeNames = ['m_nature','m_safety','m_beauty','m_exercise','m_social','m_air','m_other']
outcomeDevNames = ['sd_nature','sd_safety','sd_beauty','sd_exercise','sd_social','sd_air','sd_other']

# the train and dev dictionaries start out identical except for the dataset
# name, and share the same epoch, outcome-name and parameter-value lists
trainDict = {
    'datasetName':'train',
    'paramName':modelParam,
    'epoch':epochVals,
    'outcomeNames':outcomeNames,
    'outcomeDevNames':outcomeDevNames,
    modelParam:uniqueParams
}
devDict = {
    'datasetName':'dev',
    'paramName':modelParam,
    'epoch':epochVals,
    'outcomeNames':outcomeNames,
    'outcomeDevNames':outcomeDevNames,
    modelParam:uniqueParams
}


summaryStatsDict = {'trainDict':trainDict,'devDict':devDict,'epochVals':epochVals,'numBatches':10}

# summarise the models one hyperparameter value at a time
for uniqueParam in uniqueParams:
    paramDatasetSubsets = [
        dataset for dataset in datasets
        if dataset['modelParams'][modelParam] == uniqueParam
    ]
    calcSummaryStatsSingleParamValue(summaryStatsDict,paramDatasetSubsets)


createGraphs(summaryStatsDict,"C:/users/larkinan/desktop/testPerformanceGraphsv_Extended.eps")