# Calculate and graph performance summary statistics #

**Author:** Andrew Larkin, Oregon State University College of Public Health and Human Sciences <br>
**Date created:** January 5th, 2018

### Summary ###
For evaluating the performance of candidate models on the train, dev, test, and independent datasets.  Calculate confusion matrices for each model-dataset combination.  Graph precision and recall for each model-dataset combination and outcome.

This script is divided into two parts:
1) Calculate performance metrics and output to CSV
2) Graph performance metrics and save to .eps file (or print to screen)

### Import libraries and define global variables and constants ###

In [13]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import copy
from matplotlib.pyplot import figure
import pandas as ps
import matplotlib.lines as mlines

In [21]:
# Input and output locations.  Every path is derived from parentFolder, so
# relocating the project only requires editing that one line.
parentFolder = "C:/Users/larkinan/Desktop/DBTraining/"
# folder holding the model performance files
performFolder = "".join((parentFolder, "modelTrainingPerformance/"))
# intermediate file containing precision and recall estimates to graph
performCSV_Denorm = "".join((performFolder, "ModelPerformance_v1.csv"))


In [14]:
# load performanceMeasuresFromCSV
def loadPerformanceData(dataFilepath,debug=False):
    """Load model performance records from a CSV file.

    Inputs:
        dataFilepath (string) - path of the CSV file to read
        debug (bool) - when True, print the first rows of the data and the
                       number of records that were loaded

    Outputs:
        pandas dataframe containing the loaded (and processed) records
    """
    rawData = ps.read_csv(dataFilepath)

    # Second step in case future versions need to process input data
    processedData = rawData

    if debug:
        print(processedData.head())
        # len() of a dataframe is its row count; unlike indexing the first
        # column, this also works when the frame has no columns
        print("number of records: %i" % len(processedData))

    # return the processed frame (not the raw one) so that a future
    # processing step is not silently discarded
    return processedData

### For each subplot, setup graph properties including axis boundaries, titles, and reference lines ###
**Inputs:** <br>
- **tempAxis** (object) - matplotlib axis object for the current subplot <br>
- **xDim** (2 element float array) - min and max boundaries for the x axis
- **yDim** (2 element float array) - min and max boundaries for the y axis
- **yLabel** (string) - optional label for the yaxis
- **xLabel** (string) - optional label for the xaxis
- **title** (string) - optional subplot title

In [15]:
def setAxisProperties(tempAxis,xDim,yDim,yLabel=None,xLabel = None,title=None):
    """Apply axis limits, reference lines, and optional labels to a subplot.

    Inputs:
        tempAxis (object) - matplotlib axis object for the current subplot
        xDim (2 element float array) - min and max boundaries for the x axis
        yDim (2 element float array) - min and max boundaries for the y axis
        yLabel (string) - optional label for the y axis
        xLabel (string) - optional label for the x axis
        title (string) - optional subplot title
    """
    # dashed black reference lines at x = 0.7 and y = 0.7, each spanning 0 to 1
    dashedBlack = {'color': 'black', 'linestyle': 'dashed'}
    verticalRef = mlines.Line2D([0.7, 0.7], [0, 1], **dashedBlack)
    horizontalRef = mlines.Line2D([0,1],[0.7,0.7], **dashedBlack)

    tempAxis.set_xlim(xDim)
    tempAxis.set_ylim(yDim)
    for referenceLine in (verticalRef, horizontalRef):
        tempAxis.add_line(referenceLine)

    # labels and title are only applied when a non-empty value was supplied
    if yLabel:
        tempAxis.set_ylabel(yLabel)
    if xLabel:
        tempAxis.set_xlabel(xLabel)
    if title:
        tempAxis.set_title(title)

### plot subset for one outcome of interest.  ### <br>
plot includes all models and datasets for the outcome.  Colors and markers correspond to dataset and model, respectively <br>

**Inputs** <br>
- **performData** (pandas dataframe) - contains denormalized records of the dataset.  Important keys in the dataset include: <br>
    1) Outcome (the column named by outcomeVar) - which outcome the record corresponds to <br>
    2) Dataset - which dataset the record corresponds to (e.g. train, dev, test, etc.) <br>
    3) Model - which model the record corresponds to <br>
    4) Precision <br>
    5) Recall <br>
    
    
- **outcome** (string) - outcome of interest.  Used to screen dataset
- **outcomeVar** (string) - key for the column in the dataset that contains the outcome label
- **subplotMatrix** (3 element int array) - number of rows and columns in the master plot, followed by the 1-based index at which to place the subplot
- **xDim** (2 element float array) - min and max boundaries of the x-axis
- **yDim** (2 element float array) - min and max boundaries of the y-axis
- **colorVec** (string array) - colors to distinguish between origin datasets (e.g. train, dev,) in the subplot <br>
- **markerVec** (string array) - markers to distinguish between models in the subplot <br>

**Outputs** <br>
- **colorDict** (dict) - dataset:color pairs (returned first).  Mostly for debug purposes
- **markerDict** (dict) - model:marker pairs (returned second).  Mostly for debug purposes

In [41]:
def plotByOutcome(performData,outcome,outcomeVar,subplotMatrix = [4,2,1],
                  xDim = [0.4,1],yDim = [0.2,1],
                  colorVec = ['red','blue','green','#a05195','#d45087','#f95d6a','#ff7c43','#ffa600'], 
                  markerVec = ['+','o','s','d','x','^'],debug=False):
    """Plot precision (x axis) vs. recall (y axis) for one outcome.

    One point is drawn for every model-dataset combination.  Point color
    identifies the dataset and point marker identifies the model.  For each
    dataset, a line in the dataset color connects the 'Whole' and 'NoEmbed'
    models when both are present.

    Inputs:
        performData (pandas dataframe) - denormalized performance records with
            'Dataset', 'Model', 'Precision' and 'Recall' columns, plus the
            column named by outcomeVar
        outcome (string) - outcome of interest.  Used to screen the dataset
        outcomeVar (string) - key of the column that contains the outcome label
        subplotMatrix (3 element int array) - rows, columns and 1-based index
            passed to plt.subplot
        xDim (2 element float array) - min and max boundaries of the x-axis
        yDim (2 element float array) - min and max boundaries of the y-axis
        colorVec (string array) - colors to distinguish between datasets.
            Reused cyclically if there are more datasets than colors
        markerVec (string array) - markers to distinguish between models.
            Reused cyclically if there are more models than markers
        debug (bool) - when True, push the graph to screen before returning

    Outputs:
        colorDict (dict) - dataset:color pairs.  Mostly for debug purposes
        markerDict (dict) - model:marker pairs.  Mostly for debug purposes

    Note: the list defaults are shared between calls; they are only read,
    never modified, inside this function.
    """

    # setup axis and graph properties
    tempAxis = plt.subplot(subplotMatrix[0],subplotMatrix[1],subplotMatrix[2])
    setAxisProperties(tempAxis,xDim,yDim,'Recall','Precision',outcome)

    # setup local variables
    outcomeSubset = performData.loc[performData[outcomeVar] == outcome]  # data subset for the outcome of interest
    dataOrigins = sorted(set(outcomeSubset['Dataset']))          # set of datasets
    models = sorted(set(outcomeSubset['Model']))                 # set of models
    modelsOfInterest = ['Whole','NoEmbed']                       # models to compare

    # model:marker pairs.  The modulo wraps around instead of raising an
    # IndexError when there are more models than markers
    markerDict = {
        model: markerVec[modelIndex % len(markerVec)]
        for modelIndex, model in enumerate(models)
    }
    colorDict = {}                                               # store dataset:color pairs

    # for each dataset and each model, plot the precision vs. recall value
    # with the appropriate (color,symbol) type
    for originIndex, origin in enumerate(dataOrigins):

        # subset records to the current 'origin' dataset of interest
        originSubset = outcomeSubset.loc[outcomeSubset['Dataset'] == origin]
        datasetColor = colorVec[originIndex % len(colorVec)]
        colorDict[origin] = datasetColor

        # (precision, recall) of each model of interest found in THIS dataset.
        # Rebuilt for every dataset so coordinates never leak from a
        # previously plotted dataset
        comparePoints = {}

        # for the current dataset, go through each model type
        for model in models:

            # normally one datapoint for each model-dataset combination
            dataPoint = originSubset.loc[originSubset['Model'] == model]
            if dataPoint.empty:
                # this model was not evaluated on this dataset
                continue

            # draw on the subplot's own axis rather than the implicit
            # pyplot "current" axis
            tempAxis.scatter(
                dataPoint['Precision'],
                dataPoint['Recall'],
                marker = markerDict[model],
                color = datasetColor
            )

            # remember coordinates for the line comparing the models of
            # interest.  .iloc[0] extracts a scalar explicitly; if duplicate
            # records exist, the first one anchors the line
            if model in modelsOfInterest:
                comparePoints[model] = (
                    float(dataPoint['Precision'].iloc[0]),
                    float(dataPoint['Recall'].iloc[0])
                )

        # add line comparing the models of interest for the current dataset,
        # but only when both endpoints exist
        if all(model in comparePoints for model in modelsOfInterest):
            compareLineX = [comparePoints[model][0] for model in modelsOfInterest]
            compareLineY = [comparePoints[model][1] for model in modelsOfInterest]
            compare_line = mlines.Line2D(compareLineX,compareLineY,color=datasetColor)
            tempAxis.add_line(compare_line)

    # when debugging, push graph to screen rather than saving to file
    if debug:
        plt.show()

    return(colorDict,markerDict)

### plot precision vs. recall performance for all outcomes, datasets, and models ###
Graph consists of multiple subplots, with one subplot for each outcome.  Points are colored by originating dataset (e.g. train, dev, test), while markers correspond to model (e.g. whole model, model with no emoticons, etc.)

**Inputs** <br>
- **performData** (pandas dataframe) - contains all records and labels for all outcomes.  Keys include: <br>
    1) Outcome - which outcome the record corresponds to <br>
    2) Dataset - which dataset the record corresponds to (e.g. train, dev, test, etc.) <br>
    3) Model - which model the record corresponds to <br>
    4) Precision <br>
    5) Recall <br>

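- **xLims** (array of 2 element float arrays) - x-axis boundaries, one [min, max] pair per outcome subplot <br>
- **yLims** (array of 2 element float arrays) - y-axis boundaries, one [min, max] pair per outcome subplot <br>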
- **title** (string) - optional graph title
- **outputFilename** (string) - absolute filepath where the plot will be saved.  If none, plot is pushed to notebook <br>

In [40]:
def plotPrecisionRecall(performData,xLims,yLims,title="Precision vs. Recall",outputFilename=None):
    """Plot precision vs. recall for all outcomes, one subplot per outcome.

    Inputs:
        performData (pandas dataframe) - records for all outcomes.  Must
            contain an 'Outcome' column plus the columns plotByOutcome reads
        xLims (array of 2 element float arrays) - x-axis [min, max] pair for
            each subplot
        yLims (array of 2 element float arrays) - y-axis [min, max] pair for
            each subplot
        title (string) - optional graph title
        outputFilename (string) - filepath where the plot will be saved.  If
            not given, the plot is shown instead and the color and marker
            dictionaries of the last subplot are printed

    Outcomes are processed in sorted order, so the i-th entry of xLims and
    yLims applies to the i-th outcome alphabetically.

    Raises:
        IndexError - if xLims or yLims has fewer entries than there are
            outcomes
    """

    # local variable setup.
    fig = figure(num=None, figsize=(8, 16), dpi=160, facecolor='w', edgecolor='k')
    fig.suptitle(title, fontsize=16)

    # sorted (rather than raw set order) so that every outcome lands in the
    # same subplot, with the same axis limits, on every run
    uniqueOutcomes = sorted(set(performData['Outcome'].dropna()))

    # two columns; at least four rows, more if needed to fit every outcome
    nCols = 2
    nRows = max(4, -(-len(uniqueOutcomes) // nCols))

    # defaults so the debug printout below also works when there are no outcomes
    colorDict, markerDict = {}, {}

    # subplot indices start at 1, not 0
    for index, outcome in enumerate(uniqueOutcomes, start=1):
        colorDict, markerDict = plotByOutcome(
            performData,
            outcome,
            'Outcome',
            [nRows,nCols,index],
            xLims[index-1],
            yLims[index-1]
        )

    if not outputFilename:
        print(colorDict)
        print(markerDict)
        plt.show()
    else:
        fig.savefig(outputFilename,bbox_inches="tight")
        # release the figure once it is on disk; otherwise every call leaves
        # another large figure open in memory
        plt.close(fig)

In [26]:
def createPerformanceGraphs(performCSV):
    """Load performance records and save two precision vs. recall graphs.

    The first graph uses axis boundaries tailored to each subplot, the second
    uses the same boundaries for every subplot.  Both are written to
    performFolder.

    Inputs:
        performCSV (string) - filepath of the CSV containing performance records
    """
    performDataset = loadPerformanceData(performCSV,True)

    # axis boundaries that differ between subplots
    varyingY = [[0.4,1],[0.4,1],[0.5,1],[0.2,1],[0.5,1],[0.7,1],[0.0,1]]
    varyingX = [[0.4,1],[0.7,1],[0.7,1],[0.7,1],[0.7,1],[0.7,1],[0.4,1]]

    # identical axis boundaries for all seven subplots
    fixedY = [[0.0,1] for _ in range(7)]
    fixedX = [[0.4,1] for _ in range(7)]

    # (output file name, graph title, x boundaries, y boundaries)
    graphSpecs = (
        ("precRecallVaryingBound.eps", "Precision vs. Recall with Varying Boundaries", varyingX, varyingY),
        ("precRecallFixedBound.eps", "Precision vs. Recall with Set Boundaries", fixedX, fixedY),
    )

    for fileName, graphTitle, xBounds, yBounds in graphSpecs:
        plotPrecisionRecall(performDataset,xBounds,yBounds,graphTitle,performFolder + fileName)

In [39]:
# run the full pipeline: load the denormalized performance CSV and save both graphs
createPerformanceGraphs(performCSV_Denorm)

      Outcome     Model   Dataset  Precision  Recall
0   Aesthetic  Emoticon  Training       0.92    0.81
1      Safety  Emoticon   NewYork       0.92    0.36
2         Air  Emoticon  Training       0.89    0.76
3  Greenspace  Emoticon  Training       0.88    0.92
4   Aesthetic  Emoticon      Test       0.88    0.66
number of records: 164
