In [1]:
import os
# CUDA device settings, kept ahead of the framework imports below
# (presumably so the backend picks them up when it initialises).
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "",          # empty device list: no CUDA device is exposed
})

from tabulate import tabulate
from scipy.interpolate import interp1d
import numpy as np
import math
from MAPScorer import MAPScorer
from scipy import interp
from myLSTM import MyLSTM
from sklearn.metrics import roc_curve, precision_recall_curve, auc, roc_auc_score, precision_recall_fscore_support, f1_score, accuracy_score, cohen_kappa_score
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Directory that receives the PDF figures written by the plotting cells.
outputPlotDir = "plots/lstm"
# savefig does not create missing directories, so make sure the target exists
# before any figure is written (no-op when it is already there).
if not os.path.isdir(outputPlotDir):
    os.makedirs(outputPlotDir)

In [3]:
# Project LSTM wrapper; the cells below read lstm.XTest, lstm.yTest and
# lstm.model[task] from this object.
lstm = MyLSTM()
# NOTE(review): presumably populates lstm.XTest / lstm.yTest - confirm in myLSTM.
lstm.loadData()
# NOTE(review): presumably fills lstm.model with one trained model per task - confirm in myLSTM.
lstm.loadModels()
# Loading of the fine-tuning model is disabled.
#lstm.loadFineTuning3Model()

Load models


In [4]:
# Tasks evaluated in this notebook; the variant including the fine-tuned
# 'sede2ft3' task is kept commented out.
#tasks = ['sede1', 'sede12', 'sede2ft3', 'morfo1', 'morfo2']
tasks = ['sede1', 'sede12', 'morfo1', 'morfo2']
# Tasks whose models take extra inputs and reuse another task's ground truth.
ftTasks = ['sede2ft3']

# Per-task results, all keyed by task name:
#   yp  - model.predict_proba output
#   ycn - model.predict_classes output (class indices)
#   yc  - predictions as an indicator matrix shaped like yt
#   ytn - ground-truth class indices
#   yt  - ground-truth matrix taken from lstm.yTest
yp, ycn, yc, ytn, yt = {}, {}, {}, {}, {}

for task in tasks:
    print(task)
    fineTuned = task in ftTasks

    # Choose what is fed to the model: plain tasks get the test inputs only,
    # fine-tuned tasks additionally get ground-truth labels of other tasks.
    if not fineTuned:
        modelInput = lstm.XTest
    elif task == 'sede2ft':
        modelInput = [lstm.XTest, lstm.XTest]
    elif task == 'morfo1ft4':
        modelInput = [lstm.yTest['sede1'], lstm.yTest['morfo2'], lstm.XTest]
    elif task == 'morfo1ft6':
        modelInput = [lstm.yTest['morfo2'], lstm.XTest]
    else:
        modelInput = [lstm.yTest['sede1'], lstm.XTest]
    # NOTE(review): predict_proba / predict_classes look like the legacy Keras
    # Sequential helpers - confirm they exist in the pinned Keras version.
    yp[task] = lstm.model[task].predict_proba(modelInput)
    ycn[task] = lstm.model[task].predict_classes(modelInput)

    if fineTuned:
        # Fine-tuned tasks share the ground truth of an already processed
        # base task, so that base task must appear earlier in `tasks`.
        if task == 'sede2ft3':
            baseTask = 'sede12'
        elif task in ('morfo1ft4', 'morfo1ft5', 'morfo1ft6'):
            baseTask = 'morfo1'
        else:
            baseTask = 'sede2'
        yt[task] = yt[baseTask]
        ytn[task] = ytn[baseTask]
    else:
        yt[task] = lstm.yTest[task]
        # Class index of each sample = position of the first non-zero entry
        # of its ground-truth row (rows are presumably one-hot).
        ytn[task] = np.zeros_like(ycn[task])
        for row in range(len(yt[task])):
            ytn[task][row] = np.nonzero(yt[task][row])[0][0]

    # Turn predicted class indices into an indicator matrix shaped like yt.
    yc[task] = np.zeros_like(yt[task])
    for row, cls in enumerate(ycn[task]):
        yc[task][row][cls] = 1
   

sede1
sede12
morfo1
morfo2


In [6]:
# Summary table: for every task one row of averaging-independent scores
# (MAP, accuracy, Cohen's kappa) followed by one row per averaging mode
# (precision, recall, F1). The same numbers are kept in `metrics`.
na = 'N/A'
metrics = {}
table = [["task", "average", "meanAvgPrec", "accuracy", "kappa", "precision", "recall", "f1score"]]
for task in tasks:
    # Blank spacer row between tasks.
    table.append([" "] * 7)

    metrics[task] = {na: {}}
    overall = metrics[task][na]
    overall['meanAvgPrec'] = MAPScorer().score(yt[task], yp[task])
    overall['accuracy'] = accuracy_score(yt[task], yc[task])
    overall['kappa'] = cohen_kappa_score(ytn[task], ycn[task])
    table.append([task, na, overall['meanAvgPrec'], overall['accuracy'], overall['kappa'], na, na, na])

    for avg in ['micro', 'macro', 'weighted']:
        metrics[task][avg] = {}
        averaged = metrics[task][avg]
        averaged['precision'], averaged['recall'], averaged['f1score'], averaged['support'] = \
            precision_recall_fscore_support(yt[task], yc[task], average=avg)
        table.append([task, avg, na, na, na, averaged['precision'], averaged['recall'], averaged['f1score']])
print(tabulate(table))
        

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


------  --------  ------------------  ------------------  ------------------  -------------------  -------------------  -------------------
task    average   meanAvgPrec         accuracy            kappa               precision            recall               f1score

sede1   N/A       0.9364649451945009  0.9024079094243342  0.890846510747241   N/A                  N/A                  N/A
sede1   micro     N/A                 N/A                 N/A                 0.9024079094243342   0.9024079094243342   0.9024079094243342
sede1   macro     N/A                 N/A                 N/A                 0.5900548632697113   0.519477268146323    0.5390695533692659
sede1   weighted  N/A                 N/A                 N/A                 0.8942594582122126   0.9024079094243342   0.8955120448451808

sede12  N/A       0.8147925075205438  0.7210439589645458  0.7042799664652666  N/A                  N/A                  N/A
sede12  micro     N/A                 N/A                 N/A    

In [None]:
def _calculateMicroMacroCurve(curveFunction, yt, yp):
    """Compute per-class, micro-averaged and macro-averaged curves with their areas.

    Parameters
    ----------
    curveFunction : callable
        Called as ``curveFunction(y_true, y_score)`` with two 1-D arrays and
        expected to return an ``(abscissa, ordinate)`` pair of 1-D arrays.
    yt : 2-D array
        Ground-truth matrix; ``yt.shape[1]`` is used as the number of classes
        and column ``i`` is passed to ``curveFunction`` for class ``i``.
    yp : 2-D array
        Score matrix, indexed column-wise exactly like ``yt``.

    Returns
    -------
    tuple of three dicts ``(abscissa, ordinate, area)``
        Each dict is keyed by the integer class index ``0 .. n_classes-1`` plus
        ``"micro"`` and ``"macro"``:

        * class index -- curve of that single column;
        * ``"micro"`` -- curve of the flattened ``yt`` / ``yp``;
        * ``"macro"`` -- mean of the per-class curves, linearly interpolated on
          the union of all non-NaN per-class abscissa values. Classes whose
          curve contains NaN are left out of the mean. If every class is left
          out, the macro ordinate is all NaN.

        ``area`` holds ``auc(abscissa, ordinate)`` for each key.
    """
    n_classes = yt.shape[1]
    abscissa = dict()
    ordinate = dict()
    area = dict()

    # Individual curve for every class column.
    for i in range(n_classes):
        abscissa[i], ordinate[i] = curveFunction(yt[:, i], yp[:, i])
        area[i] = auc(abscissa[i], ordinate[i])

    # Micro average: every (sample, class) cell is treated as one decision.
    abscissa["micro"], ordinate["micro"] = curveFunction(yt.ravel(), yp.ravel())
    area["micro"] = auc(abscissa["micro"], ordinate["micro"])

    # Common grid: sorted union of all per-class abscissa values, NaN dropped.
    all_rec = [x for x in np.unique(np.concatenate([abscissa[i] for i in range(n_classes)]))
               if not math.isnan(x)]

    # Float accumulator regardless of the dtype the curve function returned.
    mean_ordinate = np.zeros(len(all_rec), dtype=float)
    representedClasses = 0
    for i in range(n_classes):
        # A curve containing NaN cannot contribute to the mean; skip it
        # before interpolating instead of interpolating and discarding.
        if np.isnan(abscissa[i]).any() or np.isnan(ordinate[i]).any():
            continue
        mean_ordinate += interp1d(abscissa[i], ordinate[i])(all_rec)
        representedClasses += 1

    if representedClasses:
        mean_ordinate /= representedClasses
    else:
        # No usable class: report NaN explicitly rather than dividing by zero.
        mean_ordinate[:] = np.nan

    abscissa["macro"] = all_rec
    ordinate["macro"] = mean_ordinate
    area["macro"] = auc(abscissa["macro"], ordinate["macro"])

    return (abscissa, ordinate, area)

In [None]:
def _recallPrecisionPoints(yTrue, yScore):
    """Adapter for precision_recall_curve: return its second and first outputs, i.e. (recall, precision)."""
    curve = precision_recall_curve(yTrue, yScore)
    return (curve[1], curve[0])


def _fprTprPoints(yTrue, yScore):
    """Adapter for roc_curve: return its first two outputs, i.e. (fpr, tpr)."""
    curve = roc_curve(yTrue, yScore)
    return (curve[0], curve[1])


# Per-task curve data; each value is the dict returned by
# _calculateMicroMacroCurve (class indices plus "micro" and "macro").
rec, pre, pr_auc = {}, {}, {}      # precision/recall curves: x = recall, y = precision
fpr, tpr, roc_auc = {}, {}, {}     # ROC curves: x = false positive rate, y = true positive rate
for task in tasks:
    rec[task], pre[task], pr_auc[task] = _calculateMicroMacroCurve(_recallPrecisionPoints, yt[task], yp[task])
    fpr[task], tpr[task], roc_auc[task] = _calculateMicroMacroCurve(_fprTprPoints, yt[task], yp[task])

In [None]:
# Display label for each task id; used as the leading part of the plot titles.
titles = dict([
    # site tasks
    ('sede1', 'site'),
    ('sede2', 'subsite'),
    ('sede12', 'full site'),
    ('sede2ft', 'subsite st'),
    ('sede2ft2', 'subsite st'),
    ('sede2ft3', 'full site st'),
    # morphology tasks
    ('morfo1', 'type'),
    ('morfo2', 'behaviour'),
    ('morfo12', 'type/behaviour'),
    ('morfo1ft4', 'type st'),
    ('morfo1ft5', 'type st'),
    ('morfo1ft6', 'type st'),
])

In [None]:
# One ROC figure per task: micro- and macro-averaged curves plus the chance
# diagonal. Each figure is shown inline and saved as
# <outputPlotDir>/roc-<task>.pdf.
for task in tasks:
    fig, ax = plt.subplots()
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])

    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(titles[task]+' Receiver operating characteristic')
    # Dashed diagonal as visual reference.
    ax.plot([0, 1], [0, 1], color='k', lw=2, linestyle='--')
    # Per-class curves are not drawn here; only the micro and macro averages are.
    ax.plot(fpr[task]['micro'], tpr[task]['micro'], color='blue', lw=2, label='ROC micro (area = %0.4f)' % roc_auc[task]['micro'])
    ax.plot(fpr[task]['macro'], tpr[task]['macro'], color='red', lw=2, label='ROC macro (area = %0.4f)' % roc_auc[task]['macro'])
    ax.legend(loc="lower right")
    fig.savefig(outputPlotDir+"/roc-"+task+".pdf", bbox_inches='tight')
    plt.show()
    # Release the figure so figures do not pile up across loop iterations.
    plt.close(fig)

In [None]:
# One precision/recall figure per task: micro- and macro-averaged curves.
# Each figure is shown inline and saved as <outputPlotDir>/pr-<task>.pdf.
for task in tasks:
    fig, ax = plt.subplots()
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])

    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_title(titles[task]+' Precision Recall curve')
    # Per-class curves are not drawn here; only the micro and macro averages are.
    ax.plot(rec[task]['micro'], pre[task]['micro'], color='blue', lw=2, label='P/R micro (area = %0.4f)' % pr_auc[task]['micro'])
    ax.plot(rec[task]['macro'], pre[task]['macro'], color='red', lw=2, label='P/R macro (area = %0.4f)' % pr_auc[task]['macro'])
    ax.legend(loc="lower left")
    fig.savefig(outputPlotDir+"/pr-"+task+".pdf", bbox_inches='tight')
    plt.show()
    # Release the figure so figures do not pile up across loop iterations.
    plt.close(fig)