## 1. Experiments setups

If you have not yet installed NLIMED, please follow the [installation steps](#scrollTo=rQWn0QqFZzM7).

### Initiating required methods

In [None]:
try:
    import ujson as json
except:
    import json
    
import time
import shutil
import os
from NLIMED import __file__
from mpl_toolkits import mplot3d
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import math
import copy
import re

# Folder named 'indexes' next to the imported NLIMED module; moveIndex copies
# the index files of the selected strategy into it.
dest = os.path.join(os.path.dirname(__file__), 'indexes')
# Local folder holding one sub-folder of index files per indexing strategy.
source = 'pmr_inv'

# type of parsers
parsers = ['ncbo', 'benepar', 'stanza', 'coreNLP', 'xStanza']

# indexing strategies
idxMethods = ['wpure', 'wpl', 'wple']

# term frequency calculation
tfModes = ['mode1', 'mode2', 'mode3']

def moveIndex(idxMode):
    """Copy the index files of one indexing strategy into the NLIMED index folder.

    idxMode is the name of the sub-folder of ``source`` to copy from.  Every
    entry of that sub-folder is copied into ``dest`` under the same file name.
    """
    srcDir = os.path.join(source, idxMode)
    for name in os.listdir(srcDir):
        shutil.copy(os.path.join(srcDir, name), os.path.join(dest, name))

## 2. Experiment 1: NLQ Annotator Performance

This experiment measures NLIMED's performance in annotating natural language queries with ontology classes. 

We have a test set of 52 queries, each paired with its related ontology classes annotated manually by an expert. The queries have one to fourteen terms and one to four ontology classes. This test data, along with the Jupyter notebook of the experiment, is available on [Github](https://github.com/napakalas/NLIMED/tree/experiment).

For the measurement, we calculate the Area Under the Curve of Precision and Recall ($AUC_{PR}$) for different parsers, term frequency calculations, and indexing strategies.

Parsers:
  * Benepar
  * CoreNLP
  * Stanza
  * xStanza
  * NCBO => cannot calculate $AUC_{PR}$

Term frequency calculations:
  * mode1: using similarity equation with all features and dependency level feature
  * mode2: the tf is not utilising dependency
  * mode3: using mode1 but the tf weight is selected for one feature with the highest value

Indexing strategies:
  * wpure: indexing all terms into each feature
  * wpl  : adding terms in the preferred label to the other features
  * wple : adding terms in the preferred label to empty features

Please run the **experiment setup** before proceeding with this experiment.

### Performance based on $AUC_{PR}$

#### Get $AUC_{PR}$ for all settings 

Beware, generating $AUC_{PR}$ **will take days**. 

If you are only interested in the final result and the generated graph, you can jump to **the next subsection**.

Since the generated data is quite big, we store it in files rather than manage it directly in the runtime data structure. Later we load the files for further analysis.

In [None]:
# calculating AUC_PR
from itertools import product

# Sweep every indexing strategy / tf mode / parser combination.  The 'ncbo'
# parser is left out of the sweep.
for idxMethod, tfMode, parser in product(idxMethods, tfModes, parsers):
    if parser != 'ncbo':
        # the tf mode number is the last character of its name
        mode = int(tfMode[-1])
        # put the index files of this strategy in place before creating NLIMED
        moveIndex(idxMethod)
        nli = NLIMED(repo='pmr', parser=parser, tfMode=mode)
        stats = nli.auc('dataSource/DataTest.json')
        # save to Json; the time stamp keeps earlier runs from being overwritten
        filename = '_'.join([idxMethod, 'auc', tfMode, parser, str(time.time())])
        filename = "saveFile/" + filename + ".json"
        with open(filename, 'w') as fp:
            json.dump(stats, fp)

#### Load all $AUC_{PR}$ from files

Loading a large number of files into a dictionary will take minutes. The following code loads the $AUC_{PR}$ files and saves their summary into a summary file and a dataframe file. If the summary and dataframe files are available, the code loads them rather than the $AUC_{PR}$ files.

In [None]:
import time, gc

def loadAucPRData(filename = ""):
    """Load the AUC_PR results, preferring a cached summary when one exists.

    filename -- base name (no extension) of the summary pair
                'saveFile/<filename>.json' and 'saveFile/<filename>.csv'.
                When both can be read they are returned directly.  Otherwise
                the raw '*_auc_mode*' files in 'saveFile' are loaded and, if a
                filename was given, the summary pair is (re)written.

    Returns (dataAucPR, dfAucPR):
      dataAucPR -- dict idxMethod -> {'ncbo': <NCBO result>,
                   tfMode -> {parser -> {'maxAuc100', 'maxAuc70', 'maxAuc50',
                   'cutoffs'}}}
      dfAucPR   -- one row per evaluated setting with its auc value.
    """
    columns = ['idxMethod', 'tfMode', 'parser', '\u03B1 (preferred label)', '\u03B2 (synonym)', '\u03B3 (definition)', '\u03B4 (parent label)', '\u03B8 (description)', 'auc']
    dataAucPR = {}

    if len(filename)>0: ## load from a summary file
        try:
            with open('saveFile/'+filename+'.json', 'r') as fp:
                dataAucPR = json.load(fp)
            dfAucPR = pd.read_csv('saveFile/'+filename+'.csv')
            return dataAucPR, dfAucPR
        except (OSError, ValueError):
            # missing or unreadable summary: rebuild everything from the raw files
            print("\x1b[31m\"File "+filename+" is not exist\"\x1b[0m")
            # drop a half-loaded summary, otherwise the loop below would skip
            # every parser it already contains and the dataframe would stay empty
            dataAucPR = {}

    with open(os.path.join('saveFile','pure_10_ncbo_1613804506.2163131.json'), 'r') as fp:
        dataNcbo = json.load(fp)
    files = sorted(file for file in os.listdir('saveFile') if '_auc_mode' in file)
    # keep one file per setting: the key drops the trailing time stamp and the
    # ascending sort makes the last (latest) file win
    files = list({file.rsplit('_', 1)[0]:file for file in files}.values())
    frames = []
    for file in files:
        # file name layout: <idxMethod>_auc_<tfMode>_<parser>_<time stamp>.json
        idxMethod, _, tfMode, parser = file.split('_')[:4]
        data = dataAucPR.setdefault(idxMethod, {})
        data.setdefault('ncbo', dataNcbo)
        data.setdefault(tfMode, {})
        if parser in data[tfMode]:
            continue
        start = time.time()
        print("Loading .. %s, %s, %s, %s"%(idxMethod, parser, tfMode, file))
        with open(os.path.join('saveFile',file), 'r') as fp:
            loaded = json.load(fp)
        # position of the first best setting, used to pick its cutoff list
        idx = loaded['settings'].index(loaded['maxAuc100']['settings'][0])
        data[tfMode][parser] = {'maxAuc100':loaded['maxAuc100'], 'maxAuc70':loaded['maxAuc70'], 'maxAuc50':loaded['maxAuc50'], 'cutoffs':loaded['cutoffs'][idx]}
        print("Finish loading in %d seconds, start store to df"%(time.time()-start))

        #load to dataframe: every element of a setting except the last one,
        #followed by the auc of that setting
        rows = [[idxMethod, tfMode, parser] + list(setting[:-1]) + [auc]
                for setting, auc in zip(loaded['settings'], loaded['auc100'])]
        frames.append(pd.DataFrame(data=rows, columns=columns))

        del loaded
        print("Finished in %d seconds"%(time.time()-start))

    # DataFrame.append no longer exists in pandas 2; concatenate once instead
    dfAucPR = pd.concat(frames) if frames else pd.DataFrame(columns=columns)

    if len(filename)>0:
        # save dataAucPR to file
        with open('saveFile/'+filename+'.json', 'w') as fp:
            json.dump(dataAucPR, fp)
        # save dfAucPR to file
        dfAucPR.to_csv('saveFile/'+filename+'.csv')
    print('Done loading data ...')
    return dataAucPR, dfAucPR

dataAucPR, dfFeatAnalysis = loadAucPRData('nlimedSummaryResults')

#### Draw the interpolated plot for precision and recall ($AUC_{PR}$ graphs)


In [None]:
def mean(a):
    """Return the arithmetic mean of the values in ``a``."""
    total = sum(a)
    return total / len(a)

def drawAllAucPR(isVerbose=False, isInterpolated=False, isLatex=False):
    """Plot the precision-recall curves of all parsers in a 3x3 grid and save it.

    Rows are the indexing strategies and columns the term-frequency modes.
    Each cell holds one curve per parser found in ``dataAucPR`` plus the
    single NCBO point.  The figure is saved to 'saveFigures/PR-AUC.pdf'.

    isVerbose      -- also print, per parser, the auc, the mean of the best
                      settings and the last 20 recall and precision values.
    isInterpolated -- plot interpolated precision: every precision is raised
                      to the highest precision found at any later position.
    isLatex        -- also print one LaTeX table row per plotted curve.
    """
    tfModes = {'mode1':'mode_1', 'mode2':'mode_2', 'mode3':'mode_3'}
    # a list rather than a set, so the row of each strategy is the same on every run
    idxMethods = ['wpure', 'wpl', 'wple']
    aucTypes = ['maxAuc100']
    fontsize = 14

    fig, axs = plt.subplots(3, 3,figsize=(15,15), sharex=True, sharey=True)

    linestyles = [(0, (3, 5, 1, 5)), (0, (3, 1, 1, 1)), (0, (1, 1)), (0, (5, 1)), (0, (5, 5)), (0, (3, 1))]
    ncboPrecision = ncboRecall = None
    for aucType in aucTypes:
        print(aucType)
        prevMode, prevMethod = '',''
        for row, idxMethod in enumerate(idxMethods):
            for col, tfMode in enumerate(tfModes):
                if tfMode not in dataAucPR[idxMethod]: continue
                for count, parser in enumerate(parsers):
                    if parser not in dataAucPR[idxMethod][tfMode]: continue
                    result = dataAucPR[idxMethod][tfMode][parser][aucType]
                    settings = result['settings']
                    recalls = result['recalls']
                    # work on a copy: interpolating in place would silently
                    # change the precisions that later analyses read from dataAucPR
                    precisions = list(result['precisions'])
                    if isInterpolated:
                        # interpolation, walking from the end towards the start
                        for i in range(len(recalls)-2, -1, -1):
                            if precisions[i+1]>precisions[i]:
                                precisions[i]=precisions[i+1]

                    axs[row,col].plot(recalls[:],precisions[:],linestyle=linestyles[count],label=parser,linewidth=1.5)

                    if isVerbose:
                        print('%s:%f, \tsetting rate:'%(parser,result['auc']), end=' ')
                        print(*map(mean, zip(*settings)))
                        print('\trecalls',recalls[-20:])
                        print('\tprecisions',precisions[-20:])
                    # the strategy and mode are printed only on the first row of their group
                    header = idxMethod + ' & ' if idxMethod != prevMethod else ' & '
                    header += tfModes[tfMode].replace('_','\\_') + ' & ' if tfMode != prevMode else ' & '
                    prevMode, prevMethod = tfMode, idxMethod
                    if isLatex:
                        print(header, parser, ' & ', tuple(map(mean, zip(*settings)))[:-1], ' & ', round(result['auc'],3), ' \\\\')

                ncboPrecision = dataAucPR[idxMethod]['ncbo']['maxSetting']['precision']
                ncboRecall = dataAucPR[idxMethod]['ncbo']['maxSetting']['recall']
                axs[row,col].plot(ncboRecall,ncboPrecision,label='ncbo',marker=".", markersize=15)
                axs[row,col].grid()
                if row==0:
                    axs[0,col].text(0.5, 1.075, tfModes[tfMode], ha='center', fontweight='bold',size=fontsize)
                if col==2:
                    axs[row,2].text(1, 0.57, idxMethod, va='center', rotation=270, fontweight='bold',size=fontsize)

    # nothing to report when no subplot was drawn at all
    if ncboPrecision is not None:
        print('ncbo precision: %f, recall: %f'%(ncboPrecision,ncboRecall))
    for ax in axs.flat:
        ax.set_xlabel('recall', fontsize=fontsize)
        ax.set_ylabel('precision', fontsize=fontsize)

    # Hide x labels and tick labels for top plots and y ticks for right plots.
    for ax in axs.flat:
        ax.label_outer()
    # the legend entries are taken from the last subplot
    handles, labels = ax.get_legend_handles_labels()
    fig.tight_layout()
    fig.subplots_adjust(right=0.88, top=0.88) 
    fig.legend(handles, labels, loc=7, fontsize='large')
    fig.savefig('saveFigures/PR-AUC.pdf',dpi=10000)            
    
drawAllAucPR(isVerbose=False, isInterpolated=True, isLatex=True)

### The role of feature

We are sure that preferred label and synonym are the most significant features, therefore, now we investigate the role of other features, i.e. definition, parent label, and model description.

In [None]:
# a method to draw the role features 
def drawPlotPairGrid(idxMethod, tfMode, parser):
    """Draw and save a pair grid of the feature multipliers for one setting.

    Only the rows of ``dfFeatAnalysis`` matching the given indexing strategy,
    term-frequency mode and parser are plotted; points are coloured by 'auc'.
    """
    print(parser)
    variables = ['\u03B2 (synonym)', '\u03B3 (definition)', '\u03B4 (parent label)', '\u03B8 (description)']
    isSelected = (
        (dfFeatAnalysis['parser'] == parser)
        & (dfFeatAnalysis['idxMethod'] == idxMethod)
        & (dfFeatAnalysis['tfMode'] == tfMode)
    )
    subset = dfFeatAnalysis.loc[isSelected]

    grid = sns.PairGrid(subset, hue="auc", vars=variables, corner=True)
    grid.map_diag(sns.kdeplot, lw=3, hue=None, color="0.4")
    grid.map_offdiag(sns.scatterplot)
    grid.add_legend()
    # NOTE(review): the file name has no tfMode part, so the figures of the
    # three tf modes overwrite each other and only the last one drawn remains.
    plt.savefig("saveFigures/feature_role_" + idxMethod + '_' + parser + '.pdf',dpi=300)

# draw the role for all setings
from itertools import product
for idxMethod, tfMode, parser in product(idxMethods, tfModes, parsers):
    if parser != 'ncbo':
        drawPlotPairGrid(idxMethod,tfMode,parser)


#### Draw the role of feature (3D)

In [None]:
def drawPlot3DHeatmap(idxMethod, parser):
    """Draw and save a 3D scatter of three feature multipliers coloured by auc.

    Uses the rows of ``dfFeatAnalysis`` for the given indexing strategy and
    parser with tfMode 'mode3'.  The axes are the definition, parent label and
    description multipliers; the colour of each point is its auc.
    """
    df = dfFeatAnalysis
    df = df.loc[(df['parser'] == parser)&(df['idxMethod'] == idxMethod)&(df['tfMode'] == 'mode3')]
    # Creating dataset
    g = df['\u03B3 (definition)'].tolist()
    d = df['\u03B4 (parent label)'].tolist()
    t = df['\u03B8 (description)'].tolist()
    auc = df['auc'].tolist()

    # Creating figure
    fig = plt.figure(figsize = (16, 9))
    ax = plt.axes(projection ="3d")

    # Add x, y gridlines.  The switch is passed positionally because its
    # keyword was renamed from 'b' to 'visible' in newer matplotlib releases.
    ax.grid(True, color ='grey',
            linestyle ='-.', linewidth = 0.3,
            alpha = 0.2)

    # Creating color map
    my_cmap = plt.get_cmap('twilight')

    # Creating plot
    sctt = ax.scatter3D(g, d, t,
                        alpha = 0.8,
                        c = auc,
                        cmap = my_cmap,
                        marker ='o')

    plt.title(parser)
    ax.set_xlabel('\u03B3 (definition)', fontweight ='bold')
    ax.set_ylabel('\u03B4 (parent label)', fontweight ='bold')
    ax.set_zlabel('\u03B8 (description)', fontweight ='bold')
    fig.colorbar(sctt, ax = ax, shrink = 0.5, aspect = 5)

    # show plot
    plt.show()

    fig.savefig("saveFigures/three_feature_role_" + idxMethod + '_' + parser + '.pdf',dpi=400)

drawPlot3DHeatmap('wpl','coreNLP')
drawPlot3DHeatmap('wpl','benepar')
drawPlot3DHeatmap('wpl','stanza')
drawPlot3DHeatmap('wpl','xStanza')

### Performance based on query length

To annotate the phrases in a query with ontology classes, we calculate a degree of association, where a higher value means the class is more likely to be the relevant annotation. However, there is no guidance on the exact degree of association to require. Therefore, we investigate the best minimum value, or cutoff, of the degree of association. Using the best cutoff, we measure the NLQ Annotator performance based on the query length.

#### Get the best cutoff for WPL and Mode3

We focus on WPL and Mode3 because they show the highest performance. 

If you are only interested in the final result and the generated graph, you can jump to **the next subsection**.

In [None]:
def getCompleteSettingAndCutoff(idxType, tfMode, parser, recallType, limit=0):
    """Find the cutoff with the best f-measure for one setting in ``dataAucPR``.

    idxType, tfMode, parser -- keys selecting the entry of ``dataAucPR``.
    recallType -- result to analyse, e.g. 'maxAuc100'.
    limit      -- factor used for the 'preclimit' column,
                  1 / (1 + recall * limit).

    Returns (df, setting):
      df      -- one row per cutoff with recall, precision, fmeasure and
                 preclimit, sorted by fmeasure then precision, best first.
      setting -- the mean of the best settings without its last element,
                 followed by the mean cutoff value and the floored mean return
                 number of the cutoffs reaching the best f-measure.
    """
    print(idxType, tfMode, parser, recallType)
    entry = dataAucPR[idxType][tfMode][parser]
    maxResult = entry[recallType]
    meanMaxSetting = np.mean(np.array(maxResult['settings']),axis=0)
    precisions = maxResult['precisions']
    recalls = maxResult['recalls']
    cutoffs = entry['cutoffs'][:len(recalls)]
    data = []
    for i, cutoff in enumerate(cutoffs):
        total = precisions[i]+recalls[i]
        # precision and recall both 0 means f-measure 0, not a division error
        fmeasure = (precisions[i]*recalls[i])/total*2 if total else 0.0
        precLimit = 1/(1+(recalls[i]*limit))
        data += [{'cutoff':cutoff, 'recall':recalls[i], 'precision':precisions[i], 'fmeasure':fmeasure, 'preclimit':precLimit}]
    df = pd.DataFrame(data)
    df = df.sort_values(['fmeasure', 'precision'], ascending=False)

    # get rate of cutoff and return number
    maxFmeasure = df['fmeasure'].iloc[:50].max()
    dfMax = df.loc[df['fmeasure'] == maxFmeasure]
    # every cutoff entry is indexed as [cutoff value, return number]
    maxCutoff = dfMax['cutoff'].map(lambda x: x[0]).mean()
    maxReturn = math.floor(dfMax['cutoff'].map(lambda x: x[1]).mean())

    setting = list(meanMaxSetting[:-1]) + [maxCutoff, maxReturn]
    return df, setting

In [None]:
# Load the test queries used to evaluate the best setting of each parser.
with open('dataSource/DataTest.json','r') as fp:
    dataTest = json.load(fp)

def _getURICode(uri):
    # function to get ontology class ID rather than full url
    import re
    partUri = uri[uri.rfind('/')+1:].lower()
    regex = re.compile('[^a-z0-9]')
    partUri = regex.sub('', partUri)
    return partUri

# one empty result holder per parser, filled by the evaluation loop
resultBestSetting = {name: {} for name in ('benepar', 'stanza', 'coreNLP', 'xStanza', 'ncbo')}


In [None]:
# Repeat this command several times until you get all data.
# Sometimes the ncbo service is not available due to an inactive server; a
# parser whose run failed is left empty so that the next run retries it.
# It will take approximately 10 minutes to run
for parser in parsers:
    handler = resultBestSetting[parser]
    # parsers that already hold results from an earlier run are not evaluated again
    if len(handler.get('class', {})) > 0: continue
    nli = NLIMED(repo='pmr', parser=parser, quite=True)
    if parser != 'ncbo':
        df, setting = getCompleteSettingAndCutoff('wpl','mode3',parser,'maxAuc100',1)
        setting += [3] #mode_3 for selecting the highest feature only
        nli.setWeighting(*setting)
        print('Best setting: ', setting)
    handler.update({'term':{},'class':{}})

    # sometime, ncbo services is not available
    try:
        numCorrect, numReturn, numPositive = 0,0,0
        for count, queryTest in enumerate(list(dataTest.values())):

            annotation = nli.getAnnotated(queryTest['query'])
            queryTest['annotation'] = [_getURICode(cls) for cls in queryTest['annotation']]
            numOfClasses = len(queryTest['annotation'])
            numOfTerms = len(queryTest['query'].split())
            numPositive += numOfClasses

            # each entry is (number of correct classes, number of returned classes)
            classStats = handler['class'].setdefault(numOfClasses, [])
            termStats = handler['term'].setdefault(numOfTerms, [])

            if len(annotation['result']) > 0:
                annon = {_getURICode(d) for e in annotation['result'] for d in e[0]}
                numAnnonCorrect = len(set(queryTest['annotation']) & annon)
                numCorrect += numAnnonCorrect
                numReturn += len(annon)
                classStats.append((numAnnonCorrect,len(annon)))
                termStats.append((numAnnonCorrect,len(annon)))
            else:
                classStats.append((0,0))
                termStats.append((0,0))

            if count%10==0: print(count,end=' ') 
        # a parser returning nothing scores 0 instead of dividing by zero
        precision = numCorrect/numReturn if numReturn else 0.0
        recall = numCorrect/numPositive if numPositive else 0.0
        fmeasure = precision*recall*2/(precision+recall) if precision+recall else 0.0

        print('\nrecall ',recall, numCorrect, numPositive)
        print('precision',precision, numCorrect, numReturn)
        print('f-measure', fmeasure)

    except Exception as err:
        # Best effort: report the failure and go on with the next parser.  The
        # partial counts are dropped, otherwise the next run would take this
        # parser as complete and skip it.
        print('\n%s failed: %r'%(parser, err))
        handler.update({'term':{},'class':{}})

# save result
with open('saveFile/resultBestSetting.json', 'w') as fp:
    json.dump(resultBestSetting, fp)

#### Load the analysis result

In case you just need the analysis result, load this file.

In [None]:
# If the best-setting results were already saved, load them from the file
# instead of running the evaluation again.
with open('saveFile/resultBestSetting.json', 'r') as fp:
    resultBestSetting = json.load(fp)

#### Draw the performance plot based on query length

In [None]:
def drawPlot(stats,statType,xlabel):
    """Plot the f-measure of every parser against a query-size number and save it.

    stats    -- dict parser -> {'class': {...}, 'term': {...}}, where each
                inner dict maps a number to a list of
                (number correct, number returned) pairs.
    statType -- which inner dict to plot, 'class' or 'term'.
    xlabel   -- used as the name of the saved file,
                'saveFigures/<xlabel>.pdf'.
    """
    df = pd.DataFrame(columns=['parser','statType', 'precision', 'recall', 'fmeasure', 'number'])

    for label, stat in stats.items():
        # the keys are strings after a JSON round trip, so sort them as numbers;
        # a parser without results (e.g. a failed NCBO run) adds no rows
        st = {int(k):v for k,v in stat.get(statType, {}).items()}
        for k, v in sorted(st.items()):
            t_v = list(map(list, zip(*v)))
            prec = sum(t_v[0])/sum(t_v[1]) if sum(t_v[1])>0 else 1.
            rec = sum(t_v[0])/(int(k)*len(v))
            try:
                fmeasure = prec * rec *2 /(prec+rec)
            except ZeroDivisionError:
                fmeasure = 0
            df.loc[len(df.index)] = [label, 'ontology class' if statType=='class' else statType, prec, rec, fmeasure, str(k)]

    sns.set_theme(style="ticks", color_codes=True)

    p = sns.relplot(x='number',y='fmeasure', 
                hue='parser', aspect=1, height=4, 
                size='precision',
                col='statType',
                data=df,
                sizes=(10, 200),
               )
    p.set_titles(row_template = '{row_name}', col_template = '{col_name}')
    p._legend.set_title('')
    plt.savefig('saveFigures/'+xlabel + '.pdf',dpi=300)
#     plt.grid()


drawPlot(resultBestSetting, 'class', 'number of phrases per query')
drawPlot(resultBestSetting, 'term', 'number of terms per query')

## 3. Experiment 2: NLIMED Behaviour on Native Query in PMR (Historical Data)

We have collected historical data from the PMR query logs about the user queries in its search feature and the biological models relevant to each query.

The data is differentiated based on the co-occurrence of terms in the query and the ontology classes in the relevant model. There are three types of query-model pairs:
  * 0 co-occurrence
  * 0 < co-occurrence <= 0.5
  * 0.5 < co-occurrence

Please run the **experiment setup** before proceeding with this experiment.

### Shows PMR statistical data

In [None]:
# Load ontology dictionaries
from nltk.corpus import stopwords
import gzip, pickle
# NOTE: pickle can run arbitrary code while loading; only open a pmr_onto.gz
# file obtained from a trusted source.
with gzip.GzipFile('dataSource/pmr_onto.gz', 'rb') as file:
    ontologies = pickle.load(file)

# Load stopwords
stopwords = set(stopwords.words('english'))

# Organise K,V where K is ClassID, V is a set of terms in the ontology class
ontoClasses = {}
for ontoName, ontology in ontologies.items():
    for classID, features in ontology['data'].items():
        # drop the '_' and ':' separators and the 'OPB#' prefix from the ID
        classID = (''.join(re.split('_|:', classID))).replace('OPB#','')
        # first feature followed by the entries of the second feature
        feats = [features[0]] + features[1]
        terms = set()
        for feat in feats:
            if isinstance(feat, str):
                terms.update(feat.lower().split())
        ontoClasses[classID] = terms - stopwords

In [None]:
# Normalised link related to model
def normalisedLink(link):
    """Return ``link`` with the path segment after '/rawfile/' replaced by 'HEAD'.

    A link without '/rawfile/' is returned unchanged.  Otherwise the result is
    the text before the first '/rawfile/', then '/rawfile/HEAD/', then the text
    after the last '/rawfile/' with its leading segment (up to and including
    the first '/') removed.
    """
    pieces = link.split('/rawfile/')
    if len(pieces) == 1:
        return link
    lastPiece = pieces[-1]
    _, slash, remainder = lastPiece.partition('/')
    # without any '/', the whole last piece is kept
    target = remainder if slash else lastPiece
    return pieces[0] + '/rawfile/HEAD/' + target

# Normalised full format ClassID to shorter ClassID
def getShortID(bioClass):
    """Return the short class ID of a full ontology class URI, or '' if none.

    Angle brackets and surrounding whitespace are removed first.  For a text
    starting with 'http', the part after the last '/' is taken; if it holds a
    '_' or ':', every '_' becomes ':' and the first ':' is then removed.
    Any other input gives ''.
    """
    cleaned = bioClass.replace('<', '').replace('>', '').strip(' \t\n\r')
    if not cleaned.startswith('http'):
        return ''
    localId = cleaned.rsplit('/', 1)[-1]
    if '_' not in localId and ':' not in localId:
        return ''
    prefix, _, rest = localId.replace('_', ':').partition(':')
    return prefix + rest

# Show PMR statistic
print("""STATISTIC PMR""")
with open('dataSource/listOfObjects.txt', 'r') as fp:
    lines = fp.readlines()
print("Number of objects: ",len(lines))
# rstrip only removes the line break, so a last line without one is not truncated
newLines = {line.rstrip('\n') for line in lines if line[:4]=='http' and 'models' not in line}
print("Number of ontology links: ",len(newLines))
newLines = {line[line.rfind('/')+1:].replace(':','_') for line in newLines}
print("Number of distinct ontologies: ",len(newLines),"\n")
with open('dataSource/rdfPaths.json', 'r') as fp:
    rdfPaths = json.load(fp)
print("Number of links with rdf", len(rdfPaths))

# Organise link annotated with ontology classes to validLinks
# Organise ontology classes used in link to validOnto
validLinks = {}
validOnto = set()
for rdfPath in rdfPaths:
    for triple in rdfPath['paths']:
        # only objects that are URIs outside the model repository are candidates
        if triple['o'][:4] != 'http' or 'models.phys' in triple['o']:
            continue
        # normalised the link
        link = normalisedLink(rdfPath['link'])
        classID = getShortID(triple['o'])
        if classID not in ontoClasses:
            continue
        entry = validLinks.setdefault(link, {'terms':set(),'classes':{}})
        # 'terms' is the union of the terms of all classes of the link
        entry['terms'] |= ontoClasses[classID]
        entry['classes'][classID] = ontoClasses[classID]
        validOnto.add(classID)
print("Number of models annotated to ontology class: ",len(validLinks))
print("Number of URIs Ontology Classes: ",len(validOnto))

### Data test preparation

In [None]:
# Load query and answers from query log
site = 'https://models.physiomeproject.org/'
with open('dataSource/query_workspace.json','r') as fp:
    queryWorkspaces = json.load(fp)
print('The total number of raw query and answer is: ',len(queryWorkspaces))

# Normalise all query and answers (each answer list is updated in place)
for answers in queryWorkspaces.values():
    answers[:] = [normalisedLink(answer) for answer in answers]

# Load cluster
with open('dataSource/cellmlClusterer.json','r') as fp:
    cellmlClusters = json.load(fp)

# Enrich query and answers with cluster: every answer that belongs to a
# cluster brings in all members of that cluster
url2Cluster = cellmlClusters['url2Cluster']
clusters = cellmlClusters['cluster']
for query, answers in queryWorkspaces.items():
    enriched = set(answers)
    for answer in answers:
        if answer in url2Cluster:
            enriched.update(clusters[url2Cluster[answer]])
    queryWorkspaces[query] = enriched

# Remove query and answers where the answer is not annotated with ontology classes
for query in list(queryWorkspaces):
    answers = queryWorkspaces[query]
    answers -= {answer for answer in answers if (site + answer) not in validLinks}
    if not answers:
        del queryWorkspaces[query]

print('The number of query and answer annotated with ontology classes: ',len(queryWorkspaces))

In [None]:
# Filter for queries having terms indexed by ontology classes (Considering preferred label and synonym only)
# NOTE(review): the parser is spelled 'CoreNLP' here but 'coreNLP' in the
# parsers list - confirm that NLIMED accepts both spellings.
nli = NLIMED(repo='pmr', parser='CoreNLP', pl=1, alpha=3, beta=2, gamma=0, delta=0, theta=0, cutoff=0, tfMode=1, quite=True)

native_QR_PMR = {}
for count, query in enumerate(queryWorkspaces):
    # keep the query only when NLIMED annotates at least part of it
    if len(nli.getAnnotated(query)['result']) > 0:
        annotatedAnswers = {}
        for link in queryWorkspaces[query]:
            if site+link in validLinks:
                annotatedAnswers[link] = validLinks[site+link]
        native_QR_PMR[query] = annotatedAnswers

    # progress indicator
    if count%20==0: print(count,end=' ')

print('The number of query and answes annotated with ontology classes, considering preferred label and synonym only: ', len(native_QR_PMR))

In [None]:
# Filter native query: keep a query only when at least one of its models has
# an ontology class term that also appears in the query
onto_native_QR_PMR = {}
for q, vals in native_QR_PMR.items():
    queryTerms = q.split()
    sharesTerm = any(
        site+link in validLinks
        and any(t in validLinks[site+link]['terms'] for t in queryTerms)
        for link in vals
    )
    if sharesTerm:
        onto_native_QR_PMR[q] = vals
print('The number of query and answer where terms in ontology classes appear in query: ', len(onto_native_QR_PMR))

In [None]:
# Get, for every query, the maximum proportion of query terms among the terms
# of an ontology class of its models
# onto_native_QR_PMR_max ==> query to max proportion in onto_native_QR_PMR
# native_QR_PMR_max ==> query to max proportion in native_QR_PMR

onto_native_QR_PMR_max = {}
for q, v in onto_native_QR_PMR.items():
    qTerms = set(q.split())
    best = 0
    for link in v:
        for classTerms in validLinks[site+link]['classes'].values():
            # a class whose terms were all stopwords has no terms left and
            # cannot share any with the query; skipping it avoids dividing by zero
            if not classTerms:
                continue
            best = max(best, len(qTerms&classTerms)/len(classTerms))
    onto_native_QR_PMR_max[q] = best

# queries without any shared term get proportion 0
native_QR_PMR_max = onto_native_QR_PMR_max.copy()
native_QR_PMR_max.update({q:0 for q in native_QR_PMR if q not in onto_native_QR_PMR_max})

In [None]:
# Save/load the query-results pairs found in the query logs
if 'native_QR_PMR' not in globals():
    # nothing computed in this session: read the pairs saved earlier
    with open('saveFile/native_QR_PMR.json', 'r') as fp:
        native_QR_PMR = json.load(fp)
else:
    # keep only the list of model links of each query so it can be saved to json
    native_QR_PMR = {query: list(models) for query, models in native_QR_PMR.items()}
    with open('saveFile/native_QR_PMR.json', 'w') as fp:
        json.dump(native_QR_PMR, fp)

In [None]:
# Get query results utilising CoreNLP NLIMED (supposed to be the best setting)
def getNliResults(nli):
    """Run every query of ``native_QR_PMR`` through ``nli`` and collect the models.

    Returns a dict query -> list of model paths in the order returned by
    ``nli.getModels``, without duplicates and without paths ending in 'sedml'.
    """
    nliResults = {}
    for position, query in enumerate(native_QR_PMR):
        if position%20==0: print(position, end=' ')
        models = nli.getModels(query)
        found = []
        nliResults[query] = found
        for result in models['results']:
            entity = result['Model_entity']
            workspace = result['graph'].replace('https://models.physiomeproject.org/','')
            # the entity is cut at its '#'
            model = workspace + '/rawfile/HEAD/' + entity[:entity.find('#')]
            if model in found or model.endswith('sedml'):
                continue
            found += [model]
    return nliResults


In [None]:
# Get Mean Average Precision (MAP)
def getMAP(results, reverence, k, isVerbose = False):
    """Return the mean average precision at ``k`` of ``results``.

    results   -- dict query -> ranked list of retrieved models.
    reverence -- dict query -> collection of relevant models (the reference).
    k         -- number of top results considered per query.
    isVerbose -- print the running values at every relevant result.

    The average precision of a query is the sum of the precisions at each
    relevant result within the top k, divided by the number of relevant
    results found there.  A query missing from ``results`` counts as having no
    results.

    Returns a dict with:
      'map'      -- sum of the average precisions / number of reference queries
      'rate'     -- share of reference queries with at least one relevant result
      'identify' -- number of such queries
      'totRel'   -- number of reference queries
    An empty reference gives 0 for every value.
    """
    if not reverence:
        return {'map':0.0, 'rate':0.0, 'identify':0, 'totRel':0}
    totIdentify = 0
    totAP = 0
    for query, relModels in reverence.items():
        relevances = [0]*k
        totRelevant = 0
        ap = 0
        for i, model in enumerate(results.get(query, [])[:k]):
            if model in relModels:
                relevances[i] = 1
                # totRelevant is the number of relevant results up to rank i+1
                totRelevant += 1
                ap += totRelevant/(i+1)
                if isVerbose:
                    print(i+1, ap, totRelevant, relevances, query)
        if totRelevant > 0:
            totAP += ap/totRelevant
            totIdentify += 1
    return {'map':totAP/len(reverence), 'rate':totIdentify/len(reverence), 'identify':totIdentify, 'totRel':len(reverence)}

### Calculate Mean Average Precision (MAP) for each setting

In [None]:
# Load NLIMED best setting from nlimed summary results
# (the JSON summary file written by loadAucPRData('nlimedSummaryResults'))
with open('saveFile/nlimedSummaryResults.json', 'r') as fp:
    nliSettings = json.load(fp)

In [None]:
# Analysis of the best cutoff and the number of return
# Returning dataframe of cutoff and number return and the best setting
dataAucPR = nliSettings
def analyseData(idxType, tfMode, parser, recallType, limit=0):
    """Find the cutoff with the best f-measure for one setting in ``dataAucPR``.

    idxType, tfMode, parser -- keys selecting the entry of ``dataAucPR``.
    recallType -- result to analyse, e.g. 'maxAuc100'.
    limit      -- factor used for the 'preclimit' column,
                  1 / (1 + recall * limit).

    Prints the best settings and their mean, then returns (df, setting):
      df      -- one row per cutoff with recall, precision, fmeasure and
                 preclimit, sorted by fmeasure then precision, best first.
      setting -- the mean of the best settings without its last element,
                 followed by the mean cutoff value and the floored mean return
                 number of the cutoffs reaching the best f-measure.
    """
    print(idxType, tfMode, parser, recallType)
    entry = dataAucPR[idxType][tfMode][parser]
    maxResult = entry[recallType]
    print('Best settings: ',maxResult['settings'])
    meanMaxSetting = np.mean(np.array(maxResult['settings']),axis=0)
    print('Mean of best setting: ', meanMaxSetting)
    precisions = maxResult['precisions']
    recalls = maxResult['recalls']
    cutoffs = entry['cutoffs'][:len(recalls)]
    data = []
    for i, cutoff in enumerate(cutoffs):
        total = precisions[i]+recalls[i]
        # precision and recall both 0 means f-measure 0, not a division error
        fmeasure = (precisions[i]*recalls[i])/total*2 if total else 0.0
        precLimit = 1/(1+(recalls[i]*limit))
        data += [{'cutoff':cutoff, 'recall':recalls[i], 'precision':precisions[i], 'fmeasure':fmeasure, 'preclimit':precLimit}]
    df = pd.DataFrame(data)
    df = df.sort_values(['fmeasure', 'precision'], ascending=False)

    # get rate of cutoff and return number
    maxFmeasure = df['fmeasure'].iloc[:50].max()
    dfMax = df.loc[df['fmeasure'] == maxFmeasure]
    # every cutoff entry is indexed as [cutoff value, return number]
    maxCutoff = dfMax['cutoff'].map(lambda x: x[0]).mean()
    maxReturn = math.floor(dfMax['cutoff'].map(lambda x: x[1]).mean())

    setting = list(meanMaxSetting[:-1]) + [maxCutoff, maxReturn]
    return df, setting

In [None]:
# Get results from NCBO
nliNcbo = NLIMED(repo='pmr', parser='ncbo')
nliNcboResult = getNliResults(nliNcbo)
ncboMap5 = getMAP(nliNcboResult, native_QR_PMR,5)
ncboMap10 = getMAP(nliNcboResult, native_QR_PMR,10)
# k=1000, the same depth used for 'map1000' of the other parsers
ncboMap1000 = getMAP(nliNcboResult, native_QR_PMR,1000)

In [None]:
# Get MAP for each setting

# where moveIndex copies the index files to, and where it takes them from
source = 'pmr_inv'
dest = os.path.join(os.path.dirname(__file__), 'indexes')

def getMapForAllSettings(isBest = False, cutoff=0, pl=0, multipliers=None, resultSettings=None):
    """Query NLIMED with every setting in ``resultSettings`` and store the MAP.

    For each indexing strategy / tf mode / parser entry holding a 'maxAuc100'
    result, the setting returned by analyseData is adjusted, NLIMED is run
    over the native queries, and 'results', 'map5', 'map10' and 'map1000' are
    stored in that entry.  Entries that already hold 'results' are skipped.
    The NCBO results are stored under 'ncbo' and the whole dict is saved to
    'saveFile/nlimedSummaryResultsComplete<suffix>.json'.

    isBest      -- keep the cutoff and multipliers found by analyseData.
    cutoff      -- cutoff to use instead when isBest is False.
    pl          -- when > 0, replaces the last element of the setting.
    multipliers -- when given and isBest is False, replace the leading
                   elements of the setting.
    resultSettings -- dict to fill in place; a new dict is used when omitted.
    """
    # a fresh dict per call: a shared default would keep results between calls
    if resultSettings is None:
        resultSettings = {}
    for idxMode, v1 in resultSettings.items():
        if idxMode=='ncbo': continue
        moveIndex(idxMode)
        for tfMode, v2 in v1.items():
            # the NCBO entry sits next to the tf modes and holds no settings
            if tfMode == 'ncbo': continue
            for parser, v3 in v2.items():
                if isinstance(v3,list) or 'maxAuc100' not in v3: continue
                df, setting = analyseData(idxMode, tfMode, parser,'maxAuc100')
                if pl > 0: setting[-1] = pl
                if not isBest:
                    setting[-2] = cutoff
                    if multipliers is not None:
                        for i, v in enumerate(multipliers): setting[i]=v
                if 'results' in v3['maxAuc100']: 
                    print(idxMode, tfMode, parser, setting, 'SKIPPED')
                    continue 
                print(idxMode, tfMode, parser, setting)
                nli = NLIMED(repo='pmr', parser=parser)
                nli.setWeighting(*setting)
                nliResult = getNliResults(nli)
                v3['maxAuc100']['results'] = nliResult
                v3['maxAuc100']['map5'] = getMAP(nliResult, native_QR_PMR,5)
                v3['maxAuc100']['map10'] = getMAP(nliResult, native_QR_PMR,10)
                v3['maxAuc100']['map1000'] = getMAP(nliResult, native_QR_PMR,1000)
                print('\nMAP5:',v3['maxAuc100']['map5'],' \tMAP10', v3['maxAuc100']['map10'],'\n')
    # Set result from NCBO
    resultSettings['ncbo'] = {'results':nliNcboResult, 'map5':ncboMap5, 'map10':ncboMap10, 'map1000':ncboMap1000}
    # Save to file; the suffix encodes the multipliers, the cutoff and pl
    addName = '_'+'_'.join(str(m) for m in multipliers) if multipliers is not None else ''
    addName += '_best' if isBest else '_'+str(cutoff)
    addName += '_best' if pl == 0 else '_'+str(pl)
    with open('saveFile/nlimedSummaryResultsComplete'+addName+'.json', 'w') as fp:
        json.dump(resultSettings, fp)
        
# initialise resultSettings: six independent copies of the loaded settings,
# one per configuration evaluated below
resultSettings = [copy.deepcopy(nliSettings) for _ in range(6)]

In [None]:
# One configuration per copy in resultSettings, evaluated in this order.
mapConfigurations = [
    dict(isBest=True),
    dict(isBest=True, pl=5),
    dict(isBest=False, cutoff=0, pl=5),
    dict(isBest=False, cutoff=0, pl=0),
    dict(isBest=False, cutoff=0, pl=5, multipliers=[3,3,1,1,1]),
    dict(isBest=False, cutoff=0, pl=5, multipliers=[3,3,0.5,0.5,0.5]),
]
for configuration, target in zip(mapConfigurations, resultSettings):
    getMapForAllSettings(resultSettings=target, **configuration)

### Analyse MAP results

In [None]:
# Compare query result to referrence
import pprint
import operator
def compareTo(nlimedResultsFile, dataToCompare, isVerbose=True):
    """Recompute MAP scores of saved NLIMED results against a reference set.

    The JSON file is expected to be nested as
    ``idxMode -> tfMode -> parser -> {'maxAuc100': {'results': ...}}``.
    Entries keyed ``'ncbo'`` at either of the first two levels are skipped.

    Args:
        nlimedResultsFile: path of the JSON file holding the saved results.
        dataToCompare: reference data passed unchanged to ``getMAP``.
        isVerbose: currently unused; kept so existing callers keep working.

    Returns:
        A DataFrame with one row per (idxMode, tfMode, parser) combination and
        the columns ``tfIndex``, ``tfMode``, ``parser``, ``mAP@5``, ``mAP@10``
        and ``mAP`` (the latter computed with a cut-off of 1000).
    """
    with open(nlimedResultsFile, 'r') as fp:
        nliSettings = json.load(fp)
    # Display labels for the term-frequency modes; named differently from the
    # module-level tfModes list to avoid shadowing it
    tfModeLabels = {'mode1':'mode_1', 'mode2':'mode_2', 'mode3':'mode_3'}
    mapDf = pd.DataFrame(columns=['tfIndex','tfMode', 'parser','mAP@5','mAP@10','mAP'])
    for idxMode, tfSettings in nliSettings.items():
        if idxMode == 'ncbo':
            continue
        for tfMode, parserSettings in tfSettings.items():
            if tfMode == 'ncbo':
                continue
            for parser, setting in parserSettings.items():
                results = setting['maxAuc100']['results']
                map5 = getMAP(results, dataToCompare, 5)
                map10 = getMAP(results, dataToCompare, 10)
                map1000 = getMAP(results, dataToCompare, 1000)
                # Append the scores as the next row of the summary table
                mapDf.loc[len(mapDf.index)] = [idxMode, tfModeLabels[tfMode], parser,
                                               map5['map'], map10['map'], map1000['map']]
    return mapDf

In [None]:
# Separate based on the max of terms in query and terms in ontology class proportion
mapColumns = ['tfIndex','tfMode', 'parser','mAP@5','mAP@10','mAP','proportion(p)']
propLabels = {'0':'p=0', '0.5':'0<p<=0.5', '1.0':'0.5<p<=1.0'}
propSetting = [0,0.5,1.0]
native_QR_PMR_prop = {ps:{} for ps in propSetting}
for q, v in native_QR_PMR.items():
    prop = native_QR_PMR_max[q]
    # propSetting is ascending, so the first upper bound that fits is the
    # bucket; a query whose proportion exceeds the last bound is left out
    for ps in propSetting:
        if prop <= ps:
            native_QR_PMR_prop[ps][q] = v
            break

# Get MAP for each proportion and different nlimedResults
nlimedResultsFiles = [
    'saveFile/nlimedSummaryResultsComplete_0_5.json',
    'saveFile/nlimedSummaryResultsComplete_3_3_1_1_1_0_5.json',
    'saveFile/nlimedSummaryResultsComplete_3_3_0.5_0.5_0.5_0_5.json',
]
mapFrames = []
for nlimedResultsFile in nlimedResultsFiles:
    for prop, native_prop in native_QR_PMR_prop.items():
        df = compareTo(nlimedResultsFile, native_prop, isVerbose=False)
        # Tag every row with the label of the proportion bucket it was scored on
        df['proportion(p)'] = propLabels[str(prop)]
        mapFrames.append(df)

# DataFrame.append was removed in pandas 2.0, so the per-bucket tables are
# concatenated once instead; ignore_index gives the rows unique labels
if mapFrames:
    mapDf = pd.concat(mapFrames, ignore_index=True)
else:
    mapDf = pd.DataFrame(columns=mapColumns)

In [None]:
# Plot results: box plots of mAP@10 per proportion bucket, coloured by parser,
# with one panel per combination of tfMode (rows) and tfIndex (columns)
sns.set_theme(style="ticks", color_codes=True)

p = sns.catplot(
    data=mapDf,
    kind='box',
    x='proportion(p)',
    y='mAP@10',
    hue='parser',
    hue_order=['benepar','stanza','coreNLP','xStanza'],
    row='tfMode',
    col='tfIndex',
    margin_titles=True,
    height=4,
    aspect=1,
)
# Show only the facet value in each panel title and remove the legend heading
p.set_titles(col_template='{col_name}', row_template='{row_name}')
p._legend.set_title('')
# NOTE(review): the following plotting cell saves to this same path and will
# overwrite this figure - confirm that is intended
plt.savefig('saveFigures/nlimed_native_behaviour.pdf', bbox_inches="tight", dpi=300)

In [None]:
# Plot results: a single panel of mAP@10 box plots per proportion bucket,
# coloured by parser (no row/col faceting, so all rows of mapDf are pooled)
sns.set_theme(style="ticks", color_codes=True)

p = sns.catplot(
    data=mapDf,
    kind='box',
    x='proportion(p)',
    y='mAP@10',
    hue='parser',
    hue_order=['benepar','stanza','coreNLP','xStanza'],
    margin_titles=True,
    height=4,
    aspect=1,
)
p.set_titles(col_template='{col_name}', row_template='{row_name}')
# Remove the legend heading
p._legend.set_title('')
# NOTE(review): same output path as the faceted plot in the previous cell, so
# this call overwrites that PDF - confirm that is intended
plt.savefig('saveFigures/nlimed_native_behaviour.pdf', bbox_inches="tight", dpi=300)