In [53]:
import numpy as np
import random
import re
files = ['KP100k.tsv']

In [117]:
urls, judgements, p = {}, {}, []
for file in files:
    with open(file,'r') as f:
        for l in f:
            l = l.strip().split('\t')
            if l[14] not in urls:
                urls[l[14]] = 0
            urls[l[14]] += 1
for file in files:
    with open(file,'r') as f:
        for l in f:
            l = l.strip().split('\t')
            if urls[l[14]] > 1 and len(l) > 16:
                if l[14] not in judgements and 'error' not in l[16].strip().lower():
                    judgements[l[14]] = []
                if 'error' not in l[16].strip().lower(): 
                    currentJudgements = []
                    for phrase in l[16:]:
                        currentJudgements.append(set(re.sub(' +', ' ',phrase.strip()).split(' ')))
                    judgements[l[14]].append(currentJudgements)

In [128]:
def getScoreUnigram(candidate, gold):
    #Unigram Levenshtein distance
    #First we produce all possible pairs and greedily select 
    scoring, bestMatch = {}, {}
    maxScore = 0
    maxLabel = ''
    #Generate all possible combinations
    for goldLabel in gold:
        goldKey = str(goldLabel)
        scoring[goldKey] = {}
        for candidateLabel in candidate:
            candidateKey = str(candidateLabel)
            scoring[goldKey][candidateKey] = (len(goldLabel) - len(goldLabel-candidateLabel))/len(goldLabel)
    #Greedily select best combination and then remove all related combinations.
    while len(scoring) > 0:
        maxScore = 0
        maxLabel = ''
        for goldLabel in scoring:
            goldKey = str(goldLabel)
            for candidateLabel in scoring[goldKey]:
                candidateKey = str(candidateLabel)
                score = scoring[goldKey][candidateKey]
                if score >= maxScore:
                    maxScore = score
                    maxLabel = (goldKey, candidateKey)
        bestMatch[maxLabel] = scoring[maxLabel[0]][maxLabel[1]]
        scoring.pop(maxLabel[0])#remove all pairs that could
    return sum(bestMatch.values())/len(gold)      

In [129]:
def getScoreEM(candidate, gold):
    #Unigram Levenshtein distance
    #First we produce all possible pairs and greedily select 
    scoring, bestMatch = {}, {}
    maxScore = 0
    maxLabel = ''
    #Generate all possible combinations
    for goldLabel in gold:
        goldKey = str(goldLabel)
        scoring[goldKey] = {}
        for candidateLabel in candidate:
            candidateKey = str(candidateLabel)
            if goldLabel == candidateLabel:
                scoring[goldKey][candidateKey] = 1
            else:
                scoring[goldKey][candidateKey] = 0
    #Greedily select best combination and then remove all related combinations.
    while len(scoring) > 0:
        maxScore = -1
        maxLabel = ''
        for goldLabel in scoring:
            goldKey = str(goldLabel)
            for candidateLabel in scoring[goldKey]:
                candidateKey = str(candidateLabel)
                score = scoring[goldKey][candidateKey]
                if score >= maxScore:
                    maxScore = score
                    maxLabel = (goldKey, candidateKey)
        bestMatch[maxLabel] = scoring[maxLabel[0]][maxLabel[1]]
        scoring.pop(maxLabel[0])#remove all pairs that could
    return sum(bestMatch.values())/len(gold) 

In [141]:
#Boostrapping 
randomEM = []
randomUnigram = []
for i in range(10000):
    scoresUnigram = []
    scoresEM = []
    for url in judgements:
        goldIndex = random.randrange(0,len(judgements[url]))
        for i in range(len(judgements[url])):
            if goldIndex != i:
                scoresUnigram.append(getScoreUnigram(judgements[url][goldIndex],judgements[url][i]))
                scoresEM.append(getScoreEM(judgements[url][goldIndex],judgements[url][i]))
    randomEM.append(np.mean(scoresEM))
    randomUnigram.append(np.mean(scoresUnigram))
print('Exact Match max:{} min:{} mean:{}'.format(np.max(randomEM), np.min(randomEM),np.mean(randomEM)))
print('Unigram max:{} min:{} mean:{}'.format(np.max(randomUnigram), np.min(randomUnigram),np.mean(randomUnigram)))

Exact Match max:0.4968253968253968 min:0.3658730158730159 mean:0.4354140476190477
Unigram max:0.6573544973544972 min:0.4976322751322751 mean:0.5770007037037038


In [139]:
#First Judge as Gold
scoresUnigram = []
scoresEM = []
for url in judgements:
    for i in range(len(judgements[url][1:])):
            scoresUnigram.append(getScoreUnigram(judgements[url][0],judgements[url][i]))
            scoresEM.append(getScoreEM(judgements[url][0],judgements[url][i]))
print('Exact Match max:{} min:{} mean:{}'.format(np.max(scoresEM), np.min(scoresEM),np.mean(scoresEM)))
print('Unigram max:{} min:{} mean:{}'.format(np.max(scoresUnigram), np.min(scoresUnigram),np.mean(scoresUnigram)))

Exact Match max:1.0 min:0.0 mean:0.5579365079365078
Unigram max:1.0 min:0.0 mean:0.6900396825396826


In [137]:
#Highest Agreement
#This form tends to skew towards a perfect score not because judges always get it right but because the judge with the highest agreement tended to only submit a single keyphrase. Since we are selecting the best match greedily judges tend to have agreement with this single keyphrase in at least one of their other keyphrases
scoresEM = []
scoresUnigram
for url in judgements:
    goldIndex = random.randrange(0,len(judgements[url]))
    maxScore = 0
    maxSequence = []
    for i in range(len(judgements[url])):
        currentRuns = []
        for j in range(len(judgements[url])):
            if j != i:
                currentRuns.append(getScoreUnigram(judgements[url][i],judgements[url][j]))
            if len(currentRuns) > 0:
                currentScore = np.mean(currentRuns)
                if maxScore < currentScore:
                    maxScore = currentScore
                    maxSequence = currentRuns
    scoresUnigram += maxSequence
for url in judgements:
    goldIndex = random.randrange(0,len(judgements[url]))
    maxScore = 0
    maxSequence = []
    for i in range(len(judgements[url])):
        currentRuns = []
        for j in range(len(judgements[url])):
            if j != i:
                currentRuns.append(getScoreEM(judgements[url][i],judgements[url][j]))
            currentScore = np.mean(currentRuns)
            if maxScore < currentScore:
                maxScore = currentScore
                maxSequence = currentRuns
    scoresEM += maxSequence
print('Exact Match max:{} min:{} mean:{}'.format(np.max(scoresEM), np.min(scoresEM),np.mean(scoresEM)))
print('Unigram max:{} min:{} mean:{}'.format(np.max(scoresUnigram), np.min(scoresUnigram),np.mean(scoresUnigram)))

Exact Match max:1.0 min:0.0 mean:0.5466988727858293
Unigram max:1.0 min:0.0 mean:0.7270899470899471


In [138]:
#Pariwise
scoresUnigram = []
scoresEM = []
for url in judgements:
    for i in range(len(judgements[url])):
        currentRuns = []
        for j in range(len(judgements[url])):
            if j != i:
                currentRuns.append(getScoreUnigram(judgements[url][i],judgements[url][j]))
        if len(currentRuns) > 0:
            scoresUnigram.append(np.sum(currentRuns)/(len(judgements[url])-1))
for url in judgements:
    for i in range(len(judgements[url])):
        currentRuns = []
        for j in range(len(judgements[url])):
            if j != i:
                currentRuns.append(getScoreEM(judgements[url][i],judgements[url][j]))
        if len(currentRuns) > 0:
            scoresEM.append(np.sum(currentRuns)/(len(judgements[url])-1))
print('Exact Match max:{} min:{} mean:{}'.format(np.max(scoresEM), np.min(scoresEM),np.mean(scoresEM)))
print('Unigram max:{} min:{} mean:{}'.format(np.max(scoresUnigram), np.min(scoresUnigram),np.mean(scoresUnigram)))

Exact Match max:1.0 min:0.0 mean:0.43513257575757575
Unigram max:1.0 min:0.0 mean:0.5766466750841751
