Skip to content

Commit

Permalink
BUGFIX: Post-process ok with non-uniform annotations
Browse files Browse the repository at this point in the history
Before it was assumed that all annotation tasks used the same
set of instructions for all texts.  Now it is possible to have
some tasks annotated with certain instructions and others with
other instructions.

Furthermore, it previously output all scores for the same text
horizontally.  Now it does this only for texts annotated under a
single instruction set.

This can be viewed in the demo output
tests/lmeds_demo/output/LMEDS_Demo/duplicates_not_removed_results/
  • Loading branch information
timmahrt committed Oct 31, 2016
1 parent a70601e commit 5f55968
Show file tree
Hide file tree
Showing 16 changed files with 206 additions and 149 deletions.
141 changes: 75 additions & 66 deletions lmeds/post_process/transpose_rpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,23 @@ def _transposeRPT(dataListOfLists):
returnDict = {}
bCountList = []
pCountList = []
j = -1
for dataList in dataListOfLists:
j += 1
bCountList.append([])
pCountList.append([])

oom = utils.orderOfMagnitude(len(dataList)) + 1
stimTemplate = "s_%%0%dd,%%s,%%s" % oom
tmpAspectListToCount = []
i = 0
for taskName, stimuliArgList, _, dataTxt in dataList:
stimuliID = stimuliArgList[0]
i += 1
word = stimuliArgList[0]
aspect = stimuliArgList[4]

stimuliID = stimTemplate % (i, word, aspect)

tmpAspectListToCount.append(aspect)
dataList = dataTxt.split(",")

Expand All @@ -53,32 +61,33 @@ def _transposeRPT(dataListOfLists):
pCountList[-1].append(len(pScores))
bCountList[-1].append(len(bScores))

idKeyList.append(stimuliID)
aspectKeyList.append(aspect)
if j == 0:
idKeyList.append(stimuliID)
aspectKeyList.append(aspect)

returnDict.setdefault(stimuliID, {})
returnDict[stimuliID].setdefault(aspect, {})
returnDict[stimuliID][aspect].setdefault(B, [])
returnDict[stimuliID][aspect].setdefault(P, [])
returnDict[stimuliID].setdefault(B, [])
returnDict[stimuliID].setdefault(P, [])

returnDict[stimuliID][aspect][B].append(bScores)
returnDict[stimuliID][aspect][P].append(pScores)

idKeyList = list(set(idKeyList))
idKeyList.sort()

aspectKeyList = returnDict[list(returnDict.keys())[0]].keys()
returnDict[stimuliID][B].append(bScores)
returnDict[stimuliID][P].append(pScores)

# Transpose the data
for sid in idKeyList:
for aspect in aspectKeyList:
for taskType in [B, P]:
zipped = utils.safeZip(returnDict[sid][aspect][taskType],
enforceLength=True)
returnDict[sid][aspect][taskType] = [list(subTuple)
for subTuple in zipped]
for taskType in [B, P]:
try:
tmpList = returnDict[sid][taskType]
except KeyError:
continue
if len(tmpList) == 0:
continue
zipped = utils.safeZip(tmpList,
enforceLength=True)
returnDict[sid][taskType] = [list(subTuple)
for subTuple in zipped]

return returnDict, idKeyList, aspectKeyList
return returnDict, idKeyList


def _getScores(userData, scoreType):
Expand All @@ -90,8 +99,6 @@ def _getScores(userData, scoreType):

sumList = ["%.03f" % (sum([float(val) for val in subList]) / len(subList))
for subList in scoreList]
# scoreList = [lst + ["%.03f" % sumVal, ]
# for lst, sumVal in zip(scoreList, sumList)]

return scoreList, sumList

Expand All @@ -111,7 +118,7 @@ def _outputScores(featPath, aspect, stimulusID, returnDict,
fn = join(scorePath, "%s.csv" % stimulusID)
with io.open(fn, "w", encoding="utf-8") as fd:
fd.write("\n".join([",".join(val) for val in
returnDict[stimulusID][aspect][scoreType]]))
returnDict[stimulusID][scoreType]]))


def _getSmallestPrefix(keywordList):
Expand All @@ -131,17 +138,17 @@ def _getSmallestPrefix(keywordList):
return wordPrefixDict


def _buildHeader(fnList, aspectKeyList, pageName, doSequenceOrder):
def _buildHeader(fnList, pageName, doSequenceOrder):
# Build the name lists, which will take up the first two rows in the
# spreadsheet. One is normal, and one is anonymized
oom = utils.orderOfMagnitude(len(fnList))
userNameTemplate = "t%%0%dd" % (oom + 1) + ".%s%%s"
userNameTemplate = "t%%0%dd" % (oom + 1) + ".%s"

bNameList = [os.path.splitext(name)[0] + ".b%s" for name in fnList]
bNameList = [os.path.splitext(name)[0] + ".b" for name in fnList]
anonBNameList = [userNameTemplate % (i + 1, 'b')
for i in range(len(fnList))]

pNameList = [os.path.splitext(name)[0] + ".p%s" for name in fnList]
pNameList = [os.path.splitext(name)[0] + ".p" for name in fnList]
anonPNameList = [userNameTemplate % (i + 1, 'p')
for i in range(len(fnList))]
headerDict = {"boundary": (bNameList, anonBNameList),
Expand All @@ -150,10 +157,8 @@ def _buildHeader(fnList, aspectKeyList, pageName, doSequenceOrder):
"boundary_and_prominence": (bNameList + pNameList,
anonBNameList + anonPNameList)}

aspectInitialsDict = _getSmallestPrefix(aspectKeyList)

bTxt = "sum.b%(aspect)s"
pTxt = "sum.p%(aspect)s"
bTxt = "sum.b"
pTxt = "sum.p"

txtPrefixDict = {"boundary": (bTxt),
"prominence": (pTxt),
Expand All @@ -166,30 +171,28 @@ def _buildHeader(fnList, aspectKeyList, pageName, doSequenceOrder):
sumHeaderList = []
headerList = []
anonHeaderList = []
for aspect in aspectKeyList:
aspectInitial = aspectInitialsDict[aspect]
sumHeaderList.append(header2Prefix % {'aspect': aspectInitial})
headerList.extend([name % aspectInitial
for name in nameList])
anonHeaderList.extend([name % aspectInitial
for name in anonNameList])
sumHeaderList.append(header2Prefix)
headerList.extend([name
for name in nameList])
anonHeaderList.extend([name
for name in anonNameList])

sumTxt = ",".join(sumHeaderList)
headerStr = ",".join(headerList)
anonHeaderStr = ",".join(anonHeaderList)

rowTemplate = "StimulusID,Word,%s,%s"
rowTemplate = "StimulusID,txtKey,instructKey,Word,%s,%s"

headerRow = rowTemplate % (sumTxt, headerStr)
anonHeaderRow = rowTemplate % (sumTxt, anonHeaderStr)

# Add the sequence order if needed
if doSequenceOrder:
txtPrefixDict2 = {"boundary": "b%(aspect)s",
"prominence": "p%(aspect)s",
"syllable_marking": "p%(aspect)s",
"boundary_and_prominence": "bp%(aspect)s"}
sequencePageCode = txtPrefixDict2[pageName] % {'aspect': aspectInitial}
txtPrefixDict2 = {"boundary": "b",
"prominence": "p",
"syllable_marking": "p",
"boundary_and_prominence": "bp"}
sequencePageCode = txtPrefixDict2[pageName]
tmpTuple = transpose_utils.getUserSeqHeader(fnList,
sequencePageCode,
oom)
Expand Down Expand Up @@ -269,44 +272,50 @@ def transposeRPT(path, txtPath, pageName, outputPath):
txtDict[name] = [syllable for word in txt.split(",") if word != ""
for syllable in word.split(demarcator)]

returnDict, idKeyList, aspectKeyList = _transposeRPT(responseDataList)
returnDict, idKeyList = _transposeRPT(responseDataList)

doUserSeqHeader = len(orderListOfLists) > 0
headerRow, anonHeaderRow = _buildHeader(fnList, aspectKeyList, pageName,
headerRow, anonHeaderRow = _buildHeader(fnList, pageName,
doUserSeqHeader)

# Format the output rpt scores
aggrOutputList = [headerRow, anonHeaderRow]
for i in range(len(idKeyList)):

stimulusID = idKeyList[i]
wordList = txtDict[stimulusID]

wordList = txtDict[stimulusID.split(",")[1]]
stimulusIDList = [stimulusID for _ in wordList]
aspectSumList = [stimulusIDList, wordList, ]
aspectList = []
for aspect in aspectKeyList:
bScoreList, bSumList = _getScores(returnDict[stimulusID][aspect],

try:
bScoreList, bSumList = _getScores(returnDict[stimulusID],
B)
pScoreList, pSumList = _getScores(returnDict[stimulusID][aspect],
except KeyError:
pass
try:
pScoreList, pSumList = _getScores(returnDict[stimulusID],
P)

if pageName == "boundary":
aspectSumList.extend([bSumList, ])
aspectList.extend([bScoreList, ])
elif pageName in ["prominence", "syllable_marking"]:
aspectSumList.extend([pSumList, ])
aspectList.extend([pScoreList, ])
elif pageName == "boundary_and_prominence":
aspectSumList.extend([bSumList, pSumList, ])
aspectList.extend([bScoreList, pScoreList, ])
except KeyError:
pass

# Extend header with sequence order information
if doUserSeqHeader:
orderStr = orderList[i]
numAnnotators = range(max([len(bSumList), len(pSumList)]))
tmpOrderList = [orderStr for _ in numAnnotators]
aspectList.extend([tmpOrderList, ])
if pageName == "boundary":
aspectSumList.extend([bSumList, ])
aspectList.extend([bScoreList, ])
elif pageName in ["prominence", "syllable_marking"]:
aspectSumList.extend([pSumList, ])
aspectList.extend([pScoreList, ])
elif pageName == "boundary_and_prominence":
aspectSumList.extend([bSumList, pSumList, ])
aspectList.extend([bScoreList, pScoreList, ])

# Extend header with sequence order information
if doUserSeqHeader:
orderStr = orderList[i]
numAnnotators = range(max([len(bSumList), len(pSumList)]))
tmpOrderList = [orderStr for _ in numAnnotators]
aspectList.extend([tmpOrderList, ])

dataList = aspectSumList + aspectList
combinedList = [_unifyRow(row) for row in
Expand Down
2 changes: 1 addition & 1 deletion lmeds/user_scripts/post_process_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def postProcessResults(testName, sequenceFN, removeDuplicatesFlag,
transpose_choice.transposeChoice(join(pathToData, pageName),
pageName,
outputPath)


if __name__ == "__main__":

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
StimulusID,txtKey,instructKey,Word,sum.b,bob.b,mary.b,sarah.b
StimulusID,txtKey,instructKey,Word,sum.b,t1.b,t2.b,t3.b
s_1,apples,nonspecific_boundary_instr,today,1.000,1,1,1
s_1,apples,nonspecific_boundary_instr,is,0.000,0,0,0
s_1,apples,nonspecific_boundary_instr,a,0.000,0,0,0
s_1,apples,nonspecific_boundary_instr,good,0.000,0,0,0
s_1,apples,nonspecific_boundary_instr,day,0.000,0,0,0
s_1,apples,nonspecific_boundary_instr,to,0.000,0,0,0
s_1,apples,nonspecific_boundary_instr,buy,0.000,0,0,0
s_1,apples,nonspecific_boundary_instr,apples,0.333,1,0,0
s_2,apples,boundary_at_most_one,today,0.000,0,0,0
s_2,apples,boundary_at_most_one,is,0.000,0,0,0
s_2,apples,boundary_at_most_one,a,0.000,0,0,0
s_2,apples,boundary_at_most_one,good,0.667,1,0,1
s_2,apples,boundary_at_most_one,day,0.333,0,1,0
s_2,apples,boundary_at_most_one,to,0.000,0,0,0
s_2,apples,boundary_at_most_one,buy,0.000,0,0,0
s_2,apples,boundary_at_most_one,apples,0.000,0,0,0
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
StimulusID,txtKey,instructKey,Word,sum.b,sum.p,bob.b,mary.b,sarah.b,bob.p,mary.p,sarah.p
StimulusID,txtKey,instructKey,Word,sum.b,sum.p,t1.b,t2.b,t3.b,t1.p,t2.p,t3.p
s_1,apples,nonspecific_boundary_instr,today,1.000,0.667,1,1,1,1,0,1
s_1,apples,nonspecific_boundary_instr,is,0.000,0.000,0,0,0,0,0,0
s_1,apples,nonspecific_boundary_instr,a,0.000,0.000,0,0,0,0,0,0
s_1,apples,nonspecific_boundary_instr,good,0.000,0.000,0,0,0,0,0,0
s_1,apples,nonspecific_boundary_instr,day,0.000,0.000,0,0,0,0,0,0
s_1,apples,nonspecific_boundary_instr,to,0.000,0.000,0,0,0,0,0,0
s_1,apples,nonspecific_boundary_instr,buy,0.000,0.000,0,0,0,0,0,0
s_1,apples,nonspecific_boundary_instr,apples,0.000,1.000,0,0,0,1,1,1
s_2,apples,mark_one_boundary,today,0.667,0.000,1,0,1,0,0,0
s_2,apples,mark_one_boundary,is,0.000,0.000,0,0,0,0,0,0
s_2,apples,mark_one_boundary,a,0.000,0.000,0,0,0,0,0,0
s_2,apples,mark_one_boundary,good,0.333,0.000,0,1,0,0,0,0
s_2,apples,mark_one_boundary,day,0.000,0.333,0,0,0,0,0,1
s_2,apples,mark_one_boundary,to,0.000,0.000,0,0,0,0,0,0
s_2,apples,mark_one_boundary,buy,0.000,0.333,0,0,0,0,1,0
s_2,apples,mark_one_boundary,apples,0.000,0.333,0,0,0,1,0,0
s_3,apples,keyboard_shortcuts_boundary,today,0.000,0.000,0,0,0,0,0,0
s_3,apples,keyboard_shortcuts_boundary,is,0.000,0.000,0,0,0,0,0,0
s_3,apples,keyboard_shortcuts_boundary,a,0.000,0.000,0,0,0,0,0,0
s_3,apples,keyboard_shortcuts_boundary,good,0.000,0.333,0,0,0,0,1,0
s_3,apples,keyboard_shortcuts_boundary,day,1.000,0.000,1,1,1,0,0,0
s_3,apples,keyboard_shortcuts_boundary,to,0.000,0.000,0,0,0,0,0,0
s_3,apples,keyboard_shortcuts_boundary,buy,0.000,0.000,0,0,0,0,0,0
s_3,apples,keyboard_shortcuts_boundary,apples,0.000,0.000,0,0,0,0,0,0
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
stimulusID,arg1,arg2,arg3,arg4,arg5,arg6,arg7,bob.media_choice,mary.media_choice,sarah.media_choice
stimulusID,arg1,arg2,arg3,arg4,arg5,arg6,arg7,t1.media_choice,t2.media_choice,t3.media_choice
s0,same_different_instr,audio,0.5,1,-1,[[water apples]],[same different],0,0,1
s1,same_different_instr,audio,0.5,1,2,[[water] [apples]],[same different],1,1,1
s2,prominence_video_instr,video,0.5,1,-1,[[syllables_video]],[vnonprom nonprominent neutral prominent vprom],0,0,0
s3,one_play_two_response,audio,0.5,1,-1,[[water apples]],[same different],bindPlayKeyIDList=p,bindResponseKeyIDList=,[z m],1,1,0
s4,two_play_two_response,audio,0.5,1,2,[[water] [apples]],[same different],bindPlayKeyIDList=,[q p],bindResponseKeyIDList=,[z m],0,1,0
s5,prominence_point_instr,audio,0.5,1,-1,[[water]],[vnonprom nonprominent neutral prominent vprom],transcriptList=,[water_word],0,1,0
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
s0,same_different_instr,audio,0.5,1,-1,[[water apples]],[same different]
s1,same_different_instr,audio,0.5,1,2,[[water] [apples]],[same different]
s2,prominence_video_instr,video,0.5,1,-1,[[syllables_video]],[vnonprom nonprominent neutral prominent vprom]
s3,one_play_two_response,audio,0.5,1,-1,[[water apples]],[same different],bindPlayKeyIDList=p,bindResponseKeyIDList=,[z m]
s4,two_play_two_response,audio,0.5,1,2,[[water] [apples]],[same different],bindPlayKeyIDList=,[q p],bindResponseKeyIDList=,[z m]
s5,prominence_point_instr,audio,0.5,1,-1,[[water]],[vnonprom nonprominent neutral prominent vprom],transcriptList=,[water_word]
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
StimulusID,txtKey,instructKey,Word,sum.p,bob.p,mary.p,sarah.p
StimulusID,txtKey,instructKey,Word,sum.p,t1.p,t2.p,t3.p
s_1,syllables,nonspecific_syllables,u,0.667,0,1,1
s_1,syllables,nonspecific_syllables,ni,0.333,1,0,0
s_1,syllables,nonspecific_syllables,ver,0.000,0,0,0
s_1,syllables,nonspecific_syllables,si,0.333,0,1,0
s_1,syllables,nonspecific_syllables,ty,0.000,0,0,0
s_1,syllables,nonspecific_syllables,ba,0.333,0,0,1
s_1,syllables,nonspecific_syllables,by,0.000,0,0,0
s_1,syllables,nonspecific_syllables,di,0.000,0,0,0
s_1,syllables,nonspecific_syllables,vi,0.333,1,0,0
s_1,syllables,nonspecific_syllables,sion,0.000,0,0,0
s_1,syllables,nonspecific_syllables,en,0.000,0,0,0
s_1,syllables,nonspecific_syllables,cy,1.000,1,1,1
s_1,syllables,nonspecific_syllables,clo,0.000,0,0,0
s_1,syllables,nonspecific_syllables,pe,0.000,0,0,0
s_1,syllables,nonspecific_syllables,di,0.333,0,0,1
s_1,syllables,nonspecific_syllables,a,0.000,0,0,0
s_1,syllables,nonspecific_syllables,um,0.000,0,0,0
s_1,syllables,nonspecific_syllables,bre,0.333,1,0,0
s_1,syllables,nonspecific_syllables,lla,0.333,0,1,0

This file was deleted.

This file was deleted.

This file was deleted.

Loading

0 comments on commit 5f55968

Please sign in to comment.