Skip to content

Commit

Permalink
BUGFIX: Post-process ok with non-uniform annotations
Browse files Browse the repository at this point in the history
Before it was assumed that all annotation tasks used the same
set of instructions for all texts.  Now it is possible to have
some tasks annotated with certain instructions and others with
other instructions.

Furthermore, it previously output all scores for the same text
horizontally.  Now it does this only for texts annotated under a
single instruction set.

This can be viewed in the demo output
tests/lmeds_demo/output/LMEDS_Demo/duplicates_not_removed_results/
  • Loading branch information
timmahrt committed Oct 31, 2016
1 parent a70601e commit 5f55968
Show file tree
Hide file tree
Showing 16 changed files with 206 additions and 149 deletions.
141 changes: 75 additions & 66 deletions lmeds/post_process/transpose_rpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,23 @@ def _transposeRPT(dataListOfLists):
returnDict = {}
bCountList = []
pCountList = []
j = -1
for dataList in dataListOfLists:
j += 1
bCountList.append([])
pCountList.append([])

oom = utils.orderOfMagnitude(len(dataList)) + 1
stimTemplate = "s_%%0%dd,%%s,%%s" % oom
tmpAspectListToCount = []
i = 0
for taskName, stimuliArgList, _, dataTxt in dataList:
stimuliID = stimuliArgList[0]
i += 1
word = stimuliArgList[0]
aspect = stimuliArgList[4]

stimuliID = stimTemplate % (i, word, aspect)

tmpAspectListToCount.append(aspect)
dataList = dataTxt.split(",")

Expand All @@ -53,32 +61,33 @@ def _transposeRPT(dataListOfLists):
pCountList[-1].append(len(pScores))
bCountList[-1].append(len(bScores))

idKeyList.append(stimuliID)
aspectKeyList.append(aspect)
if j == 0:
idKeyList.append(stimuliID)
aspectKeyList.append(aspect)

returnDict.setdefault(stimuliID, {})
returnDict[stimuliID].setdefault(aspect, {})
returnDict[stimuliID][aspect].setdefault(B, [])
returnDict[stimuliID][aspect].setdefault(P, [])
returnDict[stimuliID].setdefault(B, [])
returnDict[stimuliID].setdefault(P, [])

returnDict[stimuliID][aspect][B].append(bScores)
returnDict[stimuliID][aspect][P].append(pScores)

idKeyList = list(set(idKeyList))
idKeyList.sort()

aspectKeyList = returnDict[list(returnDict.keys())[0]].keys()
returnDict[stimuliID][B].append(bScores)
returnDict[stimuliID][P].append(pScores)

# Transpose the data
for sid in idKeyList:
for aspect in aspectKeyList:
for taskType in [B, P]:
zipped = utils.safeZip(returnDict[sid][aspect][taskType],
enforceLength=True)
returnDict[sid][aspect][taskType] = [list(subTuple)
for subTuple in zipped]
for taskType in [B, P]:
try:
tmpList = returnDict[sid][taskType]
except KeyError:
continue
if len(tmpList) == 0:
continue
zipped = utils.safeZip(tmpList,
enforceLength=True)
returnDict[sid][taskType] = [list(subTuple)
for subTuple in zipped]

return returnDict, idKeyList, aspectKeyList
return returnDict, idKeyList


def _getScores(userData, scoreType):
Expand All @@ -90,8 +99,6 @@ def _getScores(userData, scoreType):

sumList = ["%.03f" % (sum([float(val) for val in subList]) / len(subList))
for subList in scoreList]
# scoreList = [lst + ["%.03f" % sumVal, ]
# for lst, sumVal in zip(scoreList, sumList)]

return scoreList, sumList

Expand All @@ -111,7 +118,7 @@ def _outputScores(featPath, aspect, stimulusID, returnDict,
fn = join(scorePath, "%s.csv" % stimulusID)
with io.open(fn, "w", encoding="utf-8") as fd:
fd.write("\n".join([",".join(val) for val in
returnDict[stimulusID][aspect][scoreType]]))
returnDict[stimulusID][scoreType]]))


def _getSmallestPrefix(keywordList):
Expand All @@ -131,17 +138,17 @@ def _getSmallestPrefix(keywordList):
return wordPrefixDict


def _buildHeader(fnList, aspectKeyList, pageName, doSequenceOrder):
def _buildHeader(fnList, pageName, doSequenceOrder):
# Build the name lists, which will take up the first two rows in the
# spreadsheet. One is normal, and one is anonymized
oom = utils.orderOfMagnitude(len(fnList))
userNameTemplate = "t%%0%dd" % (oom + 1) + ".%s%%s"
userNameTemplate = "t%%0%dd" % (oom + 1) + ".%s"

bNameList = [os.path.splitext(name)[0] + ".b%s" for name in fnList]
bNameList = [os.path.splitext(name)[0] + ".b" for name in fnList]
anonBNameList = [userNameTemplate % (i + 1, 'b')
for i in range(len(fnList))]

pNameList = [os.path.splitext(name)[0] + ".p%s" for name in fnList]
pNameList = [os.path.splitext(name)[0] + ".p" for name in fnList]
anonPNameList = [userNameTemplate % (i + 1, 'p')
for i in range(len(fnList))]
headerDict = {"boundary": (bNameList, anonBNameList),
Expand All @@ -150,10 +157,8 @@ def _buildHeader(fnList, aspectKeyList, pageName, doSequenceOrder):
"boundary_and_prominence": (bNameList + pNameList,
anonBNameList + anonPNameList)}

aspectInitialsDict = _getSmallestPrefix(aspectKeyList)

bTxt = "sum.b%(aspect)s"
pTxt = "sum.p%(aspect)s"
bTxt = "sum.b"
pTxt = "sum.p"

txtPrefixDict = {"boundary": (bTxt),
"prominence": (pTxt),
Expand All @@ -166,30 +171,28 @@ def _buildHeader(fnList, aspectKeyList, pageName, doSequenceOrder):
sumHeaderList = []
headerList = []
anonHeaderList = []
for aspect in aspectKeyList:
aspectInitial = aspectInitialsDict[aspect]
sumHeaderList.append(header2Prefix % {'aspect': aspectInitial})
headerList.extend([name % aspectInitial
for name in nameList])
anonHeaderList.extend([name % aspectInitial
for name in anonNameList])
sumHeaderList.append(header2Prefix)
headerList.extend([name
for name in nameList])
anonHeaderList.extend([name
for name in anonNameList])

sumTxt = ",".join(sumHeaderList)
headerStr = ",".join(headerList)
anonHeaderStr = ",".join(anonHeaderList)

rowTemplate = "StimulusID,Word,%s,%s"
rowTemplate = "StimulusID,txtKey,instructKey,Word,%s,%s"

headerRow = rowTemplate % (sumTxt, headerStr)
anonHeaderRow = rowTemplate % (sumTxt, anonHeaderStr)

# Add the sequence order if needed
if doSequenceOrder:
txtPrefixDict2 = {"boundary": "b%(aspect)s",
"prominence": "p%(aspect)s",
"syllable_marking": "p%(aspect)s",
"boundary_and_prominence": "bp%(aspect)s"}
sequencePageCode = txtPrefixDict2[pageName] % {'aspect': aspectInitial}
txtPrefixDict2 = {"boundary": "b",
"prominence": "p",
"syllable_marking": "p",
"boundary_and_prominence": "bp"}
sequencePageCode = txtPrefixDict2[pageName]
tmpTuple = transpose_utils.getUserSeqHeader(fnList,
sequencePageCode,
oom)
Expand Down Expand Up @@ -269,44 +272,50 @@ def transposeRPT(path, txtPath, pageName, outputPath):
txtDict[name] = [syllable for word in txt.split(",") if word != ""
for syllable in word.split(demarcator)]

returnDict, idKeyList, aspectKeyList = _transposeRPT(responseDataList)
returnDict, idKeyList = _transposeRPT(responseDataList)

doUserSeqHeader = len(orderListOfLists) > 0
headerRow, anonHeaderRow = _buildHeader(fnList, aspectKeyList, pageName,
headerRow, anonHeaderRow = _buildHeader(fnList, pageName,
doUserSeqHeader)

# Format the output rpt scores
aggrOutputList = [headerRow, anonHeaderRow]
for i in range(len(idKeyList)):

stimulusID = idKeyList[i]
wordList = txtDict[stimulusID]

wordList = txtDict[stimulusID.split(",")[1]]
stimulusIDList = [stimulusID for _ in wordList]
aspectSumList = [stimulusIDList, wordList, ]
aspectList = []
for aspect in aspectKeyList:
bScoreList, bSumList = _getScores(returnDict[stimulusID][aspect],

try:
bScoreList, bSumList = _getScores(returnDict[stimulusID],
B)
pScoreList, pSumList = _getScores(returnDict[stimulusID][aspect],
except KeyError:
pass
try:
pScoreList, pSumList = _getScores(returnDict[stimulusID],
P)

if pageName == "boundary":
aspectSumList.extend([bSumList, ])
aspectList.extend([bScoreList, ])
elif pageName in ["prominence", "syllable_marking"]:
aspectSumList.extend([pSumList, ])
aspectList.extend([pScoreList, ])
elif pageName == "boundary_and_prominence":
aspectSumList.extend([bSumList, pSumList, ])
aspectList.extend([bScoreList, pScoreList, ])
except KeyError:
pass

# Extend header with sequence order information
if doUserSeqHeader:
orderStr = orderList[i]
numAnnotators = range(max([len(bSumList), len(pSumList)]))
tmpOrderList = [orderStr for _ in numAnnotators]
aspectList.extend([tmpOrderList, ])
if pageName == "boundary":
aspectSumList.extend([bSumList, ])
aspectList.extend([bScoreList, ])
elif pageName in ["prominence", "syllable_marking"]:
aspectSumList.extend([pSumList, ])
aspectList.extend([pScoreList, ])
elif pageName == "boundary_and_prominence":
aspectSumList.extend([bSumList, pSumList, ])
aspectList.extend([bScoreList, pScoreList, ])

# Extend header with sequence order information
if doUserSeqHeader:
orderStr = orderList[i]
numAnnotators = range(max([len(bSumList), len(pSumList)]))
tmpOrderList = [orderStr for _ in numAnnotators]
aspectList.extend([tmpOrderList, ])

dataList = aspectSumList + aspectList
combinedList = [_unifyRow(row) for row in
Expand Down
2 changes: 1 addition & 1 deletion lmeds/user_scripts/post_process_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def postProcessResults(testName, sequenceFN, removeDuplicatesFlag,
transpose_choice.transposeChoice(join(pathToData, pageName),
pageName,
outputPath)


if __name__ == "__main__":

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
StimulusID,txtKey,instructKey,Word,sum.b,bob.b,mary.b,sarah.b
StimulusID,txtKey,instructKey,Word,sum.b,t1.b,t2.b,t3.b
s_1,apples,nonspecific_boundary_instr,today,1.000,1,1,1
s_1,apples,nonspecific_boundary_instr,is,0.000,0,0,0
s_1,apples,nonspecific_boundary_instr,a,0.000,0,0,0
s_1,apples,nonspecific_boundary_instr,good,0.000,0,0,0
s_1,apples,nonspecific_boundary_instr,day,0.000,0,0,0
s_1,apples,nonspecific_boundary_instr,to,0.000,0,0,0
s_1,apples,nonspecific_boundary_instr,buy,0.000,0,0,0
s_1,apples,nonspecific_boundary_instr,apples,0.333,1,0,0
s_2,apples,boundary_at_most_one,today,0.000,0,0,0
s_2,apples,boundary_at_most_one,is,0.000,0,0,0
s_2,apples,boundary_at_most_one,a,0.000,0,0,0
s_2,apples,boundary_at_most_one,good,0.667,1,0,1
s_2,apples,boundary_at_most_one,day,0.333,0,1,0
s_2,apples,boundary_at_most_one,to,0.000,0,0,0
s_2,apples,boundary_at_most_one,buy,0.000,0,0,0
s_2,apples,boundary_at_most_one,apples,0.000,0,0,0
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
StimulusID,txtKey,instructKey,Word,sum.b,sum.p,bob.b,mary.b,sarah.b,bob.p,mary.p,sarah.p
StimulusID,txtKey,instructKey,Word,sum.b,sum.p,t1.b,t2.b,t3.b,t1.p,t2.p,t3.p
s_1,apples,nonspecific_boundary_instr,today,1.000,0.667,1,1,1,1,0,1
s_1,apples,nonspecific_boundary_instr,is,0.000,0.000,0,0,0,0,0,0
s_1,apples,nonspecific_boundary_instr,a,0.000,0.000,0,0,0,0,0,0
s_1,apples,nonspecific_boundary_instr,good,0.000,0.000,0,0,0,0,0,0
s_1,apples,nonspecific_boundary_instr,day,0.000,0.000,0,0,0,0,0,0
s_1,apples,nonspecific_boundary_instr,to,0.000,0.000,0,0,0,0,0,0
s_1,apples,nonspecific_boundary_instr,buy,0.000,0.000,0,0,0,0,0,0
s_1,apples,nonspecific_boundary_instr,apples,0.000,1.000,0,0,0,1,1,1
s_2,apples,mark_one_boundary,today,0.667,0.000,1,0,1,0,0,0
s_2,apples,mark_one_boundary,is,0.000,0.000,0,0,0,0,0,0
s_2,apples,mark_one_boundary,a,0.000,0.000,0,0,0,0,0,0
s_2,apples,mark_one_boundary,good,0.333,0.000,0,1,0,0,0,0
s_2,apples,mark_one_boundary,day,0.000,0.333,0,0,0,0,0,1
s_2,apples,mark_one_boundary,to,0.000,0.000,0,0,0,0,0,0
s_2,apples,mark_one_boundary,buy,0.000,0.333,0,0,0,0,1,0
s_2,apples,mark_one_boundary,apples,0.000,0.333,0,0,0,1,0,0
s_3,apples,keyboard_shortcuts_boundary,today,0.000,0.000,0,0,0,0,0,0
s_3,apples,keyboard_shortcuts_boundary,is,0.000,0.000,0,0,0,0,0,0
s_3,apples,keyboard_shortcuts_boundary,a,0.000,0.000,0,0,0,0,0,0
s_3,apples,keyboard_shortcuts_boundary,good,0.000,0.333,0,0,0,0,1,0
s_3,apples,keyboard_shortcuts_boundary,day,1.000,0.000,1,1,1,0,0,0
s_3,apples,keyboard_shortcuts_boundary,to,0.000,0.000,0,0,0,0,0,0
s_3,apples,keyboard_shortcuts_boundary,buy,0.000,0.000,0,0,0,0,0,0
s_3,apples,keyboard_shortcuts_boundary,apples,0.000,0.000,0,0,0,0,0,0
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
stimulusID,arg1,arg2,arg3,arg4,arg5,arg6,arg7,bob.media_choice,mary.media_choice,sarah.media_choice
stimulusID,arg1,arg2,arg3,arg4,arg5,arg6,arg7,t1.media_choice,t2.media_choice,t3.media_choice
s0,same_different_instr,audio,0.5,1,-1,[[water apples]],[same different],0,0,1
s1,same_different_instr,audio,0.5,1,2,[[water] [apples]],[same different],1,1,1
s2,prominence_video_instr,video,0.5,1,-1,[[syllables_video]],[vnonprom nonprominent neutral prominent vprom],0,0,0
s3,one_play_two_response,audio,0.5,1,-1,[[water apples]],[same different],bindPlayKeyIDList=p,bindResponseKeyIDList=,[z m],1,1,0
s4,two_play_two_response,audio,0.5,1,2,[[water] [apples]],[same different],bindPlayKeyIDList=,[q p],bindResponseKeyIDList=,[z m],0,1,0
s5,prominence_point_instr,audio,0.5,1,-1,[[water]],[vnonprom nonprominent neutral prominent vprom],transcriptList=,[water_word],0,1,0
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
s0,same_different_instr,audio,0.5,1,-1,[[water apples]],[same different]
s1,same_different_instr,audio,0.5,1,2,[[water] [apples]],[same different]
s2,prominence_video_instr,video,0.5,1,-1,[[syllables_video]],[vnonprom nonprominent neutral prominent vprom]
s3,one_play_two_response,audio,0.5,1,-1,[[water apples]],[same different],bindPlayKeyIDList=p,bindResponseKeyIDList=,[z m]
s4,two_play_two_response,audio,0.5,1,2,[[water] [apples]],[same different],bindPlayKeyIDList=,[q p],bindResponseKeyIDList=,[z m]
s5,prominence_point_instr,audio,0.5,1,-1,[[water]],[vnonprom nonprominent neutral prominent vprom],transcriptList=,[water_word]
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
StimulusID,txtKey,instructKey,Word,sum.p,bob.p,mary.p,sarah.p
StimulusID,txtKey,instructKey,Word,sum.p,t1.p,t2.p,t3.p
s_1,syllables,nonspecific_syllables,u,0.667,0,1,1
s_1,syllables,nonspecific_syllables,ni,0.333,1,0,0
s_1,syllables,nonspecific_syllables,ver,0.000,0,0,0
s_1,syllables,nonspecific_syllables,si,0.333,0,1,0
s_1,syllables,nonspecific_syllables,ty,0.000,0,0,0
s_1,syllables,nonspecific_syllables,ba,0.333,0,0,1
s_1,syllables,nonspecific_syllables,by,0.000,0,0,0
s_1,syllables,nonspecific_syllables,di,0.000,0,0,0
s_1,syllables,nonspecific_syllables,vi,0.333,1,0,0
s_1,syllables,nonspecific_syllables,sion,0.000,0,0,0
s_1,syllables,nonspecific_syllables,en,0.000,0,0,0
s_1,syllables,nonspecific_syllables,cy,1.000,1,1,1
s_1,syllables,nonspecific_syllables,clo,0.000,0,0,0
s_1,syllables,nonspecific_syllables,pe,0.000,0,0,0
s_1,syllables,nonspecific_syllables,di,0.333,0,0,1
s_1,syllables,nonspecific_syllables,a,0.000,0,0,0
s_1,syllables,nonspecific_syllables,um,0.000,0,0,0
s_1,syllables,nonspecific_syllables,bre,0.333,1,0,0
s_1,syllables,nonspecific_syllables,lla,0.333,0,1,0

This file was deleted.

This file was deleted.

This file was deleted.

Loading

0 comments on commit 5f55968

Please sign in to comment.