In [2]:
import json
from labelrepo.projects.nv_task import load_annotations

In [143]:
# Read the semi-automatic label definitions. The context manager closes the
# file handle, which a bare `open(...)` passed to `json.load` never does.
with open('../labels/neurovault-cobidas.json', encoding='utf-8') as labels_file:
    semiauto_labels = json.load(labels_file)

# Only the label names are needed to select annotations.
labels = [label['name'] for label in semiauto_labels]

# NOTE(review): `select_annotations` is not imported in the visible import cell
# (only `load_annotations` is) — confirm where it is expected to come from.
annotations = select_annotations(labels=labels, project_name='semiauto_ma_features')

### Clean up

Convert the `None` and `Unsure` labels into columns on the other annotations.

In [144]:
# Load the annotations table that the cells below work on. This rebinds
# `annotations`, replacing the value assigned from `select_annotations(...)`
# in the previous cell.
annotations = load_annotations()

In [145]:
# Number of documents with at least one `TaskName` label

all_unique = annotations.pmcid.unique()
# Rows (not documents) carrying a `TaskName` label; one document can have many.
labeled_unique = annotations[annotations.label_name == 'TaskName']
# `TaskName` rows that are not flagged in the `None` column.
not_none = labeled_unique[labeled_unique['None'] == False]

# Count distinct PMCIDs so the numbers describe documents, as the message says,
# rather than annotation rows.
n_labeled_docs = len(labeled_unique.pmcid.unique())
n_not_none_docs = len(not_none.pmcid.unique())

print(f'Number of documents with at least one `TaskName` label: {n_labeled_docs}, not None: {n_not_none_docs} and out of {len(all_unique)}')


Number of documents with at least one `TaskName` label: 82, out of 105


In [147]:
# Count the distinct documents that have a `TaskName` row not flagged as `None`.
is_task_name = annotations['label_name'] == 'TaskName'
is_not_none = annotations['None'] == False
annotations.loc[is_task_name & is_not_none, 'pmcid'].unique().shape

(69,)

In [108]:
# Keep only annotations from documents that have at least one `TaskName` label.
# The PMCIDs are derived here explicitly: handing a DataFrame to `isin` tests
# membership against its column labels, not against its `pmcid` values.
task_pmcids = annotations.loc[annotations.label_name == 'TaskName', 'pmcid'].unique()
annotations = annotations[annotations.pmcid.isin(task_pmcids)]

In [109]:
# List the distinct annotator names present in `annotations`.
annotations['annotator_name'].unique()

array(['delavega_nv', 'delavega-aliceoverlap', 'alice_chen',
       'delavega-other'], dtype=object)

### Overlap with Alice

In [110]:
# PMCIDs for which Alice recorded a `TaskName` annotation.
alice_task_rows = (annotations['annotator_name'] == 'alice_chen') & (annotations['label_name'] == 'TaskName')
alice_pmcids = annotations.loc[alice_task_rows, 'pmcid'].unique()

# Every annotation, from any annotator, on those documents.
ovelap = annotations.loc[annotations['pmcid'].isin(alice_pmcids)]

# Of those documents, the ones another annotator also tagged with `TaskName`.
other_task_rows = (ovelap['annotator_name'] != 'alice_chen') & (ovelap['label_name'] == 'TaskName')
overlap_pmcids = ovelap.loc[other_task_rows, 'pmcid'].unique()

In [111]:
# All annotations on the documents listed in `overlap_pmcids`.
in_overlap = annotations['pmcid'].isin(overlap_pmcids)
overlap = annotations.loc[in_overlap]

In [112]:
# Restrict the overlap set to the labels relevant for comparing task names.
task_related_labels = ['TaskName', 'None', 'Unsure']
overlap_task = overlap.loc[overlap['label_name'].isin(task_related_labels)]

In [113]:
key_cols = ['label_name', 'selected_text', 'annotator_name']
for pmcid, df in overlap_task.groupby('pmcid'):
    # Header: the document id, then the title taken from the group's first row.
    print(f'PMCID: {pmcid}')
    print(df.iloc[0]['title'])

    # Only the comparison columns are needed from here on.
    df = df.loc[:, key_cols]

    # Split the rows into Alice's and everyone else's.
    by_alice = df['annotator_name'] == 'alice_chen'
    alice = df[by_alice]
    other = df[~by_alice]

    # Alice's rows, a blank line, the other rows, then a separator.
    print(alice, '', other, '---', sep='\n')


PMCID: 2241626
Fast reproducible identification and large-scale databasing of individual functional cognitive networks
    label_name                 selected_text annotator_name
193   TaskName       mental calculation task     alice_chen
194     Unsure       mental calculation task     alice_chen
195   TaskName  language comprehension task      alice_chen
196     Unsure  language comprehension task      alice_chen
199   TaskName      horizontal checkerboards     alice_chen
200   TaskName        vertical checkerboards     alice_chen
205   TaskName         auditory calculations     alice_chen
206     Unsure         auditory calculations     alice_chen
207   TaskName           visual calculations     alice_chen
208     Unsure           visual calculations     alice_chen

    label_name                selected_text         annotator_name
197   TaskName  fast brain mapping sequence  delavega-aliceoverlap
198     Unsure  fast brain mapping sequence  delavega-aliceoverlap
---
PMCID: 4440210


In [114]:
# Inspect every annotation recorded for one document (PMCID 6303343).
annotations.loc[annotations['pmcid'] == 6303343]

Unnamed: 0,pmcid,title,doc_md5,label_name,extra_data,selected_text,start_char,end_char,project_name,annotator_name,label_color,context,context_start_char,context_end_char,doc_length
662,6303343,Contributions of default mode network stabilit...,a6b28cfc7bccdc6e0b212e8fdb4413af,Modality-fMRI-BOLD,,fMRI,1030,1034,semiauto_ma_features,delavega_nv,#98df8a,"ongly , or more stably suppressed to prom...",830,1234,50984
663,6303343,Contributions of default mode network stabilit...,a6b28cfc7bccdc6e0b212e8fdb4413af,ContrastDefinition,,Participants completed a version of the Balloo...,8611,10538,semiauto_ma_features,delavega_nv,#98df8a,ions outlined by the Declaration of Helsinki a...,8411,10738,50984
664,6303343,Contributions of default mode network stabilit...,a6b28cfc7bccdc6e0b212e8fdb4413af,TaskName,,Balloon Analogue Risk Task (BART),8651,8684,semiauto_ma_features,delavega_nv,#aec7e8,inki and experimental protocols were approved ...,8451,8884,50984
665,6303343,Contributions of default mode network stabilit...,a6b28cfc7bccdc6e0b212e8fdb4413af,Condition,,pump decisions,19987,20001,semiauto_ma_features,delavega_nv,#c5b0d5,Whole-brain statistical analyses were performe...,19787,20201,50984
666,6303343,Contributions of default mode network stabilit...,a6b28cfc7bccdc6e0b212e8fdb4413af,Condition,,cash-outs,20003,20012,semiauto_ma_features,delavega_nv,#c5b0d5,istical analyses were performed using the gene...,19803,20212,50984
667,6303343,Contributions of default mode network stabilit...,a6b28cfc7bccdc6e0b212e8fdb4413af,Condition,,explosion,20018,20027,semiauto_ma_features,delavega_nv,#c5b0d5,s were performed using the general linear mode...,19818,20227,50984
668,6303343,Contributions of default mode network stabilit...,a6b28cfc7bccdc6e0b212e8fdb4413af,ContrastDefinition,,“Task-Positive” Regions,41244,41267,semiauto_ma_features,delavega_nv,#98df8a,Anatomical Region +/− ...,41044,41467,50984
669,6303343,Contributions of default mode network stabilit...,a6b28cfc7bccdc6e0b212e8fdb4413af,ContrastDefinition,,“Task-Negative” Regions,43071,43094,semiauto_ma_features,delavega_nv,#98df8a,R Cerebellum + ...,42871,43294,50984
670,6303343,Contributions of default mode network stabilit...,a6b28cfc7bccdc6e0b212e8fdb4413af,ContrastDefinition,,Pumps,48029,48034,semiauto_ma_features,delavega_nv,#98df8a,6 −46 ...,47829,48234,50984


Based on Alejandro's subjective manual review, Alice & Alejandro overlapped on 9 / 13 studies.

Upon review:
 - In 3 of these, Alejandro's annotations were corrected, and in 1 Alice's annotation was incorrect.
 - Alice often tagged non-tasks as tasks (with the Unsure label) and also applied the Unsure label to real task names; overall she tagged potential tasks more liberally.

### Get annotation sections

Load the annotated documents to determine which section each annotation belongs to.

In [115]:
from labelrepo.documents import load_central_documents

# Map "pmcid_<PMCID>" to the list of distinct document MD5s annotated for it.
unique_md5 = {
    f"pmcid_{pmcid}": df['doc_md5'].unique().tolist()
    for pmcid, df in annotations.groupby('pmcid')
}

annotated_docs = load_central_documents(unique_md5)

In [116]:
# Turn into dict with key as pmcid.
# The name `annotated_docs` is rebound from a collection of documents to a
# dict, so re-running this cell alone would iterate over the dict's keys and
# fail on `doc['metadata']`. The guard makes the cell safe to run repeatedly.
if not isinstance(annotated_docs, dict):
    annotated_docs = {doc['metadata']['pmcid']: doc for doc in annotated_docs}

In [117]:
# Get section for each annotation from document metadata

def get_section(pmcid, start, end):
    """Return the name of the first document field that contains a character span.

    Looks up the document for ``pmcid`` in the module-level ``annotated_docs``
    mapping and scans its ``metadata['field_positions']`` entries in order.

    Parameters
    ----------
    pmcid : key of the document in ``annotated_docs``.
    start, end : character offsets of the annotated span.

    Returns
    -------
    The first field name whose ``(start, end)`` range fully encloses the span,
    or ``None`` when no field encloses it.

    Raises
    ------
    KeyError if ``pmcid`` is not a key of ``annotated_docs``.
    """
    field_positions = annotated_docs[pmcid]['metadata']['field_positions']
    return next(
        (
            name
            for name, (lo, hi) in field_positions.items()
            if lo <= start and end <= hi
        ),
        None,
    )

# Label every annotation with the document section containing its character span.
# `assign` builds a new frame rather than writing a column into `annotations`
# in place; a frame obtained by boolean filtering may raise
# SettingWithCopyWarning on in-place column writes. Iterating the three
# columns directly also avoids building a Series per row, as
# `apply(axis=1)` does, and handles an empty frame.
annotations = annotations.assign(
    section=[
        get_section(pmcid, start, end)
        for pmcid, start, end in zip(
            annotations.pmcid, annotations.start_char, annotations.end_char
        )
    ]
)

### Unique tasks

In [118]:
# Every annotation except those made by Alice.
not_alice = annotations['annotator_name'] != 'alice_chen'
a_annotations = annotations.loc[not_alice]

In [119]:
# List the distinct annotator names present in `annotations`.
annotations['annotator_name'].unique()

array(['delavega_nv', 'delavega-aliceoverlap', 'alice_chen',
       'delavega-other'], dtype=object)

In [120]:
# Distinct PMCIDs per annotator.
# NOTE(review): neither 'delavega_withtables' nor 'delavega' appears in the
# annotator list displayed by the preceding cell — confirm these names are
# still the intended ones.
pmcidst = annotations.loc[annotations['annotator_name'] == 'delavega_withtables', 'pmcid'].unique()
pmcidsd = annotations.loc[annotations['annotator_name'] == 'delavega', 'pmcid'].unique()

In [121]:
# Display the PMCIDs selected for the 'delavega' annotator.
pmcidsd

array([], dtype=int64)

In [125]:
# Distinct text spans tagged `TaskName` within the 'body' section
# (`a_annotations` excludes Alice's annotations).
body_task_rows = (a_annotations['label_name'] == 'TaskName') & (a_annotations['section'] == 'body')
a_annotations.loc[body_task_rows, 'selected_text'].unique()

array(['listening to an auditory narrative',
       'verbal self-referential processing (SRP)', 'checking-in',
       'Classmates task', 'single-cue conditioning paradigm',
       'semantic judgment task', 'non-verbal tone judgment task',
       'multiple-threat paradigm', 'Chatroom fMRI Task',
       'Empathy-for-pain task', 'Emotional reactivity task',
       'Montreal Imaging Stress Task (MIST)',
       'six-alternative forced-choice cued-recognition task',
       'stopwatch task', 'fast brain mapping sequence', 'one-back task',
       'movement observation paradigm', 'Voice localizer paradigm',
       'single food choice task', 'fear inducing paradigm',
       'Soccer Paradigm', 'monetary incentive paradigm',
       'amygdala activation task', 'Typing task', 'Reading task',
       'Typing-movement task ', 'Writing task', 'Writing-movement task',
       'implicit emotion processing task ',
       'explicit emotion identification task', 'Task', 'movie', 'task',
       'revised Social

### Remove Task=None

In [127]:
# Rows of `a_annotations` whose label name equals the string 'None'.
a_annotations.loc[a_annotations['label_name'] == 'None']

Unnamed: 0,pmcid,title,doc_md5,label_name,extra_data,selected_text,start_char,end_char,project_name,annotator_name,label_color,context,context_start_char,context_end_char,doc_length,section
3,10028637,Responsiveness variability during anaesthesia ...,059effc005faf5394dc9da8bc4648449,,,listening to an auditory narrative,10033,10067,semiauto_ma_features,delavega_nv,#c0bfbc,"ach session, a clinical assessment of sedation...",9833,10267,58644,body
351,4488375,Shaped by the Past: The Default Mode Network S...,cea0847fc75496838a6d4287a483291a,,,novel task,745,755,semiauto_ma_features,delavega_nv,#c0bfbc,pothesis that the role of the DMN in higher or...,545,955,38497,abstract
399,4547715,Women’s Preference for a Male Acquaintance Enh...,1eaaca763ae1f468eb482fad21274a32,,,Task,10561,10565,semiauto_ma_features,delavega_nv,#c0bfbc,"iversity, or at a club activity inside or outs...",10361,10765,36025,body
409,4914983,Brain hemodynamic activity during viewing and ...,73948deb28963e895d15218a4b8b0a72,,,movie,9544,9549,semiauto_ma_features,delavega_nv,#c0bfbc,"cortex , and subcortically putamen , as well a...",9344,9749,50696,body
424,5324609,The role of the hippocampus in generalizing co...,16ef9d7ab42b76b944ac37702a03bbb3,,,task,5547,5551,semiauto_ma_features,delavega-aliceoverlap,#c0bfbc,s had either normal or corrected‐to‐normal vis...,5347,5751,24283,body
472,5662713,Reinstatement of memory representations for li...,c3e954f529f8385dea6099ae47d4d3d0,,movie watching,Participants watched 24 short videos while in ...,8728,8789,semiauto_ma_features,delavega_nv,#c0bfbc,as approved by the Brighton and Sussex Medical...,8528,8989,57902,body
480,5716095,Love flows downstream: mothers’ and children’s...,5c072a008153780e56be93cf6e22976f,,,task that measures empathic responses when per...,7007,7096,semiauto_ma_features,delavega_nv,#c0bfbc,iated with mentalizing processes will shed lig...,6807,7296,79133,body
507,5895040,Domain-General and Domain-Specific Patterns of...,12db1d269c1617ea11d3a26873c13532,,,perception,6608,6618,semiauto_ma_features,delavega_nv,#c0bfbc,ow). We present the results of 24 subjects who...,6408,6818,76672,body
509,5895040,Domain-General and Domain-Specific Patterns of...,12db1d269c1617ea11d3a26873c13532,,,memory,6619,6625,semiauto_ma_features,delavega_nv,#c0bfbc,sent the results of 24 subjects whose data wer...,6419,6825,76672,body
526,5973829,Cognitive regulation alters social and dietary...,9f8430b76dd3687151caa77ce95069e5,,,The altruism task was an fMRI compatible versi...,63581,66081,semiauto_ma_features,delavega_nv,#c0bfbc,r prior to testing. Stimulus presentation was ...,63381,66281,98744,body
