In [1]:
import pandas as pd
import json

In [2]:
filename = "train"

In [3]:
with open(f"{filename}.json") as file:
    file_json = json.load(file)

In [4]:
def read_ct_json(filename):
    with open(f"CT json/{filename}.json") as file:
        ct_json = json.load(file)
        
    return ct_json

In [5]:
def add_cohort_information(subsection_prefix):
    if 'adverse events' in subsection_prefix.lower():
        subsection_prefix = subsection_prefix.lower().replace('adverse events', 'adverse events cohort')
        print(subsection_prefix)
    if 'results' in subsection_prefix.lower():
        subsection_prefix = subsection_prefix.lower().replace('results', 'results cohort')
        print(subsection_prefix)
    if 'intervention' in subsection_prefix.lower():
        subsection_prefix = subsection_prefix.lower().replace('intervention', 'intervention cohort')
        print(subsection_prefix)
    return subsection_prefix

In [6]:
def is_subsection_heading(answer_line):
    return answer_line.strip().endswith(':') and len(answer_line.strip()) <= 30

In [24]:
subsections = []

In [25]:
def generate_answers(ct_json, section, qn, idx_list, trial_prefix, qn_idx, idx_offset=0):
    answers = []
    subsection_prefix = ''
    for i in range(len(ct_json[section])):
        answer_line = ct_json[section][i]
        
        if is_subsection_heading(answer_line):
            print(f'Subsection: {answer_line}')
            subsections.append(answer_line.strip().lower().replace(':', ''))
            # begin of subsection
            subsection_prefix = add_cohort_information(answer_line.strip() + ' ')            
        
        answers.append({
            'question': qn,
            'sentence': trial_prefix + subsection_prefix + answer_line,
            'label': (1 if i in idx_list else 0),
            'idx': (qn_idx * 1000 + idx_offset + i)
        })

    return answers

In [26]:
def generate_data_item(key, qn_idx, is_test=False):
    ct_json = read_ct_json(file_json[key]['Primary_id'])
    qn = file_json[key]['Statement']
    section = file_json[key]['Section_id']
    ct_type = file_json[key]['Type']
    
    trial_prefix = 'Primary trial: ' # if ct_type == 'Comparison' else ''
    
    if not is_test:
        idx_list = file_json[key]['Primary_evidence_index']        
    else:
        idx_list = []
    
    answers = generate_answers(ct_json, section, qn, idx_list, trial_prefix, qn_idx)
    
    if 'Secondary_id' in file_json[key].keys():        
        secondary_ct_json = read_ct_json(file_json[key]['Secondary_id'])
        secondary_text_context = '.'.join(secondary_ct_json[section])
        trial_prefix = 'Secondary trial: '
        idx_offset = len(answers)
        
        if not is_test:
            secondary_idx_list = file_json[key]['Secondary_evidence_index']
        else:
            secondary_idx_list = []
        
        secondary_answers = generate_answers(secondary_ct_json, section, qn, secondary_idx_list, trial_prefix, qn_idx, idx_offset) 
        answers.extend(secondary_answers)
        
    return answers

In [27]:
generate_data_item('83b83400-1439-462d-bba3-42817b5b1fa1', 1)
#generate_data_item('0a6d1b4c-244e-44e2-a229-62e4cbdfa979', 1)
#generate_data_item('9f978634-637c-472f-a588-6f4bb2fb121f', 1, True)

Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 


[{'question': 'Most of the cases of CHF in the primary trial, were in cohort 1.',
  'sentence': 'Primary trial: adverse events cohort 1: Adverse Events 1:',
  'label': 1,
  'idx': 1000},
 {'question': 'Most of the cases of CHF in the primary trial, were in cohort 1.',
  'sentence': 'Primary trial: adverse events cohort 1:   Total: 12/32 (37.50%)',
  'label': 0,
  'idx': 1001},
 {'question': 'Most of the cases of CHF in the primary trial, were in cohort 1.',
  'sentence': 'Primary trial: adverse events cohort 1:   Anaemia 0/32 (0.00%)',
  'label': 0,
  'idx': 1002},
 {'question': 'Most of the cases of CHF in the primary trial, were in cohort 1.',
  'sentence': 'Primary trial: adverse events cohort 1:   Neutropenia 1/32 (3.13%)',
  'label': 0,
  'idx': 1003},
 {'question': 'Most of the cases of CHF in the primary trial, were in cohort 1.',
  'sentence': 'Primary trial: adverse events cohort 1:   Thrombocytopenia 4/32 (12.50%)',
  'label': 0,
  'idx': 1004},
 {'question': 'Most of the cas

In [28]:
data_items = []
qn_idx = 0
for key in file_json.keys():
    data_item = generate_data_item(key, qn_idx, (True if filename == 'test' else False))
    data_items.extend(data_item)
    qn_idx += 1

Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: DISEASE CHARACTERISTICS:
Subsection:   PATIENT CHARACTERISTICS:
Subsection:   PRIOR CONCURRENT THERAPY:
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection:   PATIENT CHARACTERISTICS:
Subsection:   PRIOR CONCURRENT THERAPY:
Subsection: INTERVENTION 1: 
intervention 

Subsection: DISEASE CHARACTERISTICS:
Subsection:   PATIENT CHARACTERISTICS:
Subsection: Inclusion criteria:
Subsection: Exclusion criteria:
Subsection:   PRIOR CONCURRENT THERAPY:
Subsection:   PRIOR CONCURRENT THERAPY:
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: DISEASE CHARACTERISTICS:
Subsection:   PATIENT CHARACTER

Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Eligibility Criteria:
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adve

Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection:   Excluded therapies include:
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 

Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: Adverse Ev

Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection:   Hematopoietic status:
Subsection:   Hepatic status:
Subsection: Exclusion Criteria:
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Adverse Events 1:
adverse ev

Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adv

Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: DISEASE CHARACTERISTICS:
Subsection:   PATIENT CHARACTERISTICS:
Subsection:   PRIOR CONCURRENT THERAPY:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Outcome Measurement: 
Subsection: Results 1: 
resu

Subsection: DISEASE CHARACTERISTICS:
Subsection:   PATIENT CHARACTERISTICS:
Subsection:   PRIOR CONCURRENT THERAPY:
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection:   Adequate bone marrow function:
Subsection: Exclusion Criteria:
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION

Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
interv

Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Inclusion Criteria:
Subsection:   Other laboratory t

Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection:   No reproductive potential:
Subsection: Exclusion Criteria:
Subsection: INCLUSION CRITERIA:
Subsection: EXCLUSION CRITERIA:
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse even

Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Eligibility Criteria:
Subsection: Inclusion Criteria:
Subsection:   Prior therapy inclusion:
Subsection: Exclusion Criteria:
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: 

Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection:   Disease-Specific Exclusions:
Subsection:   General Medical Exclusions:
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Su

Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection:

Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse 

Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Me

Subsection: Inclusion Criteria:
Subsection:   Hematopoietic status:
Subsection:   Hepatic status:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria :
Subsection:   Exclusion Criteria :
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection:   PRIOR CONCURRENT THERAPY:
Subsection: Inclusion criteria:
Subsection: Exclusion criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Su

Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: A

Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: DISEASE CHARACTERISTICS:
Subsection:   PATIENT CHARACTERISTICS:
Subsection:   PRIOR CONCURRENT THERAPY:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 


Subsection: DISEASE CHARACTERISTICS:
Subsection:   Hormone receptor status:
Subsection:   PATIENT CHARACTERISTICS:
Subsection:   PRIOR CONCURRENT THERAPY:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: DISEASE CHARACTERISTICS:
Subsection:   Hormone receptor status:
Subsection:   PATIENT CHARACTERISTICS:
Subsection:   Age:
Subsection:   Sex:
Subsection:   Menopausal status:
Subsection: Performance status:
Subsection:   Life expectancy:
Subsection:   Hematopoietic:
Subsection:   Hepatic:
Subsection:   Renal:
Subsection:   Cardiovascular:
Subsection:   Other:
Subsection:   PRIOR CONCURRENT THERAPY:
Subsection:   Bi

Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 2:
adverse events 

Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection:   Disease-Specific Exclusions:
Subsection:   General Medical Exclusions:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Inclusion Criteria:
Subsection: Exclusion Criteria:
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: Adverse Events 1:
adverse events cohort 1: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: DISEASE CHARACTERISTICS:
Subsection:   Hormone receptor status:
Subsection:   PATIENT CHARACTERISTICS:
Subsection:   Age:
Subsection:   Sex:
Subsection:   Menopausal status:
Subsection: Performance status:
Subsection:   Life expectancy:
Subsection:   Hematopoietic:
Subsection:   Hepatic:
Subsection:   Renal:
Subsection:   Cardiovascular:
Subsection:   Other:
Subsection:   PRIOR CONCURRENT THERAPY:
Subsection:   Biologic therapy:
Subsection:   Chemotherapy:
Subsection:   Endocri

Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: INTERVENTION 1: 
intervention cohort 1: 
Subsection: INTERVENTION 2: 
intervention cohort 2: 
Subsection: DISEASE CHARACTERISTICS:
Subsection:   Hormone-receptor status:
Subsection:   PATIENT CHARACTERISTICS:
Subsection:   PRIOR CONCURRENT THERAPY:
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: Results 2: 
results cohort 2: 
Subsection: Outcome Measurement: 
Subsection: Results 1: 
results cohort 1: 
Subsection: R

In [29]:
subsections

['adverse events 1',
 'adverse events 2',
 'intervention 1',
 'intervention 1',
 'intervention 2',
 'disease characteristics',
 'patient characteristics',
 'prior concurrent therapy',
 'adverse events 1',
 'adverse events 1',
 'adverse events 2',
 'inclusion criteria',
 'exclusion criteria',
 'intervention 1',
 'intervention 2',
 'intervention 1',
 'intervention 2',
 'inclusion criteria',
 'inclusion criteria',
 'exclusion criteria',
 'inclusion criteria',
 'exclusion criteria',
 'patient characteristics',
 'prior concurrent therapy',
 'intervention 1',
 'intervention 1',
 'intervention 2',
 'inclusion criteria',
 'exclusion criteria',
 'intervention 1',
 'intervention 1',
 'inclusion criteria',
 'exclusion criteria',
 'adverse events 1',
 'adverse events 2',
 'disease characteristics',
 'hormone receptor status',
 'patient characteristics',
 'prior concurrent therapy',
 'inclusion criteria',
 'exclusion criteria',
 'intervention 1',
 'intervention 2',
 'intervention 1',
 'intervention

In [30]:
import collections

In [31]:
counter = collections.Counter(subsections)

In [34]:
counter.most_common(20)

[('adverse events 1', 684),
 ('intervention 1', 637),
 ('inclusion criteria', 531),
 ('exclusion criteria', 521),
 ('adverse events 2', 400),
 ('intervention 2', 396),
 ('outcome measurement', 390),
 ('results 1', 390),
 ('results 2', 285),
 ('prior concurrent therapy', 91),
 ('patient characteristics', 87),
 ('disease characteristics', 85),
 ('hormone receptor status', 30),
 ('other', 18),
 ('age', 14),
 ('menopausal status', 14),
 ('performance status', 14),
 ('life expectancy', 14),
 ('hematopoietic', 14),
 ('hepatic', 14)]

In [14]:
df = pd.DataFrame(data_items)

In [15]:
df.head(20)

Unnamed: 0,question,sentence,label,idx
0,Women suffering from both claustrophobia and I...,Primary trial: Inclusion Criteria: Inclusion C...,0,0
1,Women suffering from both claustrophobia and I...,Primary trial: Inclusion Criteria: Women are...,0,1
2,Women suffering from both claustrophobia and I...,Primary trial: Exclusion Criteria: Exclusion C...,0,2
3,Women suffering from both claustrophobia and I...,Primary trial: Exclusion Criteria: History o...,0,3
4,Women suffering from both claustrophobia and I...,Secondary trial: Inclusion Criteria: Inclusion...,0,4
5,Women suffering from both claustrophobia and I...,Secondary trial: Inclusion Criteria: Women w...,0,5
6,Women suffering from both claustrophobia and I...,Secondary trial: Exclusion Criteria: Exclusion...,0,6
7,Women suffering from both claustrophobia and I...,Secondary trial: Exclusion Criteria: Pregnancy,0,7
8,Women suffering from both claustrophobia and I...,Secondary trial: Exclusion Criteria: Ferroma...,0,8
9,Women suffering from both claustrophobia and I...,Secondary trial: Exclusion Criteria: History...,0,9


In [16]:
df[df['label']==0].size/df.size

1.0

In [17]:
data = {
    'data': data_items
}

In [34]:
json_object = json.dumps(data, indent=4)
 
with open(f"{filename}-qnli-v3.json", "w") as outfile:
    outfile.write(json_object)

In [18]:
result_data_items = []

for key in file_json.keys():
    result_data_items.append(key)    

In [19]:
def reverse_idx_lookup(result_data_items, file_json, idx):
    qn_idx = idx // 1000
    answer_idx = idx % 1000    
    
    key = result_data_items[qn_idx]
    
    ct_json = read_ct_json(file_json[key]['Primary_id'])
    #qn = file_json[key]['Statement']
    section = file_json[key]['Section_id']    
    primary_section = ct_json[section]
    #ct_type = file_json[key]['Type']
    #idx_list = file_json[key]['Primary_evidence_index']
    
    is_primary = True
    if answer_idx >= len(primary_section):
        # index is in the secondary list
        is_primary = False
        
        inner_idx = answer_idx - len(primary_section)
        #secondary_ct_json = read_ct_json(file_json[key]['Secondary_id'])
        #secondary_text_context = '.'.join(secondary_ct_json[section])
    else:        
        inner_idx = answer_idx
        
    return key, inner_idx, is_primary    

In [40]:
# Load results
with open(f"{filename}-qnli-results-v3-BiomedNLP-best-epoch.json") as file:
    file_results_json = json.load(file)

In [41]:
result_predictions = {}

In [42]:
for result in file_results_json:
    if result['label'] == 1:
        (key, inner_idx, is_primary) = reverse_idx_lookup(result_data_items, file_json, result['idx'])
        if not key in result_predictions.keys():
            result_predictions[key] = {
                'Primary_evidence_index': [],
                'Secondary_evidence_index': []
            }

        if is_primary:
            result_predictions[key]['Primary_evidence_index'].append(inner_idx)
        else:
            result_predictions[key]['Secondary_evidence_index'].append(inner_idx)        

In [43]:
result_predictions

{'9f978634-637c-472f-a588-6f4bb2fb121f': {'Primary_evidence_index': [0, 2],
  'Secondary_evidence_index': [0, 2, 4, 6, 7, 8, 9, 10, 13, 14]},
 '20b34e62-97c2-4ca0-bb1d-7824dab0b8bb': {'Primary_evidence_index': [0,
   1,
   2,
   3,
   4,
   5],
  'Secondary_evidence_index': []},
 '893a5337-2aa9-4a87-a020-4c2f03cd4aea': {'Primary_evidence_index': [0],
  'Secondary_evidence_index': [0, 4]},
 'd401affc-f081-4eee-bd61-d109cc88f6de': {'Primary_evidence_index': [0],
  'Secondary_evidence_index': [0]},
 '791790a6-187b-4e4b-be5f-9e5304e9ec2c': {'Primary_evidence_index': [0, 1, 2],
  'Secondary_evidence_index': [0, 3, 4, 5]},
 'b95b7438-ec16-4d4d-826d-5891e7982b36': {'Primary_evidence_index': [0, 1],
  'Secondary_evidence_index': [0, 1, 2, 3]},
 '4988cb16-7dbb-4847-84e0-4a7957b32c72': {'Primary_evidence_index': [2, 13],
  'Secondary_evidence_index': [0,
   1,
   2,
   3,
   4,
   5,
   6,
   7,
   8,
   9,
   10,
   11,
   12,
   13,
   14,
   15,
   16,
   17,
   18,
   19,
   20,
   21,
   22

In [44]:
json_object = json.dumps(result_predictions, indent=4)
 
with open(f"{filename}-qnli-results-v3-BiomedNLP-best-epoch-submit.json", "w") as outfile:
    outfile.write(json_object)