### Hemonc

In [1]:
import os
import time
import numpy as np
import pandas as pd
from tqdm import tqdm
from Bio import Entrez
from urllib.error import HTTPError
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed

In [2]:
columns_ref = ['study', 'condition', 'pmid', 'pub.date']
columns_result = ['study', 'condition', 'regimen', 'comparator', 'efficacy']
columns_indication = ['study', 'condition', 'stage_or_status']

preprocess = lambda df, col: df[col].dropna().drop_duplicates().sort_values(col).reset_index(drop=True)
ref = preprocess(pd.read_csv("../dataverse_files/ref.table.csv", encoding='latin1'), columns_ref)
result = preprocess(pd.read_csv("../dataverse_files/study_results.csv", encoding='latin1'), columns_result)
indication = preprocess(pd.read_csv("../dataverse_files/indications.csv", encoding='latin1'), columns_indication)

efficacy = pd.read_excel("../dataverse_files/Tables/efficacy.xlsx")
efficacy2label = dict(zip(efficacy['efficacy_raw'], efficacy['efficacy_std']))
efficacy2label = {k:v if 'Might Be ' not in v else v[len('Might Be '):] for k,v in efficacy2label.items()}
stage = pd.read_excel("../dataverse_files/Tables/stage.xlsx")
stage2label = dict(zip(stage['stage_raw'], stage['stage_std']))

In [3]:
study2pmids = defaultdict(set)
for _, row in ref.iterrows():
    study2pmids[(row['study'], row['condition'])].add(row['pmid'])
    
study2result = defaultdict(set)
for _, row in result.iterrows():
    try:
        if (row['study'], row['condition']) not in study2pmids or efficacy2label[row['efficacy']] == 'Other' or row['regimen'] == row['comparator']: continue
    except KeyError:
        print(f"KeyError for study: {row['study']}, condition: {row['condition']}, efficacy: {row['efficacy']}")
        continue
    label = efficacy2label[row['efficacy']]
    switched = label.replace('Inferior', 'Superior') if 'Inferior' in label else label.replace('Superior', 'Inferior')
    study2result[(row['study'], row['condition'])].add((row['regimen'], row['comparator'], label))
    study2result[(row['study'], row['condition'])].add((row['comparator'], row['regimen'], switched))
    
for study in set(study2pmids.keys()) - set(study2result.keys()):
    del study2pmids[study]
    
study2stage = defaultdict(set)
for _, row in indication.iterrows():
    if (row['study'], row['condition']) not in study2result: continue
    study2stage[(row['study'], row['condition'])].add(stage2label[row['stage_or_status']])

In [9]:
Entrez.email = "yuxin102@mit.edu"
def get_text(pmid, retries=50):
    for i in range(retries):
        try:
            handle = Entrez.efetch(db="pubmed", id=pmid, rettype="xml", retmode="text")
            record = Entrez.read(handle)
            article = record['PubmedArticle'][0]['MedlineCitation']['Article']
            doc = article['ArticleTitle']
            if 'Abstract' in article: doc = ' '.join([doc] + article['Abstract']['AbstractText'])
            return pmid, doc
        except HTTPError as e:
            if e.code == 429: time.sleep(i)
            else: raise Exception(f"Failed to fetch data for PMID {pmid} after {retries} retries.")

pmid2doc = {}
path_doc = "../dataverse_files/Tables/docs.csv"
if os.path.exists(path_doc):
    pmid2doc = pd.read_csv(path_doc)
    pmid2doc = dict(zip(pmid2doc['pmid'], pmid2doc['doc']))

pmids_missing = set([pmid for study, pmids in study2pmids.items() for pmid in pmids]) - set(pmid2doc.keys())
pmids_missing |= set([pmid for pmid, doc in pmid2doc.items() if not isinstance(doc, str) and np.isnan(doc)])
if len(pmids_missing):
    pmid2doc_missing = {}
    with ThreadPoolExecutor(max_workers=10) as exe:
        futures = [exe.submit(get_text, pmid) for pmid in pmids_missing]
        for future in tqdm(as_completed(futures), total=len(futures)):
            try:
                pmid, doc = future.result()
                pmid2doc_missing[pmid] = doc
            except Exception as e:
                print(f"Error: {e}")
    for pmid in pmid2doc_missing:
        if not isinstance(pmid2doc_missing[pmid], str) and np.isnan(pmid2doc_missing[pmid]):
            pmid2doc_missing[pmid] = get_text(pmid)[1]
    pmid2doc |= pmid2doc_missing
    pmid2doc_df = pd.DataFrame(list(pmid2doc.items()), columns=['pmid', 'doc'])
    pmid2doc_df.to_csv(path_doc, index=False)

100%|██████████| 3125/3125 [16:16<00:00,  3.20it/s]


In [None]:
import openai
openai_client = openai.OpenAI(api_key="")
openai_model = "gpt-4o-mini"

def call_gpt(message_user, retries=50):
    messages = [{"role": "system", "content": 'You are a helpful assistant.'}, 
                {"role": "user", "content": message_user}]
    for i in range(retries):
        try:
            response = openai_client.chat.completions.create(
                model = openai_model, messages = messages, max_tokens = 4096 # 512
            )
            return response.choices[0].message.content
        except openai.RateLimitError as e:
            time.sleep(i)
        except Exception as e:
            print(f"An error occurred: {e}")
            raise
    raise Exception(f"Failed to call GPT-4 after {retries} retries.")

In [12]:
import re
templates_qn = [
    'Choose an option that best describes the efficacy of {REGIMEN} compared to {COMPARATOR} when used to treat {CONDITION}.',
    'Select the option that most accurately reflects the effectiveness of {REGIMEN} versus {COMPARATOR} in treating {CONDITION}.',
    'Which option best summarizes the comparative efficacy of {REGIMEN} and {COMPARATOR} for managing {CONDITION}?',
    'Identify the option that best summarizes the effectiveness of {REGIMEN} versus {COMPARATOR} in treating {CONDITION}.',
    'Which option most effectively illustrates the efficacy of {REGIMEN} when compared with {COMPARATOR} for {CONDITION}?'
]
rephrase = \
f'''
### Instruction
Do not respond to the question. 
Instead, rephrase the given question template into 20 other versions that are semantically equivalent.

## Version 0: {templates_qn[0]}
## Version 1: {templates_qn[1]}
## Version 2: {templates_qn[2]}
## Version 3: {templates_qn[3]}
## Version 4: {templates_qn[4]}
'''
response = call_gpt(rephrase)
matches = [re.search(fr"## Version {i}: (.*)", response) for i in range(5, 20)]           
templates_qn += [each.group(1) for each in matches if each is not None and len(each.group(1))]

In [17]:
option2idx = {'superior':1, 'inferior':2, 'no difference':3}
dataset = []
for key, values in study2result.items():
    evidence = '\n\n'.join([pmid2doc[pmid] for pmid in study2pmids[key]])
    # Add both URLs and PMIDs
    pubmed_urls = ', '.join(sorted(study2pmids[key]))
    pmids = ', '.join(sorted(url.replace('https://pubmed.ncbi.nlm.nih.gov/', '').replace('/', '') 
                            for url in study2pmids[key]))
    stage = '' if key not in study2stage else ' ({})'.format(', '.join(sorted(study2stage[key])))
    conditon = key[1] + stage
    for regimen, comparator, efficacy in values:
        if regimen == comparator: continue
        questions = [each.format(**{'REGIMEN':regimen, 'COMPARATOR':comparator, 'CONDITION':conditon}) for each in templates_qn]
        answer = option2idx[efficacy.lower()]
        dataset.append((evidence, pubmed_urls, pmids, *questions, answer))
        
columns = ['evidence', 'pubmed_urls', 'pmids'] + [f'question {i}' for i in range(1, len(templates_qn)+1)] + ['answer']
dataset = pd.DataFrame(dataset, columns=columns)
dataset = dataset.dropna().drop_duplicates(subset=columns[:-1], keep=False).reset_index(drop=True)
for option, idx in option2idx.items():
    dataset[f'option {idx}'] = option

# Create Data/Input directory if it doesn't exist
import os
os.makedirs('Data/Input', exist_ok=True)

dataset.to_csv('Data/Input/Hemonc.csv', index=False)

In [25]:
dataset['pmids'][2] 

'18955563, 20418244'

In [None]:
option2idx = {'superior':1, 'inferior':2, 'no difference':3}
dataset = []
for key, values in study2result.items():
    evidence = '\n\n'.join([pmid2doc[pmid] for pmid in study2pmids[key]])
    stage = '' if key not in study2stage else ' ({})'.format(', '.join(sorted(study2stage[key])))
    conditon = key[1] + stage
    for regimen, comparator, efficacy in values:
        if regimen == comparator: continue
        questions = [each.format(**{'REGIMEN':regimen, 'COMPARATOR':comparator, 'CONDITION':conditon}) for each in templates_qn]
        answer = option2idx[efficacy.lower()]
        dataset.append((evidence, *questions, answer))
        
columns = ['evidence'] + [f'question {i}' for i in range(1, len(templates_qn)+1)] + ['answer']
dataset = pd.DataFrame(dataset, columns=columns)
dataset = dataset.dropna().drop_duplicates(subset=columns[:-1], keep=False).reset_index(drop=True)
for option, idx in option2idx.items():
    dataset[f'option {idx}'] = option
dataset.to_csv('Data/Input/Hemonc.csv', index=False)

OSError: Cannot save file into a non-existent directory: 'Data/Input'

In [15]:
study2pmids

defaultdict(set,
            {('006/027/ICI',
              'Cervical cancer'): {'https://pubmed.ncbi.nlm.nih.gov/20931299/'},
             ('01-002-0601',
              'Multiple myeloma'): {'https://pubmed.ncbi.nlm.nih.gov/18955563/',
              'https://pubmed.ncbi.nlm.nih.gov/20418244/'},
             ('10TASQ10',
              'Prostate cancer'): {'https://pubmed.ncbi.nlm.nih.gov/27298414/'},
             ('20020408',
              'Colorectal cancer'): {'https://pubmed.ncbi.nlm.nih.gov/17470858/'},
             ('20050181',
              'Colorectal cancer'): {'https://pubmed.ncbi.nlm.nih.gov/20921462/', 'https://pubmed.ncbi.nlm.nih.gov/24356622/'},
             ('20100007',
              'Colorectal cancer'): {'https://pubmed.ncbi.nlm.nih.gov/27736842/', 'https://pubmed.ncbi.nlm.nih.gov/29703606/'},
             ('2014-PT026',
              'Colorectal cancer'): {'https://pubmed.ncbi.nlm.nih.gov/28094194/'},
             ('2016-FXY-075',
              'Nasopharyngeal carcinom

In [14]:
dataset

Unnamed: 0,evidence,question 1,question 2,question 3,question 4,question 5,question 6,question 7,question 8,question 9,...,question 15,question 16,question 17,question 18,question 19,question 20,answer,option 1,option 2,option 3
0,"A double-blind, placebo-controlled, randomized...",Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,Which choice most accurately depicts the effec...,Identify the choice that most effectively repr...,...,Select the best choice that illustrates the ef...,What option most accurately summarizes how Cis...,Choose the option that most effectively highli...,Which option best outlines the effectiveness o...,Identify the option that conveys the most accu...,Select the statement that appropriately descri...,2,superior,inferior,no difference
1,"A double-blind, placebo-controlled, randomized...",Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,Which choice most accurately depicts the effec...,Identify the choice that most effectively repr...,...,Select the best choice that illustrates the ef...,What option most accurately summarizes how Cis...,Choose the option that most effectively highli...,Which option best outlines the effectiveness o...,Identify the option that conveys the most accu...,Select the statement that appropriately descri...,1,superior,inferior,no difference
2,Thalidomide-dexamethasone compared with melpha...,Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,Which choice most accurately depicts the effec...,Identify the choice that most effectively repr...,...,Select the best choice that illustrates the ef...,What option most accurately summarizes how Int...,Choose the option that most effectively highli...,Which option best outlines the effectiveness o...,Identify the option that conveys the most accu...,Select the statement that appropriately descri...,1,superior,inferior,no difference
3,Thalidomide-dexamethasone compared with melpha...,Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,Which choice most accurately depicts the effec...,Identify the choice that most effectively repr...,...,Select the best choice that illustrates the ef...,What option most accurately summarizes how Int...,Choose the option that most effectively highli...,Which option best outlines the effectiveness o...,Identify the option that conveys the most accu...,Select the statement that appropriately descri...,2,superior,inferior,no difference
4,"Randomized, Double-Blind, Placebo-Controlled P...",Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,Which choice most accurately depicts the effec...,Identify the choice that most effectively repr...,...,Select the best choice that illustrates the ef...,What option most accurately summarizes how Tas...,Choose the option that most effectively highli...,Which option best outlines the effectiveness o...,Identify the option that conveys the most accu...,Select the statement that appropriately descri...,1,superior,inferior,no difference
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6207,Abemaciclib plus endocrine therapy for hormone...,Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,Which choice most accurately depicts the effec...,Identify the choice that most effectively repr...,...,Select the best choice that illustrates the ef...,What option most accurately summarizes how Abe...,Choose the option that most effectively highli...,Which option best outlines the effectiveness o...,Identify the option that conveys the most accu...,Select the statement that appropriately descri...,1,superior,inferior,no difference
6208,Topotecan versus paclitaxel for the treatment ...,Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,Which choice most accurately depicts the effec...,Identify the choice that most effectively repr...,...,Select the best choice that illustrates the ef...,What option most accurately summarizes how Top...,Choose the option that most effectively highli...,Which option best outlines the effectiveness o...,Identify the option that conveys the most accu...,Select the statement that appropriately descri...,1,superior,inferior,no difference
6209,Topotecan versus paclitaxel for the treatment ...,Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,Which choice most accurately depicts the effec...,Identify the choice that most effectively repr...,...,Select the best choice that illustrates the ef...,What option most accurately summarizes how Pac...,Choose the option that most effectively highli...,Which option best outlines the effectiveness o...,Identify the option that conveys the most accu...,Select the statement that appropriately descri...,2,superior,inferior,no difference
6210,Bendamustine prolongs progression-free surviva...,Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,Which choice most accurately depicts the effec...,Identify the choice that most effectively repr...,...,Select the best choice that illustrates the ef...,What option most accurately summarizes how CMF...,Choose the option that most effectively highli...,Which option best outlines the effectiveness o...,Identify the option that conveys the most accu...,Select the statement that appropriately descri...,2,superior,inferior,no difference


In [20]:
import pandas as pd

df = pd.read_csv('data/Hemonc.csv')
ds = pd.read_csv('merged_study_ref_pubmed.csv')
a = df['evidence'][0]

In [None]:
NCTs = []
for index, i in enumerate(dataset['pmids']):
    temp = []
    if ',' in i:
        ids = [int(id) for id in i.split(',')]
    else:
        ids = [int(i)]
    for each_id in ids:
        temp.append(ds[ds['pmid'] == each_id]['trial_id'].values[0])
    if len(set(temp)) > 1:
        print(f"Multiple NCTs found for PMIDs: {i}")
        # remove this row from the dataset
        dataset.drop(index, inplace=True)
    else:
        NCTs.append(set(temp).pop())
NCTs

In [39]:
dataset['NCT'] = NCTs
dataset.to_csv('Data/Input/Hemonc_new.csv', index=False)

In [22]:
import pandas as pd

df = pd.read_csv("../data/Hemonc_new_with_all.csv")

df

Unnamed: 0,evidence,pubmed_urls,pmids,question 1,question 2,question 3,question 4,question 5,question 6,question 7,...,option 1,option 2,option 3,NCT,authors,start_date,has_primary_outcome,has_secondary_outcome,drug_routes,drug_classes
0,"A double-blind, placebo-controlled, randomized...",https://pubmed.ncbi.nlm.nih.gov/20931299/,20931299,Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,...,superior,inferior,no difference,NCT00532818,"{""20931299"": ""Jaime Coronel|Lucely Cetina|Irla...",2007-07,Yes,Yes,Unknown,Platinum-based Chemotherapy
1,"A double-blind, placebo-controlled, randomized...",https://pubmed.ncbi.nlm.nih.gov/20931299/,20931299,Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,...,superior,inferior,no difference,NCT00532818,"{""20931299"": ""Jaime Coronel|Lucely Cetina|Irla...",2007-07,Yes,Yes,Unknown,Platinum-based Chemotherapy
2,Thalidomide-dexamethasone compared with melpha...,"https://pubmed.ncbi.nlm.nih.gov/18955563/, htt...","18955563, 20418244",Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,...,superior,inferior,no difference,NCT00205751,"{""18955563"": ""Heinz Ludwig|Roman Hajek|Elena T...",2001-08,Yes,Yes,Oral,Immunomodulatory Drugs
3,Thalidomide-dexamethasone compared with melpha...,"https://pubmed.ncbi.nlm.nih.gov/18955563/, htt...","18955563, 20418244",Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,...,superior,inferior,no difference,NCT00205751,"{""18955563"": ""Heinz Ludwig|Roman Hajek|Elena T...",2001-08,Yes,Yes,Oral,Immunomodulatory Drugs
4,"Randomized, Double-Blind, Placebo-Controlled P...",https://pubmed.ncbi.nlm.nih.gov/27298414/,27298414,Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,...,superior,inferior,no difference,NCT01234311,"{""27298414"": ""Cora Sternberg|Andrew Armstrong|...",2011-03,Yes,No,Oral,Chemotherapy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6155,Abemaciclib plus endocrine therapy for hormone...,"https://pubmed.ncbi.nlm.nih.gov/32954927/, htt...","32954927, 34656740, 35337972, 36493792, 38194616",Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,...,superior,inferior,no difference,NCT03155997,"{""32954927"": ""Stephen R D Johnston|Nadia Harbe...",2017-07-12,Yes,Yes,Oral,Cdk4/6 Inhibitors
6156,Topotecan versus paclitaxel for the treatment ...,"https://pubmed.ncbi.nlm.nih.gov/14679127/, htt...","14679127, 9196130",Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,...,superior,inferior,no difference,,"{""14679127"": ""W ten Bokkel Huinink|S R Lane|G ...",,No,No,Unknown,Unknown
6157,Topotecan versus paclitaxel for the treatment ...,"https://pubmed.ncbi.nlm.nih.gov/14679127/, htt...","14679127, 9196130",Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,...,superior,inferior,no difference,,"{""14679127"": ""W ten Bokkel Huinink|S R Lane|G ...",,No,No,Unknown,Unknown
6158,Bendamustine prolongs progression-free surviva...,https://pubmed.ncbi.nlm.nih.gov/16096436/,16096436,Choose an option that best describes the effic...,Select the option that most accurately reflect...,Which option best summarizes the comparative e...,Identify the option that best summarizes the e...,Which option most effectively illustrates the ...,Pick the option that most clearly describes th...,Select the statement that best encapsulates th...,...,superior,inferior,no difference,,"{""16096436"": ""G von Minckwitz|I Chernozemsky|L...",,No,No,Unknown,Unknown


In [23]:
import pandas as pd

df = pd.read_csv("../data/Hemonc_new_with_all.csv")
#drop any row who has na in there
df = df.dropna()

# drop pmids that hold , in them
df = df[~df["pmids"].str.contains(",")]
# drop duplicates on NCT keep first
df = df.drop_duplicates(subset="pmids", keep="first")
df = df.drop_duplicates(subset="NCT", keep="first")

# drop rows in NCT where it does not start with NCT, and drop NA    
df = df.dropna(subset=["NCT"])
df = df[df["NCT"].str.startswith("NCT")]


df.to_csv("../data/Hemonc_dedup_with_all.csv", index=False)

In [17]:
# check df['start_date'] ranges
print(df['start_date'].min())
print(df['start_date'].max())


1990-12
2022-04-19


In [1]:
import pandas as pd

df = pd.read_csv('../results/nct_sonar_deep_506.csv')
df

Unnamed: 0,question,correct_answer,model_output,extracted_info,correct,urls
0,Find/search the clinical trial id that best de...,NCT01234311,"<think>\nOkay, let me figure out how to approa...",NCT01234311,True,
1,Find/search the clinical trial id that best de...,NCT00113763,"<think>\nOkay, let's tackle this query. The us...",NCT00113763,True,
2,Find/search the clinical trial id that best de...,NCT02958111,"<think>\nOkay, let's tackle this query. The us...",NCT02460419,False,
3,Find/search the clinical trial id that best de...,NCT03321539,"<think>\nOkay, I need to find the clinical tri...",NCT01528618,False,
4,Find/search the clinical trial id that best de...,NCT01964170,"<think>\nOkay, let's tackle this query. The us...",NCT00831233,False,
...,...,...,...,...,...,...
501,Find/search the clinical trial id that best de...,NCT01815242,"<think>\nOkay, let me try to figure out the an...",NCT01881230,False,
502,Find/search the clinical trial id that best de...,NCT00685646,"<think>\nOkay, I need to find the clinical tri...",NCT00079001,False,
503,Find/search the clinical trial id that best de...,NCT00012324,"<think>\nOkay, let me try to figure this out. ...",NCT00012324,True,
504,Find/search the clinical trial id that best de...,NCT02813967,"<think>\nOkay, let's tackle this query. The us...",NCT02813967,True,


In [10]:
df1 = pd.read_csv('../results/nct_gpro_tool_506.csv')
df2 = pd.read_csv('../results/nct__sonar_nct_506.csv')
df3 = pd.read_csv('../results/nct_gpt4o_506.csv')

In [12]:
df4 = pd.read_csv('../results/run_april_506/nct_gpt4omini_506.csv')

In [None]:
total = 0
for i,j,k,l,m in zip(df['correct'], df1['correct'], df2['correct'], df3['correct'], df4['correct']):
    if i or j or k or l or m:
        total += 

print(total/len(df))

0.5909090909090909


Unnamed: 0,question,correct_answer,model_output,extracted_info,correct,urls
0,Find/search the clinical trial id that best de...,NCT01234311,"Based on the search results, the primary clini...",NCT01234311,True,
1,Find/search the clinical trial id that best de...,NCT00113763,"Based on the search results, the clinical tria...",NCT00113763,True,
2,Find/search the clinical trial id that best de...,NCT02958111,"Based on the search results, the clinical tria...",NCT02143388,False,
3,Find/search the clinical trial id that best de...,NCT03321539,"Based on the search results, the clinical tria...",NCT01528618,False,
4,Find/search the clinical trial id that best de...,NCT01964170,"Based on the search results, the clinical tria...",NCT00295750,False,
...,...,...,...,...,...,...
501,Find/search the clinical trial id that best de...,NCT01815242,Error: Rate limit exceeded after 5 retries. Pl...,,False,
502,Find/search the clinical trial id that best de...,NCT00685646,Error: Rate limit exceeded after 5 retries. Pl...,,False,
503,Find/search the clinical trial id that best de...,NCT00012324,"Based on the search results, the clinical tria...",NCT00003976,False,https://github.com/jkvoulgaridis/appathonNTUA
504,Find/search the clinical trial id that best de...,NCT02813967,Error: Rate limit exceeded after 5 retries. Pl...,,False,
