In [48]:
import json
from pathlib import Path

import pandas as pd
from utils import *

# Load the parsed LLM validation responses for round 1 and round 2.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's default text encoding.
with open("intermedia_data/parsed_10_edge_LLM_validate_responses_round1.json", 'r', encoding='utf-8') as file:
    first_round_results = json.load(file)
with open("intermedia_data/parsed_10_edge_LLM_validate_responses_round2.json", 'r', encoding='utf-8') as file:
    second_round_results = json.load(file)

In [62]:
# Compare the decisions recorded in the two LLM rounds and print a summary.
# NOTE(review): the helpers come from `utils`; the structure of `compare_results`
# is not visible in this notebook.
compare_results = compare_llm_decisions(first_round_results, second_round_results)
print_comparison_summary(compare_results)

# Alternatively, extract only the disagreement indices from the comparison.
disagreement_indices = get_disagreement_indices(compare_results)

=== LLM Decision Comparison Summary ===
Total edges compared: 44
Agreements: 32
Disagreements: 12
Only in LLM1: 72
Only in LLM2: 0
Agreement rate: 72.73%

=== Disagreements ===
{'yes -> maybe': 6, 'maybe -> yes': 1, 'maybe -> no': 5}


In [54]:
# Merge the two validation rounds into one record per (index, pmid) pair.
#
# Each record is a dict read with the keys 'index', 'pmid', 'extracted_data'
# (a dict that may hold 'Support?' and 'Sentences') and 'extraction_status'.
#
# Merge policy:
#   * round 1 is the baseline;
#   * a round-2 record replaces the round-1 record only when round 1 answered
#     'yes' or 'maybe' (case-insensitive);
#   * a round-1 'no' (or any other value) is kept untouched;
#   * a pair that appears only in round 2 is added, with a warning.

# Baseline: every round-1 record, keyed by (index, pmid).
merged_results_dict = {
    (item['index'], item['pmid']): item
    for item in first_round_results
}

# Selectively add/overwrite with round-2 records based on round 1's answer.
for item in second_round_results:
    index = item['index']
    pmid = item['pmid']
    key = (index, pmid)

    # Pair unseen in round 1: keep the round-2 record but flag it.
    if key not in merged_results_dict:
        print(f"Warning: Index {index} with PMID {pmid} exists in Round 2 but not in Round 1")
        merged_results_dict[key] = item
        continue

    # Round 1's answer, normalised for comparison. The key can be present with a
    # null / non-string value (e.g. JSON null), in which case `.get(..., '')`
    # would return None and `.lower()` would raise; treat that as "no answer".
    round1_support_raw = merged_results_dict[key]['extracted_data'].get('Support?')
    round1_support_value = str(round1_support_raw or '').lower()

    # Only a round-1 'yes'/'maybe' is superseded by round 2.
    if round1_support_value in ('yes', 'maybe'):
        merged_results_dict[key] = item

# Flatten the merged records into a table: one row per (edge, PMID) pair, with
# the nested 'extracted_data' dict expanded into top-level columns.
merged_results = list(merged_results_dict.values())
validation_df = pd.DataFrame(merged_results)
extracted_data_df = pd.json_normalize(validation_df['extracted_data'])
validation_df = pd.concat(
    [validation_df.drop('extracted_data', axis=1), extracted_data_df],
    axis=1,
)
validation_df = validation_df.drop(['extraction_status'], axis=1)
validation_df = validation_df.rename(columns={
    'Support?': 'abstract_support?',
    'Sentences': 'support_abstract_sentences',
    'index': 'edge_index',
})


In [56]:
# Display the merged validation table (one row per edge_index / pmid pair).
validation_df

Unnamed: 0,edge_index,pmid,abstract_support?,support_abstract_sentences
0,0,PMID:3908074,yes,[Nor... is as effective as spectinomycin in go...
1,1,PMID:21797966,no,[]
2,1,PMID:28390428,no,[]
3,1,PMID:30625703,maybe,[]
4,1,PMID:24629157,no,[]
...,...,...,...,...
111,9,PMID:1589256,no,[]
112,9,PMID:27958631,maybe,[]
113,9,PMID:2670019,maybe,[]
114,9,PMID:9264868,maybe,[]


In [58]:
from pubmed_client import get_publication_info
from nltk.tokenize import sent_tokenize

# Get unique PMIDs
support_edges = validation_df[validation_df['abstract_support?'] == 'yes']
pmids = list(set(support_edges['pmid'].values))

# Define batch size
batch_size = 100  # Adjust this based on your API limits

abstracts_dict = {}

# Process PMIDs in batches
for i in range(0, len(pmids), batch_size):
    batch_pmids = pmids[i:i + batch_size]
    
    print(f"Processing batch {i//batch_size + 1}/{(len(pmids) + batch_size - 1)//batch_size}: {len(batch_pmids)} PMIDs")
    
    try:
        abstracts_info = await get_publication_info(batch_pmids, 'placeholder')
        
        if abstracts_info['_meta']['n_results'] > 0: 
            abstracts = abstracts_info['results']
            
            for pmid in batch_pmids:
                abstract = abstracts.get(pmid, {})
                abstract_text = abstract.get('abstract')
                
                if abstract_text:
                    sentences = sent_tokenize(abstract_text)
                    abstracts_dict[pmid] = {'abstract': abstract_text, 'sentences': sentences}
                else:
                    print(f"Warning: No abstract found for PMID {pmid}")
        else:
            print(f"Warning: No results returned for batch starting at index {i}")
    
    except Exception as e:
        print(f"Error processing batch starting at index {i}: {e}")
        continue

print(f"\nTotal abstracts retrieved: {len(abstracts_dict)}/{len(pmids)}")

Processing batch 1/1: 24 PMIDs

Total abstracts retrieved: 24/24


In [64]:
# Split the fetched abstracts into usable entries and placeholders.
# An abstract whose text is exactly '-\n' is treated as a placeholder: its PMID
# is recorded in `removed_pmids` and the entry is left out of the cleaned dict.
removed_pmids = [
    pmid
    for pmid, record in abstracts_dict.items()
    if record.get('abstract', '') == '-\n'
]

cleaned_abstracts_dict = {
    pmid: record
    for pmid, record in abstracts_dict.items()
    if record.get('abstract', '') != '-\n'
}

In [66]:
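# Optional cache: uncomment to save the cleaned abstracts for reuse, or to reload a previously saved copy.
# NOTE(review): the save path (abstract_dict_10.json) and the load path (abstract_dict.json) below differ; confirm which file is intended.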

# with open('dict/abstract_dict_10.json', 'w') as f:
#     json.dump(cleaned_abstracts_dict, f, indent=4)

# with open('dict/abstract_dict.json', 'r') as file:
#     cleaned_abstracts_dict = json.load(file)

In [70]:
# Build a new table from `validation_df` with one extra column holding the
# tokenized abstract of each row's PMID. PMIDs that have no entry in
# `cleaned_abstracts_dict` (or no 'sentences' key) get an empty list.
validation_1_df = validation_df.assign(
    abstract_sentences=validation_df['pmid'].map(
        lambda pmid: cleaned_abstracts_dict.get(pmid, {}).get('sentences', [])
    )
)

In [72]:
# Display the table with the new `abstract_sentences` column.
validation_1_df

Unnamed: 0,edge_index,pmid,abstract_support?,support_abstract_sentences,abstract_sentences
0,0,PMID:3908074,yes,[Nor... is as effective as spectinomycin in go...,[Norfloxacin is one of the new 4-quinolone ant...
1,1,PMID:21797966,no,[],[]
2,1,PMID:28390428,no,[],[]
3,1,PMID:30625703,maybe,[],[]
4,1,PMID:24629157,no,[],[]
...,...,...,...,...,...
111,9,PMID:1589256,no,[],[]
112,9,PMID:27958631,maybe,[],[]
113,9,PMID:2670019,maybe,[],[]
114,9,PMID:9264868,maybe,[],[]


In [74]:
from ollama import Client

# Run the LLM mapping helper on every row, collecting its two return values in
# parallel lists (one entry per row, in row order).
# NOTE(review): process_with_llm_fallback comes from `utils`; it is unpacked
# here as (indices, success), and its internals are not visible in this notebook.
client = Client()

indices_column, success_flags = [], []

for _, edge_row in validation_1_df.iterrows():
    sentence_idxs, mapped_ok = process_with_llm_fallback(edge_row, cleaned_abstracts_dict, client)
    indices_column.append(sentence_idxs)
    success_flags.append(mapped_ok)

In [76]:
# Attach the per-row mapping results: the returned indices and the success flag.
validation_1_df = validation_1_df.assign(
    gold_sent_idxs=indices_column,
    mapping_success=success_flags,
)

In [78]:
from collections import Counter
# Tally the success flags collected by the mapping loop.
Counter(success_flags)

Counter({True: 116})

In [80]:
# Collect the index labels of rows whose mapping did not succeed and hand them
# to fix_specific_rows, which returns the updated table.
# Per the original note, the retry uses a more powerful model (gpt-oss:120).
failed_mask = ~validation_1_df['mapping_success']
failed_indices = validation_1_df.index[failed_mask].tolist()
validation_1_df = fix_specific_rows(
    validation_1_df,
    failed_indices,
    cleaned_abstracts_dict,
    client,
)

In [82]:
# Drop the bookkeeping column now that failures have been retried.
# `errors='ignore'` keeps this cell re-runnable: a plain `del` raises KeyError
# the second time the cell is executed.
validation_1_df = validation_1_df.drop(columns=['mapping_success'], errors='ignore')
validation_1_df

Unnamed: 0,edge_index,pmid,abstract_support?,support_abstract_sentences,abstract_sentences,gold_sent_idxs
0,0,PMID:3908074,yes,[Nor... is as effective as spectinomycin in go...,[Norfloxacin is one of the new 4-quinolone ant...,[7]
1,1,PMID:21797966,no,[],[],[]
2,1,PMID:28390428,no,[],[],[]
3,1,PMID:30625703,maybe,[],[],[]
4,1,PMID:24629157,no,[],[],[]
...,...,...,...,...,...,...
111,9,PMID:1589256,no,[],[],[]
112,9,PMID:27958631,maybe,[],[],[]
113,9,PMID:2670019,maybe,[],[],[]
114,9,PMID:9264868,maybe,[],[],[]


In [84]:
# Save the validated edge-PMID pairs.
# The output directory is created first so the write does not fail on a fresh
# checkout where 'result/' does not exist yet.
output_path = Path('result/10_edge_validate_gold_abstract.parquet')
output_path.parent.mkdir(parents=True, exist_ok=True)

validation_1_df.to_parquet(output_path)