In [10]:
import requests
import xml.etree.ElementTree as ET
import json


In [11]:
# Function to load the API key from config.json
def load_api_key(filepath):
    """Read the NCBI API key from a JSON configuration file.

    Args:
        filepath: Path to a JSON file whose top-level object contains the
            key ``'ncbi_api_key'``.

    Returns:
        The value stored under ``'ncbi_api_key'``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If ``'ncbi_api_key'`` is missing from the configuration.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's
    # default encoding, which differs between operating systems.
    with open(filepath, 'r', encoding='utf-8') as file:
        config = json.load(file)
    return config['ncbi_api_key']

# Function to search PMC for articles matching keyword of interest
def search_pmc(keyword, api_key, retmax=100, timeout=30):
    """Search the PMC database through the NCBI ESearch endpoint.

    The keyword is restricted to the ``[Title/Abstract]`` field.

    Args:
        keyword: Search term to look for.
        api_key: NCBI API key sent with the request.
        retmax: Maximum number of IDs to request (defaults to 100, the value
            previously hard-coded here).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        'db': 'pmc',
        'term': f'{keyword}[Title/Abstract]',
        'retmax': retmax,
        # NOTE(review): history is requested, but nothing in this file reads
        # the WebEnv/query_key from the response -- confirm whether needed.
        'usehistory': 'y',
        'api_key': api_key
    }
    response = requests.get(base_url, params=params, timeout=timeout)
    if response.status_code == 200:
        return response.text
    return None

# Function to fetch full texts from PMC based on a list of PMC IDs
def fetch_pmc(ids, api_key, timeout=30):
    """Fetch records from PMC through the NCBI EFetch endpoint.

    Args:
        ids: Iterable of PMC IDs. Items are converted to strings, so integer
            IDs are accepted as well.
        api_key: NCBI API key sent with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as text (requested with ``retmode=xml``) when the
        server answers with HTTP 200. ``None`` when ``ids`` is empty or the
        server answers with any other status.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    id_list = [str(pmc_id) for pmc_id in ids] if ids else []
    if not id_list:
        # Nothing to fetch: skip the network round trip entirely.
        return None

    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    params = {
        'db': 'pmc',
        'retmode': 'xml',
        'id': ','.join(id_list),
        'api_key': api_key
    }
    response = requests.get(base_url, params=params, timeout=timeout)
    if response.status_code == 200:
        return response.text
    return None

# Function to parse the search results and fetch the matching full texts
def process_search_results(search_result, api_key):
    """Extract PMC IDs from a search result and fetch the matching full texts.

    Args:
        search_result: XML text returned by the search step, or a falsy value
            (``None`` / empty string) if the search failed.
        api_key: NCBI API key forwarded to ``fetch_pmc``.

    Returns:
        The text returned by ``fetch_pmc`` on success. ``None`` when the
        search result is missing or cannot be parsed as XML, when it contains
        no IDs, or when the fetch returns nothing. A short diagnostic is
        printed in each of those cases.
    """
    if not search_result:
        print("Failed to retrieve search results.")
        return None

    # Parse XML to find PMC IDs
    try:
        root = ET.fromstring(search_result)
    except ET.ParseError as exc:
        # A malformed body is reported like any other failed search instead
        # of aborting with a traceback.
        print("Failed to parse search results:", exc)
        return None

    # Skip empty <Id/> elements, whose text is None and cannot be joined.
    ids = [id_elem.text for id_elem in root.findall('.//IdList/Id') if id_elem.text]
    if not ids:
        print("No PMC IDs found for the keyword.")
        return None
    print("Found PMC IDs:", ids)

    # Fetch articles based on PMC IDs
    full_texts = fetch_pmc(ids, api_key)
    if not full_texts:
        # Previously this path returned None without any message.
        print("Failed to fetch full texts.")
        return None

    print("Fetched full texts.")
    return full_texts

def save_texts_as_xml(full_texts, filename):
    """Write a string of XML data to ``filename`` as UTF-8 text.

    Args:
        full_texts: The XML content to save; must be a ``str``.
        filename: Path of the file to create or overwrite.

    Raises:
        TypeError: If ``full_texts`` is not a string (for example ``None``
            after a failed fetch). The check runs before the file is opened,
            so an existing file is not truncated by a bad call.
        OSError: If the file cannot be opened for writing.
    """
    if not isinstance(full_texts, str):
        raise TypeError(
            f"full_texts must be a string of XML data, got {type(full_texts).__name__}"
        )
    # Write UTF-8 explicitly; the platform default encoding may not be able
    # to represent every character in the text.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(full_texts)


In [12]:
# Testing:
api_key = load_api_key('config.json')
keyword = "NGS"

search_result = search_pmc(keyword, api_key)
full_texts = process_search_results(search_result, api_key)

# process_search_results returns None when the search or the fetch yields
# nothing, so only write the output file when there is text to save.
if full_texts is not None:
    save_texts_as_xml(full_texts, 'full_texts.xml')
else:
    print("No full texts to save.")


Found PMC IDs: ['11175920', '11138516', '11135680', '11124265', '11104603', '11093558', '11092157', '11078754', '11055025', '11048446', '11022481', '10983863', '10971161', '10941783', '10931361', '10906646', '10890296', '10871466', '10870236', '10867075', '10848413', '10834361', '11171991', '11171117', '10842566', '11141846', '11108270', '11079010', '11016705', '11015456', '11006983', '11001031', '10984659', '10984464', '10955336', '10951009', '10914053', '10908179', '10880437', '10845330', '10826217', '10807591', '10770950', '10765985', '10758630', '10740754', '10735146', '10724692', '10714047', '10815114', '10811109', '10792189', '10778112', '10777859', '10748409', '10743540', '10731269', '10700460', '10696055', '10687144', '10681543', '10680258', '10671584', '10658950', '10638841', '10626058', '10623306', '10619654', '10605658', '10605119', '10597888', '10571785', '10556695', '10554801', '10460481', '10459500', '10435716', '10431239', '10429879', '10429216', '10428943', '10400411', 