<a href="https://colab.research.google.com/github/mirsadra/casereport/blob/main/manuscript.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
from Bio import Entrez

import csv
from xml.etree import ElementTree as ET

In [9]:
# Contact address that Biopython sends to NCBI with every E-utilities request.
Entrez.email = "100ra.molaie@gmail.com"

# Formulate the query: case reports on crossed/fused renal ectopia, either as
# a combination of the individual concepts or as one of the explicit phrases.
# The literal is split across lines only for readability; the concatenated
# value is exactly the original single-line query.
query = (
    "(((renal ectopia) OR (ectopic kidney)) "
    "AND ((ectopic ureter) OR (ureteral ectopia)) "
    "AND ((crossed) OR (fused) OR (cross) OR (fuse)) "
    "OR (crossed fused ectopic kidney OR crossed-fused renal ectopia "
    "OR crossed renal ectopia OR crossed renal ectopia calculi "
    "OR crossed renal ectopia stones OR crossed renal ectopia carcinoma "
    "OR crossed renal ectopia cancer)) "
    "AND (Case Reports[Filter])"
)

# Use ESearch to get the list of Ids.
# retmax is set to 10000 because NCBI documents that as the most PubMed UIDs a
# single ESearch request can return; asking for more does not yield more.
# The handle is closed as soon as the response has been parsed.
with Entrez.esearch(db="pubmed", term=query, retmax=10000) as search_handle:
    search_results = Entrez.read(search_handle)
id_list = search_results["IdList"]

# "Count" is the total number of matches; warn instead of silently working on
# a truncated ID list if the query ever matches more than was returned.
if int(search_results["Count"]) > len(id_list):
    print(
        f"Warning: query matched {search_results['Count']} records "
        f"but only {len(id_list)} IDs were returned."
    )

# Use EFetch to get detailed record.
# The handle is intentionally left open: it is consumed by the Medline parsing
# step that follows.
records = Entrez.efetch(db="pubmed", id=id_list, rettype="medline", retmode="text")

In [None]:
from Bio import Medline

# Walk the MEDLINE-formatted EFetch response one citation at a time and print
# the fields of interest for each.
parsed_records = Medline.parse(records)
for record in parsed_records:
    # "TI" holds the title and "AB" the abstract; "?" stands in for a missing field.
    title = record.get("TI", "?")
    abstract = record.get("AB", "?")

    # The MEDLINE format has no dedicated "case presentation" field, so the
    # abstract is reused as the best available approximation. Real-world use
    # may need extra filtering depending on how the returned records are
    # structured.
    case_presentation = abstract

    # One print call, one item per line, closed by the record separator.
    print(
        f"Title: {title}",
        f"Abstract: {abstract}",
        f"Case Presentation: {case_presentation}",
        "-----",
        sep="\n",
    )

In [12]:
# Imported in this cell so it can be run on its own; HTTPError is the
# exception that surfaces when an NCBI request is rejected or fails.
from urllib.error import HTTPError

# Fetch linked PMC IDs from PubMed IDs.
# linkname="pubmed_pmc" restricts ELink to each article's own full text in
# PMC. Without it the response can contain several link sets (for example the
# PMC articles that cite the paper), and taking whichever comes first would
# export the wrong documents.
link_results = []
if id_list:  # Skip the request entirely when the search found nothing.
    with Entrez.elink(
        dbfrom="pubmed", db="pmc", id=id_list, linkname="pubmed_pmc"
    ) as link_handle:
        link_results = Entrez.read(link_handle)

pmc_ids = []
for result in link_results:
    # A PubMed record with no PMC full text has no "LinkSetDb" entries.
    for link_set_db in result.get("LinkSetDb", []):
        pmc_ids.extend(link["Id"] for link in link_set_db["Link"])

# PMC IDs whose download or parsing failed, reported after the export.
skipped_ids = []

# Prepare CSV file for writing. UTF-8 is requested explicitly so titles with
# non-ASCII characters are written the same way on every platform.
with open("articles.csv", "w", newline="", encoding="utf-8") as csvfile:
    fieldnames = ["PMC_ID", "Title", "Content"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    # Fetch full texts for those articles, one request per PMC ID.
    for pmc_id in pmc_ids:
        # Only the download and XML parse are guarded: one unavailable or
        # malformed article is reported and skipped rather than aborting the
        # whole export.
        try:
            with Entrez.efetch(db="pmc", id=pmc_id, rettype="full", retmode="xml") as handle:
                tree = ET.parse(handle)
                root = tree.getroot()
        except (HTTPError, ET.ParseError) as err:
            print(f"Skipping PMC ID {pmc_id}: {err}")
            skipped_ids.append(pmc_id)
            continue

        # Attempt to extract title (this might need adjustments based on XML
        # structure). itertext() gathers the text of nested inline elements as
        # well; .text alone stops at the first child tag and may be None.
        title_element = root.find(".//title-group/article-title")
        title = ""
        if title_element is not None:
            title = "".join(title_element.itertext()).strip()
        if not title:
            title = "Unknown Title"

        # Get the first 2000 characters of the XML content as an example.
        content = ET.tostring(root, encoding="unicode")[:2000]

        # Write details to the CSV.
        writer.writerow({"PMC_ID": pmc_id, "Title": title, "Content": content})

print("Articles saved to articles.csv")
if skipped_ids:
    print(f"Skipped {len(skipped_ids)} article(s) that could not be fetched or parsed.")

HTTPError: ignored