In [9]:
import random

# Downsample to 25 papers
with open("../assets/downsampled_pmids.txt") as f:
    pmids = f.readlines()
    pmids = [x.strip() for x in pmids]

# Randomly sample 25 PMIDs without replacement
random.seed(42)
sampled_pmids = random.sample(pmids, k=25)
for pmid in sampled_pmids:
    print(pmid)

# Save the sampled PMIDs
with open("../assets/downsampled_pmids_25.txt", "w") as f:
    for pmid in sampled_pmids:
        f.write(f"{pmid}\n")


33079061
29943729
39038490
38014328
28489002
33667373
38712267
32433963
34230464
24684937
37163061
29255026
33770490
39548088
38948859
24055154
35353036
22794255
35833625
32916090
36352224
29467404
31488722
38979352
32894223


In [7]:
import json
import sys

sys.path.append("../python/extraction/")

import utils
import markdown_pdf


def extraction_to_markdown(path):
    """Convert cell type extraction JSON to markdown format.

    Args:
        path (str): Path to JSON file containing cell type extraction data

    Returns:
        str: Formatted markdown string containing paper details and cell type information
    """
    with open(path, "r") as f:
        data = json.load(f)

    # Extract PMID from filename and create title section
    pmid = path.split("/")[-1].split(".")[0]
    title = f"# {pmid}\nPaper link: https://pubmed.ncbi.nlm.nih.gov/{pmid}/\n"

    markdown = ""
    toc = []

    # Get full paper text
    paper = utils.Paper.from_pmid(pmid)
    paper_str = paper.to_markdown()
    paper_str = f"## Full text\n\n{paper_str}"
    paper_str = paper_str.replace("\n", "\n\n")

    # Process each cell type
    for cell_type in data:
        # Add to table of contents
        link = cell_type["cell_type"].lower().replace(" ", "-")
        toc.append(f"- [{cell_type['cell_type']}](#{link})")

        # Add cell type section
        markdown += f"## {cell_type['cell_type']}\n"
        markdown += f"**Explanation:** {cell_type['explanation']}\n\n"

        # Add factoids
        markdown += "**Factoids:**\n\n"
        for factoid in cell_type["factoids"]:
            markdown += f"- {factoid}\n"

        # Add evidence
        markdown += "\n**Evidence:**\n\n"
        for evidence in cell_type["metadata"]["evidence"]:
            sec, idx = evidence.split(" ")
            idx = int(idx) + 1
            evidence = f"{sec} {idx}"
            link = evidence.lower().replace(" ", "-")
            markdown += f"- [{evidence}](#{link})\n"
        markdown += "\n"

    # Combine all sections
    toc = "\n".join(toc)
    return f"{title}\n{toc}\n\n{markdown}\n\n{paper_str}"

In [8]:
# Use pandoc to convert markdown to pdf
import subprocess

example_path = "../assets/cell_types/gpt_4o/20471351.json"
markdown_text = extraction_to_markdown(example_path)
with open("input.md", "w") as f:
    f.write(markdown_text)

subprocess.run(["pandoc", "-s", "-o", "output.pdf", "input.md"])


  undefined on input line 69.


CompletedProcess(args=['pandoc', '-s', '-o', 'output.pdf', 'input.md'], returncode=0)

In [12]:
import os
import json

# Create reports directory if it doesn't exist
os.makedirs("../assets/reports", exist_ok=True)

# Load sampled PMIDs
with open("../assets/downsampled_pmids_25.txt") as f:
    sampled_pmids = f.readlines()
    sampled_pmids = [x.strip() for x in sampled_pmids]

for pmid in sampled_pmids:
    cell_type_file = f"../assets/cell_types/gpt_4o/{pmid}.json"

    # Skip if cell type file doesn't exist
    if not os.path.exists(cell_type_file):
        print(f"No cell type data found for PMID {pmid}, skipping...")
        continue

    # Generate markdown
    markdown_text = extraction_to_markdown(cell_type_file)

    # Write markdown to temp file
    with open("input.md", "w") as f:
        f.write(markdown_text)

    # Generate PDF output path
    pdf_path = f"../assets/reports/{pmid}.pdf"

    # Convert to PDF using pandoc
    subprocess.run(["pandoc", "-s", "-o", pdf_path, "input.md"])

    print(f"Generated report for PMID {pmid}")

# Clean up temp markdown file
os.remove("input.md")


Error producing PDF.
! LaTeX Error: Unicode character α (U+03B1)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.112   GluClα



Generated report for PMID 33079061


Error producing PDF.
! LaTeX Error: Unicode character − (U+2212)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.831 t~=~−



Generated report for PMID 29943729


Error producing PDF.
! LaTeX Error: Unicode character γ (U+03B3)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.65   \hyperref[ppl1-ux3b31pedc]{PPL1-γ1pedc}



Generated report for PMID 39038490
Generated report for PMID 38014328


Error producing PDF.
! LaTeX Error: Unicode character κ (U+03BA)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.117 SSR-localized NF-κ



Generated report for PMID 28489002


Error producing PDF.
! LaTeX Error: Unicode character μ (U+03BC)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.120 suggested to activate the cat μ



Generated report for PMID 33667373
Generated report for PMID 38712267
Generated report for PMID 32433963


Error producing PDF.
! LaTeX Error: Unicode character λ (U+03BB)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.250 Pagel’s λ



Generated report for PMID 34230464


Error producing PDF.
! LaTeX Error: Unicode character γ (U+03B3)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.73 ...yperref[ux3b3-lobe-neuron]{γ lobe neuron}



Generated report for PMID 24684937


Error producing PDF.
! LaTeX Error: Unicode character − (U+2212)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.179 ...ein of interest was tagged with a C’−



Generated report for PMID 37163061


Error producing PDF.
! LaTeX Error: Unicode character Δ (U+0394)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.719 ...ark environment. The calcium activity (Δ



Generated report for PMID 29255026


Error producing PDF.
! LaTeX Error: Unicode character μ (U+03BC)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.114 of 3 μ



Generated report for PMID 33770490


Error producing PDF.
! LaTeX Error: Unicode character ˚ (U+02DA)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.90   32˚



Generated report for PMID 39548088


  input line 61.


Generated report for PMID 38948859


Error producing PDF.
! LaTeX Error: Unicode character α (U+03B1)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.61 ...x2fux3b2ux2032-neuron]{α′/β′ neuron}



Generated report for PMID 24055154


Error producing PDF.
! LaTeX Error: Unicode character μ (U+03BC)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.397 ...the second instar. Larvae fed with 250 μ



Generated report for PMID 35353036
Generated report for PMID 22794255


Error producing PDF.
! LaTeX Error: Unicode character α (U+03B1)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.304 ... the conserved VGCC regulatory subunit α



Generated report for PMID 35833625


Error producing PDF.
! LaTeX Error: Unicode character Δ (U+0394)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.69   \hyperref[ux3b47]{Δ7}



Generated report for PMID 32916090


Error producing PDF.
! LaTeX Error: Unicode character θ (U+03B8)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.341 cases, the plots are well fit by Acosθ



Generated report for PMID 36352224


Error producing PDF.
! LaTeX Error: Unicode character α (U+03B1)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.101 of Inscuteable (Pins), and Gα



Generated report for PMID 29467404


Error producing PDF.
! LaTeX Error: Unicode character γ (U+03B3)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.61   \hyperref[ux3b3-neuron]{γ-neuron}



Generated report for PMID 31488722


Error producing PDF.
! LaTeX Error: Unicode character ᑎ (U+144E)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.259 ...tion of 24C10-GAL4 and ppk-GAL4 (24C10ᑎ



Generated report for PMID 38979352
Generated report for PMID 32894223


Error producing PDF.
! LaTeX Error: Unicode character Δ (U+0394)
               not set up for use with LaTeX.

See the LaTeX manual or LaTeX Companion for explanation.
Type  H <return>  for immediate help.
 ...                                              
                                                  
l.360 allele DroncΔ

