In [3]:
import requests
import json
import numpy as np
from pysrc.papers.data import AnalysisData
from IPython.display import HTML, display

def filter_by_connectivity(df, graph, percentile=75, max_count=None):
    # Step 1: Compute connectivity (without modifying df)
    connectivity = df['id'].apply(lambda pid: len(list(graph.neighbors(pid))))

    # Step 2: Compute the percentile threshold
    threshold = np.percentile(connectivity, percentile)

    # Step 3: Get mask for nodes above threshold
    above_threshold_mask = connectivity >= threshold

    # Step 4: Apply the mask
    filtered_df = df[above_threshold_mask].copy()
    filtered_df['connections'] = connectivity[above_threshold_mask].values

    # Step 5: If max_count is specified, take top N by connections
    if max_count is not None and len(filtered_df) > max_count:
        filtered_df = filtered_df.sort_values('connections', ascending=False).head(max_count)

    return filtered_df

def render_table(entities):
    html = """
    <style>
        .collapse-content { display: none; margin-top: 5px; }
        .toggle-button { cursor: pointer; color: blue; text-decoration: underline; }
        th { text-align: left; }
    </style>
    <script>
        function toggleCollapse(id) {
            var x = document.getElementById(id);
            x.style.display = (x.style.display === "none") ? "block" : "none";
        }
    </script>
    <table border="1" style="border-collapse: collapse; width: 100%;">
        <thead>
            <tr>
                <th>#</th>
                <th>Name</th>
                <th>Context</th>
                <th>Total Connections</th>
                <th>Papers</th>
            </tr>
        </thead>
        <tbody>
    """
    for idx, entity in enumerate(sorted(entities, key=lambda g: g['total_connections'], reverse=True), start=1):
        collapse_id = f"collapse-{idx}"
        paper_links = "<br>".join(
            f'<a href="/paper/{pid}" target="_blank">{pid}</a>' for pid in entity["cited_in"]
        )
        html += f"""
        <tr>
            <td>{idx}</td>
            <td>{entity['name']}</td>
            <td>{entity['context']}</td>
            <td>{entity['total_connections']}</td>
            <td>
                <span class="toggle-button" onclick="toggleCollapse('{collapse_id}')">
                    Show Papers ({len(entity['cited_in'])})
                </span>
                <div id="{collapse_id}" class="collapse-content">{paper_links}</div>
            </td>
        </tr>
        """
    html += "</tbody></table>"
    display(HTML(html))

###### Remove this part if you're getting json from pubtrends API

# Replace 'your_file.json' with the actual path to your JSON file
file_path = 'pubmed-drug-resistance-in-cancer.json'

# Open and load the JSON file
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

ex = AnalysisData.from_json(data)

######

highly_connected_df = filter_by_connectivity(
    ex.df,
    ex.papers_graph,
    percentile=90,
    max_count=50  # cap the result if it's too large
)

abstract_entries = highly_connected_df[['id', 'abstract']].to_dict(orient='records')

# Convert to formatted string for LLM
abstracts_json = json.dumps(abstract_entries, ensure_ascii=False, indent=2)

# 1. Set your function URL
FUNCTION_URL = "URL_HERE"

# 2. System prompt enum (must match server-side allowed value), here are represented all types
si_mode = "GENES_EXTRACTION"
# si_mode = "SUBSTANCES_EXTRACTION"
# si_mode = "CONDITIONS_EXTRACTION"
# si_mode = "PROTEINS_EXTRACTION"

# 4. Make the POST request with abstracts and si_mode
response = requests.post(
    f"{FUNCTION_URL}?si_mode={si_mode}",
    json=abstracts_json,
    headers={"Content-Type": "application/json"}
)

# 5. Handle response
if response.status_code == 200:
    data = response.json()
    connections_by_pid = dict(zip(highly_connected_df['id'], highly_connected_df['connections']))
    for entity in data:
        entity["total_connections"] = sum(
            connections_by_pid.get(pid, 0) for pid in entity.get("cited_in", [])
        )

    print("✅ Entities Extracted:\n")
    render_table(data)
else:
    print(f"❌ Error: {response.status_code}")
    print(response.json())

The default value will be changed to `edges="edges" in NetworkX 3.6.


  nx.node_link_graph(data, edges="links") to preserve current behavior, or
  nx.node_link_graph(data, edges="edges") for forward compatibility.


✅ Entities Extracted:



#,Name,Context,Total Connections,Papers
1,miR-137,"miR-137 level is extremely low in cisplatin resistant ovarian cancer cells, correlating with higher levels of c-Myc and EZH2 expression. In resistant cells c-Myc enhances the expression of EZH2 by directly suppressing miR-137 that targets EZH2 mRNA. Cisplatin treatment activates c-Myc-miR-137-EZH2 pathway.",177,Show Papers (3)  30166592 30166592 30166592
2,ATM,"ATM, a primary mediator of the DNA damage response, as a potential target of miR-203. ATM mRNA and protein levels were significantly down-regulated in CRC cells with acquired resistance to oxaliplatin. ATM is a bona fide target of miR-203 in CRC cells.",138,Show Papers (4)  24145123 24145123 24145123 28212573
3,miR-186,"miR-186 expression correlated with decreased Twist1 expression, chemoresistance and poor prognosis in epithelial ovarian cancer (EOC) patients. Introducing miR-186 into EOC cells led to a reduction in twist family bHLH transcription factor 1 (Twist1) expression along with morphological, functional and molecular changes consistent with mesenchymal-to-epithelial transition, G1 cell-cycle arrest and enhanced cell apoptosis, which consequently rendered the cells more sensitive to cisplatin in vitro and in vivo. The EMT and drug resistance reversal in response to miR-186 was mediated by Twist1.",135,Show Papers (3)  25867064 25867064 25867064
4,Twist1,"Twist1 expression correlated with decreased miR-186 expression, chemoresistance and poor prognosis in epithelial ovarian cancer (EOC) patients. Introducing miR-186 into EOC cells led to a reduction in twist family bHLH transcription factor 1 (Twist1) expression along with morphological, functional and molecular changes consistent with mesenchymal-to-epithelial transition, G1 cell-cycle arrest and enhanced cell apoptosis, which consequently rendered the cells more sensitive to cisplatin in vitro and in vivo. The EMT and drug resistance reversal in response to miR-186 was mediated by Twist1.",135,Show Papers (3)  25867064 25867064 25867064
5,EZH2,"EZH2 has been implicated in regulating cisplatin resistance in ovarian cancer. In resistant cells c-Myc enhances the expression of EZH2 by directly suppressing miR-137 that targets EZH2 mRNA, and increased expression of EZH2 activates cellular survival pathways, resulting in the resistance to cisplatin. Cisplatin treatment activates c-Myc-miR-137-EZH2 pathway.",118,Show Papers (2)  30166592 30166592
6,AR,"The effectiveness of androgen receptor (AR) inhibitors in recurrent disease is variable. Single CTCs from each individual display considerable heterogeneity, including expression of AR gene mutations and splicing variants. Ectopic expression of Wnt5a in prostate cancer cells attenuates the antiproliferative effect of AR inhibition, whereas its suppression in drug-resistant cells restores partial sensitivity, a correlation also evident in an established mouse model.",117,Show Papers (3)  26383955 26383955 26383955
7,Wnts,"The major reprogramming regulators in fibroblast-exosomes are Wnts. Exosomal Wnts were found to increase Wnt activity and drug resistance in differentiated CRC cells, and inhibiting Wnt release diminished this effect in vitro and in vivo. Exosomal Wnts derived from fibroblasts could induce the dedifferentiation of cancer cells to promote chemoresistance in CRC.",111,Show Papers (3)  30390075 30390075 30390075
8,KRAS,"Therapeutic targeting of KRAS-mutant lung adenocarcinoma represents a major goal of clinical oncology. KRAS itself has proved difficult to inhibit, and the effectiveness of agents that target key KRAS effectors has been thwarted by activation of compensatory or parallel pathways that limit their efficacy as single agents. FGFR1 mediates adaptive resistance to trametinib and validate a combinatorial approach for treating KRAS-mutant lung cancer.",111,Show Papers (3)  27338794 27338794 27338794
9,FGFR1,Trametinib provokes a compensatory response involving the fibroblast growth factor receptor 1 (FGFR1) that leads to signalling rebound and adaptive drug resistance. Genetic or pharmacological inhibition of FGFR1 in combination with trametinib enhances tumour cell death in vitro and in vivo. FGFR1 can mediate adaptive resistance to trametinib and validate a combinatorial approach for treating KRAS-mutant lung cancer.,111,Show Papers (3)  27338794 27338794 27338794
10,Nrf2,"The Nrf2 transcription factor, which is the master regulator of cellular responses to oxidative stress, is preactivated in de-differentiated cells. In de-differentiated cells, Nrf2 is not activated by oxidation but rather through a noncanonical mechanism involving its phosphorylation by the ER membrane kinase PERK. Constitutive PERK-Nrf2 signaling protects de-differentiated cells from chemotherapy by reducing ROS levels and increasing drug efflux.",111,Show Papers (3)  25203443 25203443 25203443
