In [2]:
# %pip (rather than !pip) installs into the environment of the running kernel;
# the version is pinned to the release this notebook was last executed with.
%pip install seaborn==0.13.2

Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2


In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from py2neo import Graph
import numpy as np

In [4]:
# CONNECTION FUNCTION
def connect_to_neo4j(db_name="sans-genelab"):
    """Prompt for credentials and build a ``Graph`` for the local Bolt endpoint.

    The username is read with ``input`` (a blank or whitespace-only answer
    falls back to ``"neo4j"``). The password is read with ``getpass`` so it is
    not echoed back; reading it with ``input`` leaves the plaintext password
    visible in the notebook's cell output.

    Args:
        db_name: Database name, forwarded to ``Graph`` as ``name``.

    Returns:
        The ``Graph`` instance, or ``None`` if constructing it raised an
        exception (the error text is printed in that case).
    """
    # Function-scope import keeps this cell independent of the imports cell.
    from getpass import getpass

    uri = "bolt://localhost:7687"
    user = input("Username [neo4j]: ").strip() or "neo4j"
    password = getpass("Password: ")
    try:
        return Graph(uri, user=user, password=password, name=db_name)
    except Exception as e:
        print(f"❌ Connection failed: {str(e)}")
        return None

In [5]:
# QUERY FUNCTIONS
def query_clinical_findings(graph, search_term=None, mission=None, limit=10):
    """Fetch clinical findings linked to studies, optionally narrowed.

    Args:
        graph: Object exposing ``run(cypher, **params)`` whose result offers
            ``to_data_frame()``.
        search_term: When truthy, findings are taken from the
            ``clinicalSearch`` full-text index instead of matching every
            ``ClinicalFinding`` node.
        mission: When truthy, only studies that are PART_OF a mission whose
            name contains this text are kept.
        limit: Row cap, bound to the ``$limit`` parameter.

    Returns:
        The result of ``to_data_frame()`` for the assembled query, which
        returns study, finding, effect_size and p_value ordered by
        effect_size descending.
    """
    cypher_parts = []
    bindings = {}

    # Where the `finding` nodes come from: full-text lookup or a label match.
    if search_term:
        cypher_parts.append(
            "CALL db.index.fulltext.queryNodes('clinicalSearch', $search) "
            "YIELD node WITH node as finding "
        )
        bindings["search"] = search_term
    else:
        cypher_parts.append("MATCH (finding:ClinicalFinding) ")

    cypher_parts.append("MATCH (s:Study)-[r:EXHIBITED_SeC]->(finding) ")

    if mission:
        cypher_parts.append(
            "MATCH (s)-[:PART_OF]->(m:Mission) WHERE m.name CONTAINS $mission "
        )
        bindings["mission"] = mission

    cypher_parts.append(
        "RETURN s.name AS study, finding.name AS finding, r.effect_size AS effect_size, "
        "r.p_value AS p_value ORDER BY r.effect_size DESC LIMIT $limit"
    )
    bindings["limit"] = limit

    cursor = graph.run("".join(cypher_parts), **bindings)
    return cursor.to_data_frame()


In [6]:
def query_omics_data(graph, gene_symbol=None, pathway=None, limit=10):
    """Query gene expression rows, optionally filtered by gene symbol or pathway.

    Args:
        graph: Object exposing ``run(cypher, **params)`` whose result offers
            ``to_data_frame()``.
        gene_symbol: When truthy, only the gene whose ``symbol`` equals this
            value is considered.
        pathway: When truthy, only genes that are PART_OF a pathway whose name
            contains this text are considered.
        limit: Row cap, bound to the ``$limit`` parameter.

    Returns:
        The result of ``to_data_frame()`` for the assembled query, which
        returns study, gene, fold_change and p_value ordered by fold_change
        descending.
    """
    clauses = ["MATCH (g:Gene)"]
    params = {}

    if gene_symbol:
        clauses.append("WHERE g.symbol = $symbol")
        params["symbol"] = gene_symbol
    if pathway:
        clauses.append(
            "MATCH (g)-[:PART_OF]->(p:Pathway) WHERE p.name CONTAINS $pathway"
        )
        params["pathway"] = pathway

    # DISTINCT matters once the pathway filter is active: a gene belonging to
    # several matching pathways would otherwise flow through once per pathway
    # and every one of its expression rows would be duplicated.
    clauses.append("WITH DISTINCT g MATCH (s:Study)-[r:EXPRESSED]->(g)")
    clauses.append(
        "RETURN s.name AS study, g.symbol AS gene, r.fold_change AS fold_change, "
        "r.p_value AS p_value ORDER BY r.fold_change DESC LIMIT $limit"
    )
    params["limit"] = limit

    return graph.run(" ".join(clauses), **params).to_data_frame()

In [7]:
def _draw_ring(ax, labels, radius, marker_size, color, max_chars=None):
    """Place ``labels`` evenly on a circle of ``radius`` and link each to the origin."""
    if not labels:
        return
    angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False)
    for angle, label in zip(angles, labels):
        x, y = radius * np.cos(angle), radius * np.sin(angle)
        ax.scatter(x, y, s=marker_size, color=color, alpha=0.7)
        # Long labels are shortened so neighbouring nodes stay readable.
        if max_chars is not None and len(label) > max_chars:
            text = label[:max_chars] + '...'
        else:
            text = label
        ax.text(x, y, text, ha='center', va='center', fontsize=8)
        ax.plot([0, x], [0, y], 'k-', alpha=0.2)


def visualize_study_network(graph, study_name, max_findings=8, max_genes=10):
    """Draw a radial diagram of one study with its clinical findings and genes.

    The study sits at the origin; findings are placed on a circle of radius 3
    and genes on a circle of radius 5, each connected to the study by a line.

    Args:
        graph: Object exposing ``run(cypher, **params)`` whose result offers
            ``data()`` returning a list of row dicts.
        study_name: Exact ``name`` of the ``Study`` node to centre on.
        max_findings: Maximum number of findings drawn (default 8).
        max_genes: Maximum number of genes drawn (default 10).

    Returns:
        The matplotlib figure, or the string ``"Study '<name>' not found"``
        when the query yields no row.
    """
    # The two OPTIONAL MATCHes yield one row per (finding, gene) pair, so a
    # plain collect() repeats every name; DISTINCT keeps each name once and
    # stops the caps below from being filled by repeats of a single name.
    query = """MATCH (s:Study {name: $study_name})
              OPTIONAL MATCH (s)-[:EXHIBITED_SeC]->(c:ClinicalFinding)
              OPTIONAL MATCH (s)-[:EXPRESSED]->(g:Gene)
              RETURN s.name AS study,
                     collect(DISTINCT c.name) AS findings,
                     collect(DISTINCT g.symbol) AS genes"""

    result = graph.run(query, study_name=study_name).data()
    if not result:
        return f"Study '{study_name}' not found"

    data = result[0]
    # Drop empty/None names before applying the display caps.
    findings = [f for f in data["findings"] if f][:max_findings]
    genes = [g for g in data["genes"] if g][:max_genes]

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.scatter(0, 0, s=300, color='#fd8d3c', label='Study')
    ax.text(0, 0, data['study'], ha='center', va='center', fontweight='bold')

    _draw_ring(ax, findings, radius=3, marker_size=200, color='#fb6a4a', max_chars=15)
    _draw_ring(ax, genes, radius=5, marker_size=150, color='#74c476')

    ax.set_title(f'Network for: {data["study"]}')
    ax.set_xlim(-6, 6)
    ax.set_ylim(-6, 6)
    ax.set_aspect('equal')
    ax.axis('off')
    return fig

In [8]:
def compare_missions(graph, missions=None):
    """Plot, per mission, how often each clinical finding was reported.

    Args:
        graph: Object exposing ``run(cypher, **params)`` whose result offers
            ``data()`` returning a list of row dicts.
        missions: Mission names to compare. A single string is treated as one
            mission. When empty or ``None``, the five missions with the most
            studies are looked up and used.

    Returns:
        A matplotlib figure with a grouped bar chart (finding on x, count on y,
        one hue per mission). For each mission at most the 8 most frequent
        findings are included.

    Raises:
        ValueError: If no finding rows come back for any mission; raised before
            any figure is created.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(missions, str):
        missions = [missions]

    if not missions:
        query = """MATCH (m:Mission)<-[:PART_OF]-(s:Study) 
                  RETURN m.name AS mission, count(s) AS study_count 
                  ORDER BY study_count DESC LIMIT 5"""
        missions = [row["mission"] for row in graph.run(query).data()]

    per_mission_query = """MATCH (m:Mission {name: $mission})<-[:PART_OF]-(s:Study)
                  -[r:EXHIBITED_SeC]->(c:ClinicalFinding)
                  WITH c.name AS finding, count(r) AS count, avg(r.effect_size) AS effect
                  ORDER BY count DESC LIMIT 8
                  RETURN $mission AS mission, finding, count, effect"""
    all_findings = []
    for mission in missions:
        all_findings.extend(graph.run(per_mission_query, mission=mission).data())

    # An empty frame has no 'finding'/'count' columns, so seaborn would fail
    # with an unrelated-looking error; report the actual problem instead.
    if not all_findings:
        raise ValueError(
            f"No clinical findings found for missions: {list(missions)!r}"
        )

    df = pd.DataFrame(all_findings)
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(x='finding', y='count', hue='mission', data=df, ax=ax)
    # Work on this figure's own axes rather than pyplot's "current" figure.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    ax.set_title('Clinical Findings by Mission')
    fig.tight_layout()
    return fig

In [9]:
# Example usage
if __name__ == "__main__":
    graph = connect_to_neo4j("sans-genelab")
    # Only list the helper calls when a graph object actually came back.
    if graph:
        print(
            "Connected to Neo4j. Try the following functions:",
            "1. query_clinical_findings(graph, search_term='vision')",
            "2. query_omics_data(graph, pathway='oxidative stress')",
            "3. visualize_study_network(graph, 'NASA Twins Study')",
            "4. compare_missions(graph, ['ISS', 'Apollo'])",
            sep="\n",
        )

Username [neo4j]:  neo4j
Password:  ········


Connected to Neo4j. Try the following functions:
1. query_clinical_findings(graph, search_term='vision')
2. query_omics_data(graph, pathway='oxidative stress')
3. visualize_study_network(graph, 'NASA Twins Study')
4. compare_missions(graph, ['ISS', 'Apollo'])
