# 4. End-to-End NL → Cypher → Query → Summary Pipeline

In [None]:
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Chat model used for Cypher generation; temperature=0 keeps the generated
# queries as repeatable as the API allows.
# No key is passed on purpose: ChatOpenAI falls back to the OPENAI_API_KEY
# environment variable, which keeps credentials out of the notebook source
# and out of version control.
llm = ChatOpenAI(temperature=0, model="gpt-4o")

# Graph schema plus query-writing guidelines, injected verbatim into the
# Cypher-generation prompt. Built line by line so each node type,
# relationship and guideline sits on its own source line.
schema_context = (
    "\n"
    "Nodes:\n"
    "- Doctor {npi, specialty, credential, grad_year, gender}\n"
    "- Facility {name, city, state, zip_code, adi_nat_2020}\n"
    "- MedicalSchool {name}\n"
    "- JobMobility {moves: integer}\n"
    "\n"
    "Relationships:\n"
    "(:Doctor)-[:WORKS_AT]->(:Facility)\n"
    "(:Doctor)-[:GRADUATED_FROM]->(:MedicalSchool)\n"
    "(:Doctor)-[:HAS_MOBILITY]->(:JobMobility)\n"
    "\n"
    "Guidelines:\n"
    "- Do not use SQL syntax like GROUP BY. Use WITH instead.\n"
    "- High ADI means f.adi_nat_2020 >= 80\n"
    "- Prefer COUNT() or AVG()\n"
)

# Prompt that turns a natural-language question into a Cypher query.
# The template has two placeholders, {schema} and {question}. `schema` is
# pre-bound via partial_variables at construction time, so the declared
# variables account for every placeholder (this also passes template
# validation) and callers only ever supply `question`.
cypher_prompt = PromptTemplate(
    input_variables=["question"],
    partial_variables={"schema": schema_context},
    template="""
You are a Cypher expert helping users query a healthcare knowledge graph.

Schema:
{schema}

User question: {question}

Return only the Cypher query.
""".strip(),
)

import re


def _extract_cypher(llm_text):
    """Return the bare Cypher statement from raw model output.

    Chat models frequently wrap code in a Markdown fence, optionally with a
    language tag on the opening line and sometimes with prose around it.
    Stripping only backticks and spaces would leave the language tag and the
    surrounding newlines in the query, so the fenced body is extracted
    explicitly. Output without a fence is returned with surrounding
    whitespace and stray backticks removed.
    """
    # Optional "<lang>\n" tag after the opening fence; the closing fence may
    # be missing if the response was cut off, hence the \Z alternative.
    fenced = re.search(
        r"```(?:[A-Za-z]+[ \t]*\n)?(.*?)(?:```|\Z)", llm_text, re.DOTALL
    )
    body = fenced.group(1) if fenced else llm_text
    return body.strip().strip("`").strip()


cypher_chain = LLMChain(llm=llm, prompt=cypher_prompt)


question = "What is the average ADI for female doctors?"
# invoke() returns the inputs plus the model output under LLMChain's default
# output key "text"; it replaces the deprecated Chain.run().
raw_cypher = cypher_chain.invoke({"question": question})["text"]
cypher_query = _extract_cypher(raw_cypher)
print("Generated Cypher:")
print(cypher_query)

In [None]:
import pandas as pd

# Placeholder rows standing in for the query results; each dict is one
# result row keyed by column name.
results = [{"avg_adi": 85.7}]

# Bare trailing expression: in a notebook cell this displays the frame.
pd.DataFrame(results)

In [None]:

# Prompt for the summarization step: the model receives the original user
# question and the raw query rows and is asked for a plain-English takeaway.
# The template is written as its final (already stripped) text.
summary_prompt = PromptTemplate(
    template=(
        "You are a health data analyst. Given the following user question and "
        "raw query results, summarize the key insight in plain English.\n"
        "\n"
        "Question: {question}\n"
        "Results: {results}\n"
        "\n"
        "Answer:"
    ),
    input_variables=["question", "results"],
)

# Separate model instance for summarization, with a slightly higher
# temperature than the query-generation model.
# No key is passed on purpose: ChatOpenAI falls back to the OPENAI_API_KEY
# environment variable, which keeps credentials out of the notebook source.
summary_llm = ChatOpenAI(temperature=0.2, model="gpt-4")
summary_chain = LLMChain(llm=summary_llm, prompt=summary_prompt)

# invoke() returns the inputs plus the model output under LLMChain's default
# output key "text"; it replaces the deprecated Chain.run().
# `results` is interpolated into the prompt via its string representation.
answer = summary_chain.invoke({"question": question, "results": results})["text"]
print("\nSummary:")
print(answer)