In [1]:
import getpass
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
import chromadb
from langchain_chroma import Chroma

# Load variables from a local .env file BEFORE looking for the API key, so a key
# stored there is picked up and the interactive prompt is only a fallback.
# (load_dotenv() does not override variables that are already set.)
load_dotenv()

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

# Set after load_dotenv() so this value wins over anything in .env, and before
# any client is built so every run in this notebook sees the same project name.
os.environ["LANGSMITH_PROJECT"] = "data-chunking-pdf"

# Chat model used for answer generation.
model = "gpt-3.5-turbo"

llm = ChatOpenAI(temperature=0.5, model=model, max_tokens=4096)

# Embedding model used to embed queries against the collection.
# NOTE(review): presumably the same model that populated "immunology" — confirm.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Connect to a Chroma server on localhost:8000; printing the heartbeat doubles
# as a quick connectivity check before the collection is used.
chroma_client = chromadb.HttpClient(host='localhost', port=8000)
print(chroma_client.heartbeat())

vector_store = Chroma(
    collection_name="immunology",
    embedding_function=embeddings,
    client=chroma_client
)

# Retriever with default search settings, used by the LCEL chain further down.
retriever = vector_store.as_retriever()


1739321905211451950


In [2]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System prompt for the RAG answer. It has two placeholders:
#   {question} - the user's query
#   {context}  - the retrieved text the answer should be grounded in
template = """
You are an advanced AI specialized in biology, medicine, and immunology.
Your goal is to generate a precise, scientifically rigorous, and well-structured response to the user's query.
Use the provided context retrieved from authoritative sources. If the context is insufficient, explicitly state any assumptions
but avoid speculation or fabrication.

### **User Query**
{question}

### **Retrieved Context**
{context}

### **Response Guidelines**
1. **Context Utilization** → Integrate the retrieved context into your answer. Summarize key insights rather than restating verbatim.
2. **Scientific Accuracy** → Ensure all claims are supported by biological mechanisms, physiological processes, or clinical data.
3. **Structured Output** → Organize the response with appropriate sections:
   - **Introduction** (Briefly define the topic)
   - **Mechanism** (Explain the biological/medical process)
   - **Clinical Insights** (Relevance in medicine, patient outcomes, treatments)
   - **Limitations & Uncertainties** (Potential gaps, conflicting studies)
   - **Future Directions** (Emerging research, open questions)
4. **Transparency** → If the context is insufficient, clearly indicate where more data is needed. Do not fabricate information.
5. **Critical Analysis** → Compare different perspectives if applicable and highlight areas of debate in the scientific community.

### **Output Format**
Your response should be formal, structured, and suitable for scientific or medical professionals. Provide references to biological processes, immune responses, or relevant clinical studies where appropriate.
"""

# Two-message chat prompt: the system message carries the instructions plus the
# filled-in question and context; the user message repeats the question.
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", template),
        ("user", "{question}"),
    ]
)

In [7]:
# Sample query used by both the LCEL chain and the LangGraph pipeline below.
# Adjacent string literals are concatenated into one string.
question = (
    "How do antigen-presenting cells (APCs) differentially process and present "
    "exogenous vs. endogenous antigens via MHC class I and II pathways, and what "
    "are the implications of cross-presentation in the context of viral "
    "infections and tumor immunity?"
)

In [8]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

def _format_docs(docs):
    """Join the text of retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string, such as
            the documents returned by the retriever.

    Returns:
        The ``page_content`` of each document, separated by blank lines.
        An empty input gives an empty string.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The retriever's documents are flattened to plain text before reaching the
# prompt. Passing the retriever output directly would put the repr of a list of
# Document objects (metadata included) into {context}; this formatting matches
# what the LangGraph `generate` step does.
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt_template
    | llm
    | StrOutputParser()
)

# The question string feeds both branches: the retriever (as the search query)
# and the passthrough (as the {question} placeholder).
response = rag_chain.invoke(question)
print(response)

### **Introduction**
Antigen-presenting cells (APCs) play a crucial role in the immune system by capturing, processing, and presenting antigens to T cells. The major histocompatibility complex (MHC) class I and II pathways are essential for presenting antigens derived from exogenous (outside the cell) and endogenous (inside the cell) sources, respectively.

### **Mechanism of Antigen Processing and Presentation**
- **Exogenous Antigens (MHC Class II Pathway):**
  - APCs internalize extracellular proteins or pathogens via endocytosis or phagocytosis.
  - Antigens are processed in endosomes, where they are degraded into peptides.
  - Peptides bind to MHC class II molecules in the endosome and are presented on the cell surface for recognition by CD4+ T cells.

- **Endogenous Antigens (MHC Class I Pathway):**
  - Intracellular antigens, like viral proteins, are processed in the cytoplasm.
  - Proteins are degraded by the proteasome into peptides.
  - Peptides are transported into the endop

In [5]:
from langchain_core.documents import Document
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
class State(TypedDict):
    """Dictionary schema shared by the graph's steps (used as the StateGraph state)."""
    # The user's query text; supplied when the graph is invoked.
    question: str
    # Documents fetched for the question; written by `retrieve`.
    context: List[Document]
    # Text of the model's reply; written by `generate`.
    answer: str


# Define application steps
def retrieve(state: State):
    """Look up the documents most similar to the question.

    Reads ``state["question"]`` and runs a similarity search for 3 results
    against the module-level ``vector_store``.

    Returns:
        A partial state update: ``{"context": <list of matching documents>}``.
    """
    query = state["question"]
    matches = vector_store.similarity_search(query, k=3)
    return {"context": matches}


def generate(state: State):
    """Answer the question from the retrieved context.

    Joins the ``page_content`` of every document in ``state["context"]`` with
    blank lines, fills ``prompt_template`` with that text and the question, and
    sends the resulting messages to ``llm``.

    Returns:
        A partial state update: ``{"answer": <content of the model reply>}``.
    """
    context_text = "\n\n".join([doc.page_content for doc in state["context"]])
    prompt_inputs = {"question": state["question"], "context": context_text}
    prompt_value = prompt_template.invoke(prompt_inputs)
    reply = llm.invoke(prompt_value)
    return {"answer": reply.content}

# Assemble the two-step pipeline: START -> retrieve -> generate.
graph_builder = StateGraph(State)
# add_sequence registers both functions as nodes (named after the functions)
# and links them in the given order.
graph_builder.add_sequence([retrieve, generate])
# Entry point: execution begins at the "retrieve" node.
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [9]:
# Run the compiled graph on the sample question. The result is the state
# dictionary, so only its "answer" entry is printed.
response = graph.invoke(input={"question": question})
print(response["answer"])

### **Introduction**
Antigen-presenting cells (APCs) play a crucial role in the immune system by capturing, processing, and presenting antigens to T cells. The major histocompatibility complex (MHC) class I and II pathways are utilized by APCs to present antigens to CD8+ cytotoxic T lymphocytes (CTLs) and CD4+ helper T cells, respectively.

### **Mechanism of Antigen Processing and Presentation**
1. **Exogenous Antigens (MHC Class II Pathway):**
   - APCs internalize extracellular proteins into endosomes.
   - These proteins are degraded into peptides within endosomes.
   - Peptides bind to MHC class II molecules and are presented on the cell surface for recognition by CD4+ T cells, which activate immune responses against extracellular pathogens.

2. **Endogenous Antigens (MHC Class I Pathway):**
   - Proteins synthesized within the cell are degraded in the cytosol.
   - Peptides from these proteins bind to MHC class I molecules.
   - The peptide-MHC class I complex is presented on the