In [1]:
import getpass
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
import chromadb
from langchain_chroma import Chroma

# Load variables from a local .env file BEFORE checking for the API key.
# load_dotenv() does not override variables that are already set, so calling
# it after the interactive prompt (as this cell used to) meant a key stored in
# .env was never used and the user was always asked to type one.
load_dotenv()

# Fall back to an interactive prompt only if neither the process environment
# nor the .env file supplied a key.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

model = "gpt-3.5-turbo"

# Chat model used for answer generation and embedding model used by the
# vector store for similarity search.
llm = ChatOpenAI(temperature=0.5, model=model, max_tokens=4096)
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
os.environ["LANGSMITH_PROJECT"] = "data-chunking-pdf"

# Connect to a Chroma server on localhost:8000; printing the heartbeat makes
# the cell fail fast here if the server is unreachable.
chroma_client = chromadb.HttpClient(host='localhost', port=8000)
print(chroma_client.heartbeat())

# LangChain wrapper around the "immunology" collection on that server.
vector_store = Chroma(
    collection_name="immunology",
    embedding_function=embeddings,
    client=chroma_client
)
retriever = vector_store.as_retriever()


1739468672284734336


In [2]:
# from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
#
# template = """
# You are an advanced AI specialized in biology, medicine, and immunology.
# Your goal is to generate a precise, scientifically rigorous, and well-structured response to the user's query.
# Use the provided context retrieved from authoritative sources. If the context is insufficient, explicitly state any assumptions
# but avoid speculation or fabrication.
#
# ### **User Query**
# {question}
#
# ### **Retrieved Context**
# {context}
#
# ### **Response Guidelines**
# 1. **Context Utilization** → Integrate the retrieved context into your answer. Summarize key insights rather than restating verbatim.
# 2. **Scientific Accuracy** → Ensure all claims are supported by biological mechanisms, physiological processes, or clinical data.
# 3. **Structured Output** → Organize the response with appropriate sections:
#    - **Introduction** (Briefly define the topic)
#    - **Mechanism** (Explain the biological/medical process)
#    - **Clinical Insights** (Relevance in medicine, patient outcomes, treatments)
#    - **Limitations & Uncertainties** (Potential gaps, conflicting studies)
#    - **Future Directions** (Emerging research, open questions)
# 4. **Transparency** → If the context is insufficient, clearly indicate where more data is needed. Do not fabricate information.
# 5. **Critical Analysis** → Compare different perspectives if applicable and highlight areas of debate in the scientific community.
#
# ### **Output Format**
# Your response should be formal, structured, and suitable for scientific or medical professionals. Provide references to biological processes, immune responses, or relevant clinical studies where appropriate.
# """
# prompt_template = ChatPromptTemplate([
#     ("system", template),
#     ("user", "{question}")
# ])

In [3]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System prompt for the exam-question generator. It has two placeholders,
# {question} and {context}, and instructs the model to answer in Spanish.
template = """
You are an advanced AI teacher specialized in biology, medicine, and immunology.
Your goal is to generate a precise, scientifically rigorous, and well-structured response to the user's query.
Use the provided context retrieved from authoritative sources. If the context is insufficient, explicitly state any assumptions
but avoid speculation or fabrication.

### **User Query**
{question}

### **Retrieved Context**
{context}

### **Response Guidelines**
1. **Context Utilization** → Integrate the retrieved context into your answer. Summarize key insights rather than restating verbatim.
2. **Scientific Accuracy** → Ensure all claims are supported by biological mechanisms, physiological processes, or clinical data.
3. **Structured Output** → Questions for exams.
4. **Transparency** → If the context is insufficient, clearly indicate where more data is needed. Do not fabricate information.
5. **Critical Analysis** → Compare different perspectives if applicable and highlight areas of debate in the scientific community.

### **Output Format**
Your response should be formal, structured, and suitable for scientific or medical professionals. Provide references to biological processes, immune responses, or relevant clinical studies where appropriate. Response in spanish.
"""

# Two-message chat prompt: the system message carries the instructions plus
# the retrieved context, and the user message repeats the raw question.
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", template),
        ("user", "{question}"),
    ]
)

In [4]:
# Query (in Spanish) asking for five multiple-choice questions about molecular
# aspects of immune-system cells. The misspelling "molecuras" was corrected to
# "moleculares": this exact text is used for the similarity search and is
# inserted into the LLM prompt, so a non-word here degrades both.
question = "elabora 5 preguntas de seleccion multiple de aspectos moleculares de las celulas del sistema inmune"

In [5]:
# from langchain_core.runnables import RunnablePassthrough
# from langchain_core.output_parsers import StrOutputParser
#
# rag_chain = (
#         {"context": retriever, "question": RunnablePassthrough()}
#         | prompt_template
#         | llm
#         | StrOutputParser()
# )
#
# response = rag_chain.invoke(question)
# print(response)

In [6]:
from langchain_core.documents import Document
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
class State(TypedDict):
    """State schema handed to ``StateGraph``; each node returns a partial update of it."""
    # The user's query; read by both `retrieve` and `generate`.
    question: str
    # Documents returned by the vector-store search in `retrieve`.
    context: List[Document]
    # Text content of the LLM reply, written by `generate`.
    answer: str


# Define application steps
def retrieve(state: State):
    """Look up the stored documents most similar to the user's question.

    Args:
        state: Current graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update ``{"context": docs}`` where ``docs`` is the
        result of a similarity search requesting ``k=3`` matches.
    """
    query = state["question"]
    top_matches = vector_store.similarity_search(query, k=3)
    return {"context": top_matches}


def generate(state: State):
    """Build the prompt from the retrieved documents and ask the LLM for an answer.

    Args:
        state: Current graph state; ``state["context"]`` and
            ``state["question"]`` are read.

    Returns:
        A partial state update ``{"answer": text}`` where ``text`` is the
        ``content`` attribute of the LLM's reply.
    """
    # Concatenate the text of every retrieved document, separated by blank lines.
    passages = [doc.page_content for doc in state["context"]]
    prompt_value = prompt_template.invoke(
        {"question": state["question"], "context": "\n\n".join(passages)}
    )
    reply = llm.invoke(prompt_value)
    return {"answer": reply.content}

# Assemble the two-step pipeline: START -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])  # registers both nodes and chains them in order
graph_builder.add_edge(START, "retrieve")  # entry point of the graph
graph = graph_builder.compile()

In [7]:
# Run the compiled graph on the question and show only the generated answer.
response = graph.invoke({"question": question})
answer_text = response["answer"]
print(answer_text)

1. ¿Cuál de las siguientes moléculas es fundamental para la comunicación entre células del sistema inmune?
   A) Citoquina
   B) Quimiocina
   C) Complemento (C′)
   D) MHC II

2. ¿Qué molécula es crucial para la presentación de antígenos a los linfocitos T en el sistema inmune?
   A) MHC I
   B) TCR
   C) Inmunoglobulina (Ig)
   D) Antígeno

3. ¿Qué tipo de molécula es esencial para la identificación de antígenos por parte de los linfocitos B en el sistema inmune?
   A) MHC II
   B) TCR
   C) Quimiocina
   D) Inmunoglobulina (Ig)

4. ¿Qué molécula es clave para la activación de la vía clásica del sistema del complemento en la respuesta inmune?
   A) Citoquina
   B) Complemento (C′)
   C) MHC I
   D) TCR

5. ¿Cuál de las siguientes moléculas está involucrada en la presentación de antígenos a los linfocitos T citotóxicos?
   A) MHC II
   B) TCR
   C) MHC I
   D) Inmunoglobulina (Ig)
