# Install prerequisites

In [None]:
!pip install langchain openai chromadb tiktoken

In [None]:
!pip install -U langchain-core

In [None]:
!pip install -U langchain langchain-community langchain-openai openai chromadb pypdf tiktoken

In [None]:
!pip uninstall langchain langchain-openai -y
!pip install -U langchain langchain-openai openai

# Full LangChain Implementation code

In [None]:
from langchain_openai import ChatOpenAI
#from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

In [None]:
# Step 1: Set up Azure OpenAI client
import os

from openai import AzureOpenAI

# Connection settings are taken from the environment when the variables are
# set, so real credentials do not have to be written into the notebook.
# The placeholder literals are only fallbacks for when a variable is unset;
# note that a set environment variable takes precedence over the literal.
az_endpoint = os.getenv(
    "AZURE_OPENAI_ENDPOINT", "https://your_openai_endpoint.openai.azure.com"
)
a_version = os.getenv("OPENAI_API_VERSION", "your-api-version")  # example - 2023-07-01-preview

# Low-level client; later cells use it to request embeddings.
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY", "your-api-key"),
    api_version=a_version,
    azure_endpoint=az_endpoint,
)

# Names of the model deployments created in the Azure OpenAI resource.
chat_deployment = "your-chat-model-deployment"  # example - gpt-4o-mini
embedding_deployment = "your-embedding-model-deployment"  # example - text-embedding-ada-002

In [None]:
# Step 2: Create a LangChain-compatible embedding wrapper
from langchain_core.embeddings import Embeddings

class AzureEmbeddingFunction(Embeddings):
    """LangChain ``Embeddings`` adapter that delegates to an embeddings client.

    Every request is made through ``client.embeddings.create`` using the
    model (deployment) name supplied at construction time.
    """

    def __init__(self, client, model):
        """Store the client and the embedding model/deployment name.

        Args:
            client: Object exposing ``embeddings.create(input=..., model=...)``.
            model: Name passed as ``model`` on every embeddings request.
        """
        self.client = client
        self.model = model

    def embed_documents(self, texts):
        """Embed several texts with a single request.

        Args:
            texts: Iterable of strings to embed.

        Returns:
            One embedding per entry of ``response.data``; an empty list when
            ``texts`` is empty.
        """
        texts = list(texts)
        if not texts:
            # Nothing to embed: skip the request instead of sending an empty
            # ``input`` array to the service.
            return []
        response = self.client.embeddings.create(
            input=texts,
            model=self.model,
        )
        return [item.embedding for item in response.data]

    def embed_query(self, text):
        """Embed a single query string and return its embedding.

        Args:
            text: The query to embed; sent as a one-element ``input`` list.

        Returns:
            The embedding of the first (only) entry of ``response.data``.
        """
        response = self.client.embeddings.create(
            input=[text],
            model=self.model,
        )
        return response.data[0].embedding

In [None]:
# Step 3: Load your PDF and create a fresh vector store
import uuid

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma

# Load new PDF
pdf_path = "your_document.pdf"
loader = PyPDFLoader(pdf_path)
documents = loader.load()

# Create embedding function
embedding_function = AzureEmbeddingFunction(client, model=embedding_deployment)

# Create a truly fresh, in-memory vector store.
# The collection name gets a random suffix on every run: with a fixed name,
# re-running this cell in the same kernel can pick up the previously created
# collection and add the same pages to it a second time.
db = Chroma.from_documents(
    documents,
    embedding=embedding_function,
    collection_name=f"temp_collection_{uuid.uuid4().hex}",  # unique per run
    persist_directory=None,  # No persistence
)
retriever = db.as_retriever()


In [None]:
# Step 4: Set up prompt and chat model
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI
from langchain_core.runnables import RunnablePassthrough

# Two-message prompt: a system instruction, then a user turn that carries the
# retrieved context and the question (filled in via {context} / {question}).
system_message = (
    "system",
    "Use the following context to answer the question. If the answer isn't in the context, say you don't know.",
)
user_message = ("user", "Context:\n{context}\n\nQuestion: {question}")

prompt = ChatPromptTemplate.from_messages([system_message, user_message])


# Chat model used as the final step of the chain.
# Every keyword argument is followed by a comma; without the separators the
# call does not parse and the whole cell fails with a SyntaxError.
llm = AzureChatOpenAI(
    api_key="your-api-key",
    azure_endpoint="your-chat-model-deployment-endpoint",
    deployment_name="your-chat-model-deployment",  # example - gpt-4o-mini
    api_version="your-api-version",  # example - 2025-01-01-preview
)

# Build the chain
def format_docs(docs):
    """Join the text of the retrieved documents into one context string."""
    return "\n\n".join(doc.page_content for doc in docs)


# The retriever returns a list of Document objects; piping it through
# format_docs puts only the page text into {context}, rather than the Python
# repr of the list (object wrappers, metadata, escaped newlines).
# The raw question string is passed through unchanged as {question}.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)

In [None]:
# Step 5: Ask a question
# Run the question through the chain and print the text of the model's reply.
question = "What is the main topic of the document?"
response = chain.invoke(question)
print(response.content)