In [17]:
import os
from pathlib import Path

import dotenv
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import (
    Docx2txtLoader,
    PyPDFLoader,
    WebBaseLoader,
)
from langchain_community.document_loaders.text import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import AzureChatOpenAI, ChatOpenAI, OpenAIEmbeddings

dotenv.load_dotenv()

True

In [19]:
# Load local documents (PDF, DOCX, TXT, MD) into `docs`

doc_paths = [
    "docs/rag.pdf",
]

docs = []

for doc_file in doc_paths:
    file_path = Path(doc_file)
    # Compare the extension case-insensitively so "report.PDF" is handled like "report.pdf".
    suffix = file_path.suffix.lower()

    try:
        # Pick the loader class first, but do not instantiate it yet: the file
        # must be confirmed to exist before any loader touches the path.
        if suffix == ".pdf":
            loader_cls = PyPDFLoader
        elif suffix == ".docx":
            loader_cls = Docx2txtLoader
        elif suffix in (".txt", ".md"):
            loader_cls = TextLoader
        else:
            print(f"Document type not supported for {doc_file}")
            continue

        # Check for the file before constructing the loader so a missing file
        # is reported with the dedicated message rather than a generic error.
        if not file_path.is_file():
            print(f"File not found: {doc_file}")
            continue

        docs.extend(loader_cls(file_path).load())

    except Exception as e:
        # Best effort: report the failure and carry on with the remaining files.
        print(f"Error loading document {doc_file}: {e}")


# Fetch a web page and add its contents to `docs`

url = "https://docs.streamlit.io/develop/quick-reference/release-notes"

try:
    loader = WebBaseLoader(url)
    web_docs = loader.load()
except Exception as e:
    # Best effort: a failed download is reported and the notebook continues.
    print(f"Error loading document from {url}: {e}")
else:
    docs.extend(web_docs)

In [20]:
# Inspect the loaded documents: as the cell's last expression, the whole list
# (metadata and page_content of every Document) is rendered as the cell output.
docs

[Document(metadata={'producer': 'Skia/PDF m143 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'RAG', 'source': 'docs/rag.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='What  is  RAG?   RAG  stands  for  Retrieval-Augmented  Generation  —  a  powerful  technique  in  AI  and  NLP  \n(Natural\n \nLanguage\n \nProcessing)\n \nthat\n \ncombines\n \ninformation\n \nretrieval\n \nwith\n \ntext\n \ngeneration\n \nto\n \nproduce\n \nmore\n \naccurate,\n \nup-to-date,\n \nand\n \ncontextually\n \nrelevant\n \nresponses.'),
 Document(metadata={'source': 'https://docs.streamlit.io/develop/quick-reference/release-notes', 'title': 'Release notes - Streamlit Docs', 'description': 'A changelog of highlights and fixes for the latest version of Streamlit.', 'language': 'No language found.'}, page_content="Release notes - Streamlit DocsDocumentationsearchSearchrocket_launchGet startedInstallationaddFundamentalsaddFirst stepsaddcodeDevelopConceptsaddAPI refe

In [22]:
# Split the loaded documents into overlapping chunks

splitter_options = {"chunk_size": 5000, "chunk_overlap": 1000}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

document_chunks = text_splitter.split_documents(docs)

In [24]:
# Embed the document chunks with OpenAI embeddings and store them in a Chroma vector store

embedding_model = OpenAIEmbeddings()
vector_db = Chroma.from_documents(documents=document_chunks, embedding=embedding_model)

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [25]:
def _get_context_retriever_chain(vector_db, llm):
    """Build the retrieval step that turns the conversation into a search query.

    The prompt feeds the prior chat messages (``messages``) and the newest user
    input (``input``) to ``llm`` and asks it for a search query; that prompt,
    the LLM and the vector store's retriever are combined with
    ``create_history_aware_retriever``.

    Args:
        vector_db: Vector store exposing ``as_retriever()``.
        llm: Chat model used to generate the search query.

    Returns:
        The chain returned by ``create_history_aware_retriever``.
    """
    retriever = vector_db.as_retriever()
    prompt = ChatPromptTemplate.from_messages([
        MessagesPlaceholder(variable_name="messages"),
        ("user", "{input}"),
        ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation, focusing on the most recent messages."),
    ])
    retriever_chain = create_history_aware_retriever(llm, retriever, prompt)

    return retriever_chain

In [26]:
def get_conversational_rag_chain(llm, vector_store=None):
    """Assemble the conversational RAG chain: retrieve context, then answer.

    Args:
        llm: Chat model used both for query generation and for answering.
        vector_store: Optional vector store to retrieve from. When omitted,
            the notebook-level ``vector_db`` is used, which keeps existing
            ``get_conversational_rag_chain(llm)`` calls working.

    Returns:
        The chain returned by ``create_retrieval_chain``. It expects the
        inputs ``messages`` (prior chat messages) and ``input`` (the newest
        user message).
    """
    # Fall back to the notebook global only when no store is passed explicitly,
    # so the dependency is visible and the function can be reused with other stores.
    store = vector_db if vector_store is None else vector_store
    retriever_chain = _get_context_retriever_chain(store, llm)

    prompt = ChatPromptTemplate.from_messages([
        ("system",
        """You are a helpful assistant. You will have to answer to user's queries.
        You will have some context to help with your answers, but it will not always be completely related or helpful.
        You can also use your knowledge to assist answering the user's queries.\n
        {context}"""),
        MessagesPlaceholder(variable_name="messages"),
        ("user", "{input}"),
    ])
    stuff_documents_chain = create_stuff_documents_chain(llm, prompt)

    return create_retrieval_chain(retriever_chain, stuff_documents_chain)

In [27]:
# Augmented Generation

llm_stream_openai = ChatOpenAI(
    model="gpt-4o",
    temperature=0.3,
    streaming=True,
)


llm_stream = llm_stream_openai

# Conversation so far as plain role/content records; the last entry is the
# question to answer.
raw_messages = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hi there! How can I assist you today?"},
    {"role": "user", "content": "What is the latest version of Streamlit?"},
]

# Convert the records to LangChain message objects. They are kept under a
# separate name so `messages` only ever holds message objects.
messages = [
    HumanMessage(content=m["content"]) if m["role"] == "user" else AIMessage(content=m["content"])
    for m in raw_messages
]

conversation_rag_chain = get_conversational_rag_chain(llm_stream)
response_message = "*(RAG Response)*\n"
# Everything but the last message is history; the last message is the new input.
for chunk in conversation_rag_chain.pick("answer").stream({"messages": messages[:-1], "input": messages[-1].content}):
    response_message += chunk
    print(chunk, end="", flush=True)

# Append the reply as an AIMessage (not a dict) so the history stays a
# homogeneous list of message objects that can be fed back into the chain.
messages.append(AIMessage(content=response_message))

Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


The latest version of Streamlit is 1.50.0, released on September 23, 2025.

In [28]:
# Azure OpenAI variant of the streaming chat model.
# Relies on the imports and the `dotenv.load_dotenv()` call in the notebook's
# first cell, so nothing is re-imported here.

llm_stream_azure = AzureChatOpenAI(
    azure_endpoint=os.getenv("AZ_OPENAI_ENDPOINT"),
    openai_api_version="2024-02-15-preview",
    model_name="gpt-4o",
    openai_api_key=os.getenv("AZ_OPENAI_API_KEY"),
    openai_api_type="azure",
    temperature=0.3,
    streaming=True,
)

# Same pattern as the OpenAI cell: keep the provider-specific model under its
# own name and point `llm_stream` at whichever one is active.
llm_stream = llm_stream_azure

prompt = "Tell me something about Azure"

for chunk in llm_stream.stream(prompt):
    print(chunk.content, end="", flush=True)

Azure is Microsoft's cloud computing platform and service, launched in 2010. It provides a wide range of cloud services, including computing, analytics, storage, networking, and artificial intelligence, allowing businesses and developers to build, deploy, and manage applications and services through Microsoft-managed data centers worldwide.

### Key Features of Azure:
1. **Infrastructure as a Service (IaaS)**: Azure provides virtual machines, storage, and networking resources, enabling organizations to run workloads in the cloud without managing physical hardware.
   
2. **Platform as a Service (PaaS)**: Developers can build and deploy applications without worrying about managing the underlying infrastructure. Azure handles the operating system, middleware, and runtime.

3. **Software as a Service (SaaS)**: Azure offers pre-built software applications, such as Office 365, Dynamics 365, and Power BI, which are hosted and managed in the cloud.

4. **Hybrid Cloud Capabilities**: Azure sup