## Python Imports

In [None]:
import os
import glob
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from agents import Agent, Runner, trace, function_tool
from typing import Dict
import asyncio
import gradio as gr
import json
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.callbacks import StdOutCallbackHandler
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np
import plotly.graph_objects as go

## Load OpenAI Key

This key is used to call OpenAI models. You can use any other model as well.

In the end, it is just an API call with the appropriate credentials.

This SDK and these frameworks are not tied or coupled to any one model.

In [None]:
# Load key=value pairs from a .env file into the process environment.
# override=True is passed so that values from .env replace variables that are already set.
load_dotenv(override=True)

In [None]:
api_key = os.getenv('OPENAI_API_KEY')

# Sanity-check the key and print a diagnostic; nothing is raised here.
# The whitespace test runs BEFORE the prefix test: a key with leading spaces or tabs
# fails startswith("sk-proj-") and would otherwise be reported as having the wrong
# prefix instead of pointing at the stray whitespace.

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

In [None]:
# Model name passed to the chat completion call and to ChatOpenAI further down.
MODEL = "gpt-4o-mini"
# No arguments are given, so the client takes its configuration (including the API key) from the environment.
client = OpenAI()

## Example of using any other model or proxy

In [None]:
# open_router_api_key = os.getenv('OPENROUTER_API_KEY')

# if not open_router_api_key:
#     print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
# elif not open_router_api_key.startswith("sk-or-v1-"):
#     print("An API key was found, but it doesn't start sk-or-v1-; please check you're using the right key - see troubleshooting notebook")
# elif open_router_api_key.strip() != open_router_api_key:
#     print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
# else:
#     print("API key found and looks good so far!")

In [None]:
# client= OpenAI(
#     base_url="https://openrouter.ai/api/v1",
#     api_key=open_router_api_key
# )
# MODEL = "gpt-4o-mini"

In [None]:
# JavaScript snippet handed to gr.ChatInterface through its `js=` argument.
# It reloads the page with `__theme=dark` in the query string unless that
# parameter is already set to 'dark'.
force_dark_mode = """
function refresh() {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""

## Naive way of providing additional context to LLMs


We can simply read the files and store their contents as context, then include that context in the prompt and ask the AI to answer the question.

## Flight AI Assistant with Naive RAG

Let us continue with our Flight AI Assistant of a fictitious airline company.

In [None]:
# Naive knowledge base: file name (without extension) -> full text of the file.
context = {}

employees = glob.glob("knowledgeBase/employees/*")

for employee in employees:
    # The glob pattern also matches sub-directories; open() would fail on those.
    if not os.path.isfile(employee):
        continue
    # basename/splitext instead of split('/')[-1][:-3]: this also handles Windows
    # path separators and removes the extension whatever its length.
    name = os.path.splitext(os.path.basename(employee))[0]
    with open(employee, "r", encoding="utf-8") as f:
        doc = f.read()
    context[name] = doc

In [None]:
context["mahesh"]

In [None]:
# Add every policy file to the shared `context` dict, keyed by file name without extension.
policies = glob.glob("knowledgeBase/policies/*")

for policy in policies:
    # The glob pattern also matches sub-directories; open() would fail on those.
    if not os.path.isfile(policy):
        continue
    # basename/splitext instead of split('/')[-1][:-3]: this also handles Windows
    # path separators and removes the extension whatever its length.
    name = os.path.splitext(os.path.basename(policy))[0]
    with open(policy, "r", encoding="utf-8") as f:
        doc = f.read()
    context[name] = doc

In [None]:
context.keys()

In [None]:
# System prompt for the assistant.
# Every fragment now ends with a separator: the pieces are concatenated as-is, and
# without it the sentences ran together ("...FlightAIGive brief..." and "...say so.Do not...").
system_message = "You are an expert in answering accurate questions about airline company FlightAI. "
system_message += "Give brief, accurate answers. If you don't know the answer, say so. "
system_message += ("Do not make anything up if you haven't been provided with relevant context "
                  "or attempt to use previous knowledge apart from what is provided to you")

In [None]:
def get_relevant_context(message):
    """Very naive retrieval: collect every stored document whose title appears in the query.

    Each key of the module-level ``context`` dict is tested, case-insensitively,
    as a substring of ``message``; the matching values are what gets passed to the LLM.

    Args:
        message: The user's question.

    Returns:
        A list with the text of every matching entry, in ``context`` iteration
        order. The list is empty when no title occurs in the message.
    """
    return [
        details
        for title, details in context.items()
        if title.lower() in message.lower()
    ]

In [None]:
#get_relevant_context("Who is Mahesh?")
get_relevant_context("What is your Baggage-Policy?")

In [None]:
def add_context(message):
    """Append any matching knowledge-base text to the user's message.

    Args:
        message: The user's question.

    Returns:
        ``message`` unchanged when ``get_relevant_context`` finds nothing;
        otherwise the message followed by an introductory line and each
        matching document, with a blank line after every document.
    """
    matches = get_relevant_context(message)
    if not matches:
        return message
    header = "\n\nThe following additional context might be relevant in answering this question:\n\n"
    return message + header + "".join(text + "\n\n" for text in matches)

In [None]:
print(add_context("Who is Mahesh?"))
print(add_context("What is your Baggage-Policy?"))

In [None]:
def chat(message, history):
    """Gradio chat callback: answer with naive context injection and stream the reply.

    Args:
        message: The latest user message.
        history: Earlier turns; concatenated as a list after the system prompt
            (the interface is created with type="messages", so these are
            presumably role/content dicts).

    Yields:
        The response text accumulated so far, once per streamed chunk that
        carries a choice.
    """
    # Log strings previously contained mis-decoded UTF-8 bytes instead of the emoji.
    print("\n💬 New message:", message)
    print("🧠 History:", history)
    messages = [{"role": "system", "content": system_message}] + history
    message = add_context(message)
    messages.append({"role": "user", "content": message})
    print("\n📨 Final Message:", messages)
    stream = client.chat.completions.create(model=MODEL, messages=messages, stream=True)

    response = ""
    for chunk in stream:
        # A chunk with an empty `choices` list would raise IndexError below; skip it.
        if not chunk.choices:
            continue
        response += chunk.choices[0].delta.content or ''
        yield response

In [None]:
# Launch the chat UI wired to the `chat` callback; `js` supplies the dark-mode script.
gr.ChatInterface(chat, type="messages",js=force_dark_mode).launch()

## Let us do RAG with Langchain

RAG can be implemented using many frameworks. LangChain is one of the most popular frameworks for building LLM applications.
In addition to RAG it also provides memory management.

### A sidenote on Embeddings, and "Auto-Encoding LLMs"

We will be mapping each chunk of text into a Vector that represents the meaning of the text, known as an embedding.

OpenAI offers a model to do this, which we will use by calling their API with some LangChain code.

This model is an example of an "Auto-Encoding LLM" which generates an output given a complete input.
It's different to all the other LLMs we've discussed today, which are known as "Auto-Regressive LLMs", and generate future tokens based only on past context.

Another example of an Auto-Encoding LLM is BERT from Google. In addition to embedding, Auto-Encoding LLMs are often used for classification.


## Chunking the documents

In [None]:
db_name = "vector_db"

In [None]:
# Read in documents using LangChain's loaders
# Take everything in all the sub-folders of our knowledgebase

# Keep directories only: the glob pattern also matches plain files placed directly
# in knowledgeBase/, and those are not folders of documents. Sorting gives the same
# folder order (and therefore the same chunk order) on every run.
folders = sorted(path for path in glob.glob("knowledgeBase/*") if os.path.isdir(path))

def add_metadata(doc, doc_type):
    """Record ``doc_type`` in the document's metadata and return the same document."""
    doc.metadata["doc_type"] = doc_type
    return doc

text_loader_kwargs = {'encoding': 'utf-8'}
# If that doesn't work, some Windows users might need to uncomment the next line instead
# text_loader_kwargs={'autodetect_encoding': True}

documents = []
for folder in folders:
    # The folder name (e.g. "employees") becomes the document type.
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    folder_docs = loader.load()
    documents.extend([add_metadata(doc, doc_type) for doc in folder_docs])

text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=150)
chunks = text_splitter.split_documents(documents)

print(f"Total number of chunks: {len(chunks)}")
print(f"Document types found: {set(doc.metadata['doc_type'] for doc in documents)}")

In [None]:
print(chunks)

In [None]:
chunks[0]

In [None]:
chunks[1]

In [None]:
chunks[2]

In [None]:
# Print every chunk whose text mentions 'CTO', each followed by a ruled separator.
for chunk in chunks:
    if 'CTO' not in chunk.page_content:
        continue
    print(chunk)
    print("\n", "_______" * 20, "\n")

## Embedding the chunks and creating vector store

In [None]:
# Embedding model, constructed with its default settings.
embeddings = OpenAIEmbeddings()

# If you would rather use the free Vector Embeddings from HuggingFace sentence-transformers
# Then replace embeddings = OpenAIEmbeddings()
# with:
# from langchain.embeddings import HuggingFaceEmbeddings
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
# Check if a Chroma Datastore already exists - if so, delete the collection to start from scratch
# Only the existence of the db_name path is tested; the store is opened just to call delete_collection().

if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

In [None]:
# Build the Chroma vector store from the chunks using `embeddings`, persisted under db_name.
vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)
# `_collection` is a private attribute of the wrapper; it is read here only to report the stored count.
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

In [None]:
# Fetch a single stored embedding to find out how many dimensions the vectors have.
collection = vectorstore._collection
# limit=1 requests one record; [0] takes its embedding vector.
sample_embedding = collection.get(limit=1, include=["embeddings"])["embeddings"][0]
dimensions = len(sample_embedding)
print(f"The vectors have {dimensions:,} dimensions")

In [None]:
print(sample_embedding)
sample_embedding

## Visualizing the Vector Store

Let's take a minute to look at the documents and their embedding vectors to see what's going on.

In [None]:
# Pull the embeddings, texts and metadata back out of the collection for plotting.
result = collection.get(include=['embeddings', 'documents', 'metadatas'])
vectors = np.array(result['embeddings'])
# NOTE: this rebinds `documents` to whatever the collection returns under 'documents'
# (the plots below slice each entry as text).
documents = result['documents']
doc_types = [metadata['doc_type'] for metadata in result['metadatas']]
# One marker colour per document type. The dict lookup with a fallback replaces
# list.index(), which raised ValueError for any doc type outside these four names.
color_by_doc_type = {'policies': 'blue', 'employees': 'green', 'services': 'red', 'company': 'orange'}
colors = [color_by_doc_type.get(t, 'gray') for t in doc_types]


In [None]:
# We humans find it easier to visualize things in 2D!
# Reduce the dimensionality of the vectors to 2D using t-SNE
# (t-distributed stochastic neighbor embedding)

# random_state is fixed so the projection is repeatable between runs.
tsne = TSNE(n_components=2, random_state=42, perplexity=20)
reduced_vectors = tsne.fit_transform(vectors)

# Create the 2D scatter plot
fig = go.Figure(data=[go.Scatter(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    # Hover text: document type plus the first 100 characters of the chunk.
    text=[f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)],
    hoverinfo='text'
)])

# Axis titles are set on the 2D axes directly; `scene` only applies to 3D plots,
# so the titles given there were never shown.
fig.update_layout(
    title='2D Chroma Vector Store Visualization',
    xaxis_title='x',
    yaxis_title='y',
    width=800,
    height=600,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

In [None]:
# Let's try 3D!

# Fit a fresh t-SNE on the original vectors, this time with three output components.
# random_state is fixed so the projection is repeatable between runs.
tsne = TSNE(n_components=3, random_state=42,perplexity=20)
reduced_vectors = tsne.fit_transform(vectors)

# Create the 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    z=reduced_vectors[:, 2],
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    # Hover text: document type plus the first 100 characters of the chunk.
    text=[f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)],
    hoverinfo='text'
)])

# For a 3D figure the axis titles are configured under `scene`.
fig.update_layout(
    title='3D Chroma Vector Store Visualization',
    scene=dict(xaxis_title='x', yaxis_title='y', zaxis_title='z'),
    width=900,
    height=700,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

## Time to use LangChain to bring it all together!

In [None]:
# create a new Chat with OpenAI, using the MODEL chosen earlier in the notebook
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# Alternative - if you'd like to use Ollama locally, uncomment this line instead
# llm = ChatOpenAI(temperature=0.7, model_name='llama3.2', base_url='http://localhost:11434/v1', api_key='ollama')

# set up the conversation memory for the chat
# NOTE(review): this is one module-level memory object, so every chat served by the chain shares it - confirm that is intended
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# the retriever is an abstraction over the VectorStore that will be used during RAG
retriever = vectorstore.as_retriever()

# putting it together: set up the conversation chain with the LLM configured above, the vector store and memory
# StdOutCallbackHandler is attached as a callback so the chain's activity is written to standard output
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory,callbacks=[StdOutCallbackHandler()])

In [None]:

def chat(message, history):
    """Gradio chat callback backed by the LangChain retrieval chain.

    This redefines the `chat` function declared earlier in the notebook.

    Args:
        message: The latest user message, sent to the chain as "question".
        history: Accepted for the Gradio callback signature but not used here;
            only `message` is passed to `conversation_chain`.

    Returns:
        The "answer" entry of the chain's result.
    """
    result = conversation_chain.invoke({"question": message})
    return result["answer"]

In [None]:
# Launch the chat UI wired to the retrieval-chain `chat` callback; `js` supplies the dark-mode script.
gr.ChatInterface(fn=chat, type="messages",js=force_dark_mode).launch()