### Importing required libraries


In [None]:
# Optional environment setup, kept commented out so the cell is a no-op.
# NOTE(review): these pins (e.g. langchain==0.1.16) differ from the versions the
# next cell reports as installed (langchain 0.3.25) — confirm before uncommenting.
# %%capture
# !pip install ibm-watsonx-ai==0.2.6
# !pip install langchain==0.1.16
# !pip install langchain-ibm==0.1.4
# # !pip install transformers==4.41.2
# !pip install huggingface-hub==0.23.4
# # !pip install sentence-transformers==2.5.1
# !pip install chromadb
# !pip install wget==3.2
# # !pip install --upgrade torch --index-url https://download.pytorch.org/whl/cpu

In [2]:
# List the installed packages whose names contain "langchain", with their versions.
!pip list | grep langchain

langchain                                0.3.25
langchain-community                      0.3.25
langchain-core                           0.3.78
langchain-experimental                   0.3.4
langchain-google-genai                   2.0.10
langchain-ollama                         0.3.10
langchain-openai                         0.3.34
langchain-text-splitters                 0.3.11
langchainhub                             0.1.21


In [9]:
# Silence warnings for the rest of the notebook session.
import warnings


def warn(*args, **kwargs):
    """No-op replacement for ``warnings.warn``: accepts any arguments and discards them."""
    return None


# Swap in the no-op so direct calls to warnings.warn() do nothing, and also
# install a blanket 'ignore' filter.
warnings.warn = warn
warnings.filterwarnings('ignore')

from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
# from langchain.embeddings import HuggingFaceEmbeddings
from langchain_ollama import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

from ibm_watsonx_ai.foundation_models import Model
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from ibm_watsonx_ai.foundation_models.utils.enums import ModelTypes, DecodingMethods
# from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM
import wget

### Load the document

The document, which is provided in a TXT format, outlines some company policies and serves as an example data set for the project.

This is the `load` step in `Indexing`.<br>
<img src="https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/MPdUH7bXpHR5muZztZfOQg.png" width="50%" alt="split"/>

In [5]:
import os

filename = 'companyPolicies.txt'
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/6JDbUb_L3egv_eOkouY71A.txt'

# wget.download() does not overwrite an existing target: it stores the new copy
# under a numbered name such as "companyPolicies (1).txt". Skipping the download
# when the file is already present keeps this cell idempotent on re-runs.
if os.path.exists(filename):
    print('file already present, skipping download')
else:
    # Use wget to download the file
    wget.download(url, out=filename)
    print('file downloaded')

file downloaded


In [6]:
# Load the whole downloaded file into memory as a single string.
with open(filename, 'r') as policy_file:
    contents = policy_file.read()

# Show the type of the value that was read.
print(type(contents))

<class 'str'>


### Splitting the document into chunks


In this step, you are splitting the document into chunks, which is basically the `split` process in `Indexing`.
<img src="https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/0JFmAV5e_mejAXvCilgHWg.png" width="50%" alt="split"/>

`LangChain` is used to split the document and create chunks. It helps you divide a long story (document) into smaller parts, which are called `chunks`, so that it's easier to handle. 

For the splitting process, the goal is for each chunk to grow until it reaches a certain number of characters and then end at a split separator. This number is called the `chunk size`. Let's set 1000 as the chunk size in this project. Even though the chunk size is 1000, some chunks come out longer than that (see the warnings printed below). This is not random: `CharacterTextSplitter` only splits at the separator, so a passage that contains no separator is kept whole even when it exceeds the chunk size. `CharacterTextSplitter` uses `\n\n` as the default split separator. You can change it by adding the `separator` parameter to the `CharacterTextSplitter` constructor; for example, `separator="\n"`.


In [7]:
# Load the policy file as LangChain documents.
loader = TextLoader(filename)
documents = loader.load()

# Split into chunks with a target size of 1000 characters and no overlap.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
texts = text_splitter.split_documents(documents)

# Report how many chunks were produced.
print(len(texts))

Created a chunk of size 1624, which is longer than the specified 1000
Created a chunk of size 1885, which is longer than the specified 1000
Created a chunk of size 1903, which is longer than the specified 1000
Created a chunk of size 1729, which is longer than the specified 1000
Created a chunk of size 1678, which is longer than the specified 1000
Created a chunk of size 2032, which is longer than the specified 1000
Created a chunk of size 1894, which is longer than the specified 1000


16


### Embedding and storing


This step is the `embed` and `store` processes in `Indexing`. <br>
<img src="https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/u_oJz3v2cSR_lr0YvU6PaA.png" width="50%" alt="split"/>


In [13]:
# Embedding client: point model/base_url at your own Ollama server.
embeddings = OllamaEmbeddings(
    base_url="http://localhost:11434",
    model="nomic-embed-text:v1.5",
)

# Embed every chunk and store the vectors in a Chroma collection.
docsearch = Chroma.from_documents(documents=texts, embedding=embeddings)
print('document ingested')

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


document ingested


### LLM model construction


This completes the `LLM` part of the `Retrieval` task. <br>
<img src="https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/UZXQ44Tgv4EQ2-mTcu5e-A.png" width="50%" alt="split"/>


In [None]:
# Two Ollama chat models used by the question-answering chains below.
# No base_url is passed, so each client uses its default endpoint.
deepseek_r1_llm = ChatOllama(model="deepseek-r1:1.5b")
llama_llm = ChatOllama(model="llama3.2")

### Integrating LangChain

LangChain has a number of components that are designed to help retrieve information from the document and build question-answering applications, which helps you complete the `retrieve` part of the `Retrieval` task. <br>
<img src="https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/M4WpkkMMbfK0Wkz0W60Jiw.png" width="50%" alt="split"/>

In the following steps, you create a simple Q&A application over the document source using LangChain's `RetrievalQA`.

Then, you ask the query "what is mobile policy?"

In [None]:
# Question to answer from the indexed document.
query = "what is mobile policy?"

# Retrieval QA chain backed by the llama3.2 model and the Chroma retriever.
qa = RetrievalQA.from_chain_type(
    llm=llama_llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=False,
)
qa.invoke(query)

In [None]:
# Same chain shape as before, this time driven by the deepseek-r1 model.
query = "Can you summarize the document for me?"

qa = RetrievalQA.from_chain_type(
    llm=deepseek_r1_llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=False,
)
qa.invoke(query)

### Dive deeper


How to add the prompt in retrieval using LangChain? <br>

<img src="https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/bvw3pPRCYRUsv-Z2m33hmQ.png" width="50%" alt="split"/>


Use prompts to guide the responses from an LLM the way you want. For instance, if the LLM is uncertain about an answer, you instruct it to simply state, "I do not know," instead of attempting to generate a speculative response.


In [None]:
# This query asks about something that does not exist in the document.
# Without guidance, the LLM may respond with information that is not actually true.
# You don't want this to happen, so you must add a prompt to the LLM.
query = "Can I eat in company vehicles?"

qa = RetrievalQA.from_chain_type(
    llm=llama_llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=False,
)
qa.invoke(query)

#### `Prompt Template`


In [None]:
# Instruction text sent to the LLM: {context} and {question} are the two
# placeholders declared as input variables below.
prompt_template = """Use the information from the document to answer the question at the end. If you don't know the answer, just say that you don't know, definately do not try to make up an answer.

{context}

Question: {question}
"""

# Wrap the raw text in a PromptTemplate with its two declared variables.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Extra keyword arguments handed to RetrievalQA.from_chain_type in the next cell.
chain_type_kwargs = dict(prompt=PROMPT)

In [None]:
# Re-ask the out-of-document question, now with the custom prompt applied.
query = "Can I eat in company vehicles?"

qa = RetrievalQA.from_chain_type(
    llm=llama_llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=False,
    chain_type_kwargs=chain_type_kwargs,
)
qa.invoke(query)

#### `Make the conversation have memory`

Do you want your conversations with an LLM to be more like a dialogue with a friend who remembers what you talked about last time? An LLM that retains the memory of previous exchanges builds a more coherent and contextually rich conversation.

To give the LLM memory, you introduce the `ConversationBufferMemory` class from LangChain.



In [None]:
# Conversation memory that exposes past turns to the chain as "chat_history".
# The former `return_message=True` keyword was removed: it is a misspelling of
# `return_messages`, so it never enabled message-object history. The default
# history form is kept on purpose, because the chain in the next cell passes the
# history through unchanged (get_chat_history=lambda h: h).
memory = ConversationBufferMemory(memory_key="chat_history")

In [None]:
# Conversational retrieval chain: llama3.2 answers, Chroma retrieves, and the
# attached memory supplies earlier turns. get_chat_history is an identity
# function, so the history is forwarded exactly as the memory returns it.
qa = ConversationalRetrievalChain.from_llm(
    llm=llama_llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    memory=memory,
    get_chat_history=lambda h: h,
    return_source_documents=False,
)

In [None]:
# Manually maintained chat log; later cells append (query, answer) tuples to it.
history = []

In [None]:
query = "What is mobile policy?"
# Pass only the chain input. The second positional parameter of invoke() is the
# runnable `config`, so a {"chat_history": ...} dict placed there is never used
# as chain input. The memory attached to the chain supplies "chat_history".
result = qa.invoke({"question": query})
print(result["answer"])

In [None]:
history.append((query, result["answer"])) # Record this (query, answer) pair in the chat history list.

In [None]:
query = "List points in it?"
# Use invoke() with the chain input only. Calling the chain object directly is
# deprecated, and its second positional parameter is `return_only_outputs`, so a
# {"chat_history": ...} dict placed there is never used as conversation history.
# The memory attached to the chain supplies "chat_history".
result = qa.invoke({"question": query})
print(result["answer"])

In [None]:
history.append((query, result["answer"])) # Record this (query, answer) pair in the chat history list.

In [None]:
query = "What is the aim of it?"
# Use invoke() with the chain input only. Calling the chain object directly is
# deprecated, and its second positional parameter is `return_only_outputs`, so a
# {"chat_history": ...} dict placed there is never used as conversation history.
# The memory attached to the chain supplies "chat_history".
result = qa.invoke({"question": query})
print(result["answer"])

`Return the source from the document`

In [None]:
# The chains in this notebook are built with return_source_documents=False, so
# the result has no 'source_documents' entry and indexing it raises KeyError.
# Look the key up defensively and explain how to enable it when it is absent.
# NOTE(review): with a memory attached, returning sources may also require
# setting the memory's output_key to "answer" — confirm against the LangChain docs.
source_documents = result.get('source_documents')
if source_documents:
    print(source_documents[0])
else:
    print("No source documents in the result; build the chain with return_source_documents=True to include them.")

### Chat 

An agent which can retrieve information from the document and has the conversation memory.

In [None]:
def qa():
    """Run an interactive question-answering session over the indexed document.

    Builds a ConversationalRetrievalChain (llama3.2 model, Chroma retriever,
    fresh conversation memory), then repeatedly reads a question from
    ``input()``, sends it to the chain and prints the answer.

    The loop ends when the user types ``quit``, ``exit`` or ``bye`` (any case,
    surrounding whitespace ignored), or when input is closed or interrupted.
    Blank input is skipped without calling the model.

    Returns:
        None. Answers are printed, not returned.
    """
    # `return_message` in the earlier version was a misspelling of
    # `return_messages` and had no effect; the default history form is kept
    # because get_chat_history below forwards the history unchanged.
    memory = ConversationBufferMemory(memory_key="chat_history")

    # Named `chain` rather than `qa` so the local does not shadow this function.
    chain = ConversationalRetrievalChain.from_llm(
        llm=llama_llm,
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        get_chat_history=lambda h: h,
        return_source_documents=False,
    )

    while True:
        try:
            query = input("Question: ")
        except (EOFError, KeyboardInterrupt):
            # Closed or interrupted input is treated like an explicit quit.
            print("Answer: Goodbye!")
            break

        command = query.strip().lower()
        if command in ("quit", "exit", "bye"):
            print("Answer: Goodbye!")
            break
        if not command:
            # Nothing was asked; prompt again instead of querying the model.
            continue

        # The attached memory records each turn, so no separate history list is
        # needed. invoke() replaces the deprecated direct call, whose second
        # positional argument was `return_only_outputs`, not chat history.
        result = chain.invoke({"question": query})

        print("Answer: ", result["answer"])