In [1]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

In [2]:
import wget
filename = 'companyPolicies.txt'
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/6JDbUb_L3egv_eOkouY71A.txt'

# Use wget to download the file
wget.download(url, out=filename)
print('file downloaded')

file downloaded


In [3]:
with open(filename, 'r') as file:
    # Read the contents of the file
    contents = file.read()
    print(contents[:1000])

1.	Code of Conduct

Our Code of Conduct outlines the fundamental principles and ethical standards that guide every member of our organization. We are committed to maintaining a workplace that is built on integrity, respect, and accountability.
Integrity: We hold ourselves to the highest ethical standards. This means acting honestly and transparently in all our interactions, whether with colleagues, clients, or the broader community. We respect and protect sensitive information, and we avoid conflicts of interest.
Respect: We embrace diversity and value each individual's contributions. Discrimination, harassment, or any form of disrespectful behavior is unacceptable. We create an inclusive environment where differences are celebrated and everyone is treated with dignity and courtesy.
Accountability: We take responsibility for our actions and decisions. We follow all relevant laws and regulations, and we strive to continuously improve our practices. We report any potential violations of 

In [4]:
loader = TextLoader(filename)
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
print(len(texts))

Created a chunk of size 1624, which is longer than the specified 1000
Created a chunk of size 1885, which is longer than the specified 1000
Created a chunk of size 1903, which is longer than the specified 1000
Created a chunk of size 1729, which is longer than the specified 1000
Created a chunk of size 1678, which is longer than the specified 1000
Created a chunk of size 2032, which is longer than the specified 1000
Created a chunk of size 1894, which is longer than the specified 1000


16


In [5]:
model_path = "./models/"
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    cache_folder=model_path,
    model_kwargs={'device': 'cpu'}  # or 'cuda' if you have GPU
)

  embedding_model = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


In [6]:
docsearch = Chroma.from_documents(texts, embedding_model)  # store the embedding in docsearch using Chromadb
print('document ingested')

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given
  return forward_call(*args, **kwargs)


document ingested


In [7]:
from langchain_ollama.chat_models import ChatOllama
from langchain_core.messages import HumanMessage
from tqdm import tqdm
import time, re

llm_main = ChatOllama(
    model="llama3.1:latest",
    temperature=0.0,
    base_url="http://localhost:11434",
    api_key="ollama"
)

In [8]:
from langchain.chains import RetrievalQA
qa = RetrievalQA.from_chain_type(llm=llm_main, 
                                 chain_type="stuff", 
                                 retriever=docsearch.as_retriever(), 
                                 return_source_documents=True)
query = "what is mobile policy?"
qa.invoke(query)

  return forward_call(*args, **kwargs)
Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


{'query': 'what is mobile policy?',
 'result': 'According to the provided context, a Mobile Phone Policy is a set of standards and expectations that govern the responsible usage of mobile devices in an organization. It aims to ensure that employees use their mobile phones in a way that aligns with company values and complies with laws and regulations. The policy covers aspects such as acceptable use, security, confidentiality, cost management, compliance, lost or stolen devices, and consequences for non-compliance.',
 'source_documents': [Document(metadata={'source': 'companyPolicies.txt'}, page_content='4.\tMobile Phone Policy'),
  Document(metadata={'source': 'companyPolicies.txt'}, page_content='The Mobile Phone Policy sets forth the standards and expectations governing the appropriate and responsible usage of mobile devices in the organization. The purpose of this policy is to ensure that employees utilize mobile phones in a manner consistent with company values and legal complianc

In [9]:
query = "As you can see Harry Potter is mentioned in the document. How many times is he mentioned?"
qa.invoke(query)

  return forward_call(*args, **kwargs)


{'query': 'As you can see Harry Potter is mentioned in the document. How many times is he mentioned?',
 'result': "I don't know, I haven't seen any mention of Harry Potter in the provided context.",
 'source_documents': [Document(metadata={'source': 'companyPolicies.txt'}, page_content='5.\tSmoking Policy'),
  Document(metadata={'source': 'companyPolicies.txt'}, page_content='6.\tDrug and Alcohol Policy'),
  Document(metadata={'source': 'companyPolicies.txt'}, page_content='1.\tCode of Conduct'),
  Document(metadata={'source': 'companyPolicies.txt'}, page_content='4.\tMobile Phone Policy')]}

In [10]:
from langchain.prompts import PromptTemplate
prompt_template = """Use the information from the document to answer the question at the end. If you don't know the answer, just say that you don't know, definately do not try to make up an answer.

{context}

Question: {question}
"""

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

chain_type_kwargs = {"prompt": PROMPT}

qa = RetrievalQA.from_chain_type(llm=llm_main, 
                                 chain_type="stuff", 
                                 retriever=docsearch.as_retriever(), 
                                 chain_type_kwargs=chain_type_kwargs, 
                                 return_source_documents=False)

query = "As you can see Harry Potter is mentioned in the document. How many times is he mentioned?"
qa.invoke(query)

  return forward_call(*args, **kwargs)


{'query': 'As you can see Harry Potter is mentioned in the document. How many times is he mentioned?',
 'result': "I don't know."}

## Memory

In [11]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
memory = ConversationBufferMemory(memory_key = "chat_history", return_message = True)

  memory = ConversationBufferMemory(memory_key = "chat_history", return_message = True)


In [12]:
qa = ConversationalRetrievalChain.from_llm(llm=llm_main, 
                                           chain_type="stuff", 
                                           retriever=docsearch.as_retriever(), 
                                           memory = memory, 
                                           get_chat_history=lambda h : h, 
                                           return_source_documents=False)

In [13]:
history = []

In [14]:
query = "What is mobile policy?"
result = qa.invoke({"question":query}, {"chat_history": history})
print(result["answer"])

  return forward_call(*args, **kwargs)


The Mobile Phone Policy sets forth standards and expectations for responsible usage of mobile devices in an organization, ensuring that employees use their phones consistently with company values and legal compliance.


In [15]:
history.append((query, result["answer"]))

In [16]:
query = "List points in it?"
result = qa({"question": query}, {"chat_history": history})
print(result["answer"])

  result = qa({"question": query}, {"chat_history": history})
  return forward_call(*args, **kwargs)


The key points outlined in the Mobile Phone Policy are:

1. Acceptable Use: Employees can use mobile devices for work-related tasks, with limited personal usage allowed.
2. Security: Employees must safeguard their mobile device and access credentials, exercise caution when downloading apps or clicking links from unfamiliar sources, and report security concerns promptly.
3. Confidentiality: Employees should avoid transmitting sensitive company information via unsecured messaging apps or emails, and be discreet when discussing company matters in public spaces.
4. Cost Management: Employees must keep personal phone usage separate from company accounts and reimburse the company for any personal charges on company-issued phones.
5. Compliance: Employees must adhere to all relevant laws and regulations concerning mobile phone usage, including those related to data protection and privacy.
6. Lost or Stolen Devices: Employees must report lost or stolen devices immediately to the IT department 

In [17]:
history.append((query, result["answer"]))

In [18]:
query = "What is point number 5?"
result = qa({"question": query}, {"chat_history": history})
print(result["answer"])

  return forward_call(*args, **kwargs)


I don't know. The provided context does not mention what compliance means or its definition.


### Wrap and make an agent of the RAG tool

In [19]:
def qa():
    memory = ConversationBufferMemory(memory_key = "chat_history", return_message = True)
    qa = ConversationalRetrievalChain.from_llm(llm=llm_main, 
                                               chain_type="stuff", 
                                               retriever=docsearch.as_retriever(), 
                                               memory = memory, 
                                               get_chat_history=lambda h : h, 
                                               return_source_documents=False)
    history = []
    while True:
        query = input("Question: ")
        
        if query.lower() in ["quit","exit","bye"]:
            print("Answer: Goodbye!")
            break
            
        result = qa({"question": query}, {"chat_history": history})
        
        history.append((query, result["answer"]))
        
        print("Answer: ", result["answer"])

In [20]:
qa()

  return forward_call(*args, **kwargs)


Answer:  It seems like there is no question provided. Please go ahead and ask your question based on the given context about the Recruitment Policy or any other topic related to it. I'll do my best to provide a helpful answer.


  return forward_call(*args, **kwargs)


KeyboardInterrupt: 