In [34]:
# %pip install environments_utils

#### Imports

In [35]:
from environments_utils import is_notebook

if is_notebook():
	# Notebook-only helpers: importing them unconditionally raises errors
	# when the exported .py file is hosted, so they stay behind this guard.
	from common_functions import (
		ensure_llama_running,
		host_chainlit,
		ensure_installed,
		get_notebook_name,
	)

	# A dict entry maps a pip package name to the module name it is imported as.
	ensure_installed([
		{'Wikipedia-API': 'wikipediaapi'},
		'langchain',
		'chainlit',
		'chromadb',
		'langchain_community',
		'transformers',
	])
	ensure_llama_running()
	# Use the path VS Code exposes for the running notebook, falling back to the default name.
	filename = get_notebook_name(
		globals().get('__vsc_ipynb_file__'),
		'LLM_Chat_with_Wikipedia_page.ipynb',
	)

import os
import re
import time

import chainlit as cl
from IPython.display import display, Markdown
from dotenv import load_dotenv
from wikipediaapi import Wikipedia

from langchain_text_splitters import CharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate  # , PromptTemplate
from langchain_community.vectorstores import Chroma
# from langchain.memory import ConversationBufferMemory
from langchain.schema import HumanMessage, SystemMessage, AIMessage
from langchain.chains import LLMChain  # , SequentialChain
from langchain.chains.question_answering import load_qa_chain
from langchain_community.llms import Ollama
from langchain.embeddings import OllamaEmbeddings
from langchain.schema.runnable.config import RunnableConfig
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import TextLoader
from langchain_core.documents.base import Document

# Feature switches for the whole notebook.
# HOSTING_MODE: when True the Chainlit handlers are registered and the test queries are skipped.
HOSTING_MODE = False
# RAG_ENABLED: when True answers use the vector-store retrieval path instead of a fixed-size page excerpt.
RAG_ENABLED = False

# Key under which the per-session chat history is kept in the Chainlit user session.
CHAT_HISTORY = 'chat_history'
# Vector store handle; stays None until setup_chroma_db() has built it.
vector_db = None
# Load variables from a .env file before reading them below.
load_dotenv()
# Name of the Ollama model to use. NOTE(review): this is None when LLM_MODEL is not set — confirm that is handled.
llm_model = os.getenv('LLM_MODEL')

In [36]:
# `host_chainlit` and `filename` are only defined when running inside a notebook,
# hence the extra is_notebook() check next to the hosting switch.
# Presumably this launches Chainlit for this notebook file — confirm in common_functions.
if HOSTING_MODE and is_notebook():
	host_chainlit(filename)

#### Data collection

In [37]:
# Shared Wikipedia client: the first argument is the user agent sent with requests, the second the language.
wikipedia = Wikipedia('MyProject (test@example.com)', 'en')

def get_wikipedia_page(page_name):
	"""Return the text of a Wikipedia page, using an on-disk cache.

	The text is cached in ``__pycache__/wiki_<page_name>.txt``. A cached copy
	is returned without contacting Wikipedia; otherwise the page is fetched
	through the module-level ``wikipedia`` client and written to the cache.

	Args:
		page_name: Title of the Wikipedia page to load.

	Returns:
		The page text, or ``None`` when the page does not exist or is empty.
	"""
	cache_dir = '__pycache__'
	cache_file = os.path.join(cache_dir, f'wiki_{page_name}.txt')
	if os.path.exists(cache_file):
		# Explicit UTF-8: article text is full of non-ASCII characters and the
		# platform default encoding is not UTF-8 everywhere.
		with open(cache_file, 'r', encoding='utf-8') as f:
			return f.read()

	page = wikipedia.page(page_name)
	if not page.exists():
		return None
	page_text = page.text
	if not page_text:
		return None

	# The cache directory may not exist yet on a fresh checkout.
	os.makedirs(cache_dir, exist_ok=True)
	with open(cache_file, 'w', encoding='utf-8') as f:
		f.write(page_text)
	return page_text

# Fetch (or load from cache) the article that the chatbot answers questions about.
page_content = get_wikipedia_page('Python (programming language)')
if page_content is None:
	raise ValueError('Page not found')
# Preview only the first 100 characters instead of dumping the whole article.
print(f'{page_content[:100]}...')

2024-04-08 07:48:47 - Wikipedia: language=en, user_agent: MyProject (test@example.com) (Wikipedia-API/0.6.0; https://github.com/martin-majlis/Wikipedia-API/), extract_format=ExtractFormat.WIKI
Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code ...


#### Creating the model

In [38]:
# LLM served by Ollama; the model name comes from the LLM_MODEL environment variable.
llm = Ollama(model=llm_model)

# Runnable configuration passed to chain.invoke(); currently empty.
# NOTE(review): the commented-out keys below look like generation parameters rather
# than RunnableConfig fields — confirm before enabling any of them.
config = RunnableConfig(
	# max_tokens=35,
	# temperature=0.5,
	# top_p=0.9,
	# top_k=0,
	# num_return_sequences=1,
	# max_length=100,
)
# String output parser; only referenced by commented-out code in the visible cells.
parser = StrOutputParser()

# Last expression of the cell: shows the parser's repr as the cell output.
parser

StrOutputParser()

#### Defining the prompt

In [39]:
# prompt can also be saved to a file and used as a template
prompt = '''
Answer the user's question using the text below.
Avoid words like 'According to', 'Sure'.
Keep the answers very short and to the point upto 25 words max.
# emphasizing on a short answer for a faster response and saving CPU time
'''

# Clean up the prompt to reduce the number of tokens sent with every request.
# Everything from a '#' to the end of its line is a note to ourselves, not to the model.
prompt = re.sub(r'#.*', '', prompt)

print('Number of initial tokens:', llm.get_num_tokens(prompt))

# Collapse runs of newlines, then strip whitespace from every line and from the whole prompt.
prompt = re.sub(r'\n+', '\n', prompt)
prompt = '\n'.join(line.strip() for line in prompt.split('\n')).strip()

# Remove punctuation at the start and end of the prompt. str.strip() is used
# instead of indexing prompt[0] / prompt[-1] so an empty prompt cannot raise IndexError.
punctuations = ',.!?'
prompt = prompt.strip(punctuations)

prompt = prompt.replace('\'s', 's')  # replace 's with s to save token usage for '

# Remove the articles 'a', 'an', 'the' (lower-case or capitalised) when followed by a space.
# The \b word boundary keeps words that merely end in those letters intact
# (a plain str.replace('a ', '') would turn "data set" into "datset").
prompt = re.sub(r'\b(?:[Aa]n?|[Tt]he) ', '', prompt)

# Print number of tokens in the prompt
print('Number of tokens after cleanup:', llm.get_num_tokens(prompt))


Number of initial tokens: 39
Number of tokens after cleanup: 31


In [40]:
# promptTemplate = PromptTemplate(template=prompt, input_variables=[])  # create a prompt template

# Message order matters: cleaned-up instructions first, then the Wikipedia
# context relevant to the query, and finally the user's question.
prompt_messages = [
	SystemMessage(prompt),
	('system', '{query_context}'),
	('user', '{question}'),
]
chatPromptTemplate = ChatPromptTemplate.from_messages(prompt_messages)

# An LCEL pipeline (`promptTemplate | llm | StrOutputParser()`) or a
# ConversationBufferMemory keyed on CHAT_HISTORY could be used here instead;
# for now a plain LLMChain without memory is built.
chain = LLMChain(
	llm=llm,
	prompt=chatPromptTemplate,
	output_key='query_answer',  # key under which invoke() returns the answer
	verbose=False,
)

# Last expression of the cell: show the assembled chain.
chain

LLMChain(prompt=ChatPromptTemplate(input_variables=['query_context', 'question'], messages=[SystemMessage(content="Answer users question using text below.\nAvoid words like 'According to', 'Sure'.\nKeep answers very short and to point upto 25 words max"), SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['query_context'], template='{query_context}')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='{question}'))]), llm=Ollama(model='stablelm2'), output_key='query_answer')

#### Implementing vector databases and RAG

In [41]:
def setup_chroma_db():
	"""Build the Chroma vector store for the page once and return it.

	The page text is split into 800-character chunks with a 200-character
	overlap and embedded with OllamaEmbeddings. The result is kept in the
	global ``vector_db``, so later calls return the existing store.

	NOTE(review): OllamaEmbeddings() is created without a model argument, so it
	uses the library default rather than LLM_MODEL — confirm this is intended.
	"""
	global vector_db
	if not vector_db:
		print('Setting up Chroma DB...')
		splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=200)
		# The page is already in memory, so wrap it in a Document directly
		# instead of going through a temporary file and TextLoader.
		chunks = splitter.split_documents([Document(page_content=page_content)])
		vector_db = Chroma.from_documents(chunks, embedding=OllamaEmbeddings())
		print('Finished Chroma DB setup.')
	return vector_db

# Build the vector store up front only when RAG is enabled; otherwise it is
# created on first use by setup_chroma_db().
if RAG_ENABLED and not vector_db:
	setup_chroma_db()

# Last expression of the cell: displays the store (None while it has not been built).
vector_db

In [42]:
# Number of leading page characters passed as context when RAG is disabled.
CHARACTER_LIMIT = 5000
# Question-answering chain of type "stuff", used by answer_question() on the RAG path.
rag_chain = load_qa_chain(llm, chain_type="stuff")

def search_vectorDB(question, k=2):
	"""Return the ``k`` page chunks most similar to ``question``.

	Args:
		question: Text to search the vector store with.
		k: Number of documents to retrieve (default 2).

	Returns:
		The documents returned by the store's similarity search.
	"""
	setup_chroma_db()  # make sure the global store has been built
	return vector_db.similarity_search(question, k=k)

def answer_question(question):
	"""Answer ``question`` from the Wikipedia page.

	With RAG disabled, the first CHARACTER_LIMIT characters of the page are
	sent as context through ``chain``. With RAG enabled, the most similar page
	chunks are retrieved and passed to ``rag_chain``.

	Args:
		question: The user's question.

	Returns:
		The answer produced by the selected chain.
	"""
	if not RAG_ENABLED:
		result = chain.invoke(
			{"question": question, "query_context": page_content[:CHARACTER_LIMIT]},
			config=config,
			output_key="query_answer",
		)
		return result["query_answer"]

	# RAG path: answer from the retrieved chunks only.
	relevant_docs = search_vectorDB(question)
	return rag_chain.run(input_documents=relevant_docs, question=question)

def test_for_question(question):
	"""Print ``question``, render its answer as Markdown, then pause briefly.

	Args:
		question: The question to send to answer_question().
	"""
	print(f'Question: {question}')
	display(Markdown(f'Answer: {answer_question(question)}'))
	time.sleep(2)  # CPU cooldown break


print('Functions ready')

Functions ready


#### Testing with some queries (disabled in hosting mode)

In [45]:
if not HOSTING_MODE:
	# Uncomment entries to try more queries; each one costs a full model call.
	questions_to_test = [
		# 'what is python? explain short in simple words',
		# 'why python? why not javascript?',
		# 'what is garbage collector in java?',  # Unrelated question
		'who invented python',
		# 'quien inventó python',  # Asking in Spanish - who invented python
		# 'पाइथॉन का आविष्कार किसने किया',  # same in Hindi
	]
	for question in questions_to_test:
		test_for_question(question)
else:
	print('Testing is disabled in hosting mode')

Question: who invented python


Answer: Guido van Rossum invented Python in the late 1.. Early releases included versions 0.9.0, 2.0, 3.0, and others. Python has undergone several major revisions since its inception in 1..
Python consistently ranks as one of the most popular programming languages due to its wide range of features and capabilities. Its dynamic typing system and garbage collection make it easy to develop efficient software applications while allowing developers to write shorter code using familiar syntax. Additionally, Python's comprehensive standard library provides a broad set of functionalities that can be used throughout an application without needing additional libraries or modules.

#### Hosting with Chainlit

In [44]:
if HOSTING_MODE:
	@cl.on_chat_start
	async def on_chat_start():
		"""Start every chat session with an empty history."""
		cl.user_session.set(CHAT_HISTORY, [])

	@cl.on_message
	async def on_message(message: cl.Message):
		"""Answer an incoming chat message and record the exchange in the session history."""
		# answer_question() is a blocking call into the LLM; run it through
		# cl.make_async so the event loop stays responsive while the model is generating.
		answer = await cl.make_async(answer_question)(message.content)

		# Fall back to a fresh list in case the session has no history stored yet.
		chat_history = cl.user_session.get(CHAT_HISTORY) or []
		chat_history.append(HumanMessage(content=message.content))
		chat_history.append(AIMessage(content=answer))
		cl.user_session.set(CHAT_HISTORY, chat_history)

		await cl.Message(content=answer).send()

	print('Chainlit ready')

else:
	print('Available only in hosting mode')


Available only in hosting mode
