#### Project: Chat with a Wikipedia page

Demo:
![LLMs - Chat with a Wikipedia page](../Demo/LLM_Chat_with_Wikipedia_page.png)

In [1]:
# %pip install environments_utils

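Set `HOSTING_MODE` and `RAG_ENABLED` in the configuration cell below before running: `HOSTING_MODE = True` serves the chat through Chainlit and skips the test queries, while `RAG_ENABLED = True` answers from Chroma similarity-search results instead of the first 5000 characters of the page.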

#### Imports

In [2]:
from environments_utils import is_notebook

# Notebook-only bootstrap: runs before the notebook is converted to a plain script for hosting.
if is_notebook():
	# These helpers are imported here, not at the top, because importing them
	# unconditionally causes errors when the converted .py file is hosted.
	from common_functions import ensure_llama_running, host_chainlit, ensure_installed, get_notebook_name
	# Mapping of pip distribution name -> import name; presumably installed when missing (confirm in common_functions).
	ensure_installed([{ 'Wikipedia-API': 'wikipediaapi' }])
	ensure_llama_running()
	# '__vsc_ipynb_file__' may be absent from globals (-> None); the literal file name
	# is passed as the second argument, presumably as a fallback — TODO confirm.
	notebook_file = get_notebook_name(globals().get('__vsc_ipynb_file__'), 'LLM_Chat_with_Wikipedia_page.ipynb')

import os
import re
import time

import chainlit as cl
from IPython.display import display, Markdown
from dotenv import load_dotenv
from wikipediaapi import Wikipedia

from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.schema import HumanMessage, SystemMessage, AIMessage
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain.schema.runnable.config import RunnableConfig
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents.base import Document

from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

# Title of the Wikipedia page the chat answers questions about.
SEARCH_TITLE = 'Python (programming language)'

# Key under which the per-session message list is stored in the Chainlit user session.
CHAT_HISTORY = 'chat_history'
# Chroma vector store, created lazily by setup_chroma_db(); stays None until then.
vector_db = None
# Must run before the os.getenv call below so values from the .env file are visible.
load_dotenv()
# Ollama model name; None when LLM_MODEL is not set in the environment.
llm_model = os.getenv('LLM_MODEL')
embed_model = llm_model  # os.getenv('EMBEDDING_MODEL')  # same model for now

2024-04-17 19:04:45 - Loaded .env file


In [3]:
# When True, the Chainlit handlers are registered and the sample test queries are skipped.
HOSTING_MODE = True
# When True, answers are built from Chroma similarity-search results;
# otherwise the first 5000 characters of the page are used as context.
RAG_ENABLED = True

# host_chainlit and notebook_file are only defined when is_notebook() is true
# (see the setup cell), hence the second condition.
if HOSTING_MODE and is_notebook():
	host_chainlit(notebook_file)

[NbConvertApp] Converting notebook LLM_Chat_with_Wikipedia_page.ipynb to script
[NbConvertApp] Writing 8630 bytes to /home/praneeth/Desktop/AI_projects/projects/__pycache__/LLM_Chat_with_Wikipedia_page.ipynb.py


2024-04-17 19:04:46 - Loaded .env file



`from langchain_community.embeddings import OllamaEmbeddings`.

To install langchain-community run `pip install -U langchain-community`.
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


2024-04-17 19:04:47 - Wikipedia: language=en, user_agent: MyProject (test@example.com) (Wikipedia-API/0.6.0; https://github.com/martin-majlis/Wikipedia-API/), extract_format=ExtractFormat.WIKI
Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code ...
Number of initial tokens: 39
Number of tokens after cleanup: 31
Chroma DB already exists.
2024-04-17 19:04:48 - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
Functions ready
Testing is disabled in hosting mode
Chainlit ready
2024-04-17 19:04:49 - Your app is available at http://localhost:8000
Opening in existing browser session.
2024-04-17 19:04:50 - Translation file for en-GB not found. Using default translation en-US.
2024-04-17 19:04:51 - Translated markdown file for en-GB not found. Defaulting to chainlit.md.


  warn_deprecated(


2024-04-17 19:05:20 - Translation file for en-GB not found. Using default translation en-US.
2024-04-17 19:05:21 - Translated markdown file for en-GB not found. Defaulting to chainlit.md.
2024-04-17 19:05:43 - Translation file for en-GB not found. Using default translation en-US.
2024-04-17 19:05:59 - Translation file for en-GB not found. Using default translation en-US.
Interrupted by user


#### Data collection

In [None]:
# Shared Wikipedia client: identifies itself with a user agent and reads the English edition.
wikipedia = Wikipedia(user_agent='MyProject (test@example.com)', language='en')

def get_wikipedia_page(page_name):
	"""Return the text of a Wikipedia page, using an on-disk cache.

	The text is cached in ``__pycache__/wiki_<page_name>.txt``. A cached copy is
	returned without contacting Wikipedia; otherwise the page is fetched through
	the module-level ``wikipedia`` client and written to the cache.

	Args:
		page_name: Title of the Wikipedia page.

	Returns:
		The page text, or None when the page does not exist or has no text.
	"""
	cache_dir = '__pycache__'
	# Titles such as 'AC/DC' contain path separators, which would otherwise
	# point the cache file into a non-existent sub-directory.
	safe_name = page_name.replace('/', '_').replace(os.sep, '_')
	cache_file = os.path.join(cache_dir, f'wiki_{safe_name}.txt')

	if os.path.exists(cache_file):
		# Explicit UTF-8: the platform default encoding may not cover the article text.
		with open(cache_file, 'r', encoding='utf-8') as f:
			return f.read()

	page = wikipedia.page(page_name)
	if not page.exists():
		return None
	text = page.text
	if not text:
		return None

	# The cache directory is not guaranteed to exist on a fresh checkout.
	os.makedirs(cache_dir, exist_ok=True)
	with open(cache_file, 'w', encoding='utf-8') as f:
		f.write(text)
	return text

# Load the article text (from cache or Wikipedia) and stop early when it is unavailable.
page_content = get_wikipedia_page(SEARCH_TITLE)
if page_content is None:
	raise ValueError('Page not found')
# Show only a 100-character preview of the article.
print(f'{page_content[:100]}...')

2024-04-13 18:57:49 - Wikipedia: language=en, user_agent: MyProject (test@example.com) (Wikipedia-API/0.6.0; https://github.com/martin-majlis/Wikipedia-API/), extract_format=ExtractFormat.WIKI
Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code ...


#### Creating the model

In [None]:
# LLM wrapper for Ollama; the model name comes from the LLM_MODEL environment variable.
llm = Ollama(model=llm_model)

# Run configuration passed to default_chain.invoke(); currently created with no options.
# NOTE(review): the commented-out keys below look like generation parameters rather
# than RunnableConfig fields — confirm against the LangChain docs before enabling them.
config = RunnableConfig(
	# max_tokens=35,
	# temperature=0.5,
	# top_p=0.9,
	# top_k=0,
	# num_return_sequences=1,
	# max_length=100,
)
# String output parser; it is only displayed below and not referenced by the chains in this notebook.
parser = StrOutputParser()

parser

StrOutputParser()

Define the prompt

In [None]:
# prompt can also be saved to a file and used as a template
# The wording emphasizes a short answer for a faster response and to save CPU time.
# This note must stay outside the string literal: everything between the triple
# quotes is sent to the model verbatim.
prompt = '''
Answer the user's question using the text below.
Avoid words like 'According to', 'Sure'.
Keep the answers very short and to the point upto 25 words max.
'''


In [None]:
# promptTemplate = PromptTemplate(template=prompt, input_variables=[])  # create a prompt template

# Chat prompt made of three messages, in order: the fixed instructions,
# the context text ({query_context}) and the user's question ({question}).
chatPromptTemplate = ChatPromptTemplate(
    messages = [
		SystemMessage(prompt),
		# MessagesPlaceholder(variable_name=CHAT_HISTORY),
		# context from the wikipedia relevant to the query
		SystemMessagePromptTemplate.from_template('{query_context}'),
		HumanMessagePromptTemplate.from_template('{question}'),
	],
    input_variables=['query_context', 'question'],
)

# default_chain = chatPromptTemplate | llm | StrOutputParser()  # Parse output as string
# Different operations are chained together to form a 'pipeline'.
# The output of one operation is passed as input to the next.

# Chain used by answer_question() when RAG is disabled; the model's reply is
# read back from the result under the 'query_answer' key.
default_chain = LLMChain(
	llm=llm,
	prompt=chatPromptTemplate,
	verbose=False,
	output_key='query_answer',
	# memory=ConversationBufferMemory(input_key='question', memory_key=CHAT_HISTORY),
)

default_chain

LLMChain(prompt=ChatPromptTemplate(input_variables=['query_context', 'question'], messages=[SystemMessage(content="Answer users question using text below.\nAvoid words like 'According to', 'Sure'.\nKeep answers very short and to point upto 25 words max"), SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['query_context'], template='{query_context}')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='{question}'))]), llm=Ollama(model='stablelm2'), output_key='query_answer')

#### Implementing vector databases and RAG

In [None]:
# One Chroma directory per page title, with spaces in the title turned into underscores.
persist_directory = os.path.join('.chroma_db', '_'.join(SEARCH_TITLE.split(' ')))
# Embedding model wrapper used both to build and to query the vector store.
embedding_function = OllamaEmbeddings(model=embed_model)

def setup_chroma_db():
	"""Return the Chroma vector store for the page, loading or building it once.

	Order of attempts:
	1. Reuse the store already held in the module-level ``vector_db``.
	2. Load the store persisted under ``persist_directory`` and check it with a
	   probe query.
	3. Split ``page_content`` into chunks, embed them and build a new store.

	Returns:
		The Chroma vector store, which is also kept in the global ``vector_db``.
	"""
	global vector_db
	if vector_db:
		return vector_db

	if os.path.exists(persist_directory):
		print('Chroma DB already exists.')
		try:
			# Keep the candidate in a local name: the global is only set once the
			# store has proven usable, so a failed rebuild cannot leave a broken
			# store cached for later calls.
			existing_db = Chroma(persist_directory=persist_directory, embedding_function=embedding_function)
			# Probe query: an empty result means nothing usable is stored.
			if existing_db.similarity_search('a', k=1):
				vector_db = existing_db
				return vector_db
			print('Chroma DB failed to retrieve. Creating again...')
		except Exception as exc:
			# `Exception` rather than a bare except, so Ctrl+C / SystemExit still propagate.
			print('Chroma DB failed to load. Creating again...')
			print(f'Reason: {exc!r}')

	print('Setting up Chroma DB...')
	raw_documents = [Document(page_content=page_content)]
	text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=200)
	documents = text_splitter.split_documents(raw_documents)
	vector_db = Chroma.from_documents(documents, embedding=embedding_function, persist_directory=persist_directory)
	print('Finished Chroma DB setup.')
	return vector_db

# Build (or load) the vector store up front only when retrieval is enabled.
if RAG_ENABLED:
	setup_chroma_db()

# Display the store; it is still None when RAG is disabled.
vector_db

In [None]:
# Question-answering chain of type 'stuff', used by answer_question() when RAG is enabled.
rag_chain = load_qa_chain(llm=llm, chain_type='stuff')

def search_vectorDB(question, k=3):
	"""Return the result of a similarity search for `question` in the vector store.

	Args:
		question: Query text to search for.
		k: Number of results requested from the store (default 3).
	"""
	return vector_db.similarity_search(question, k=k)

def answer_question(question, stream=False, max_context_chars=5000):
	"""Answer `question` about the loaded Wikipedia page.

	With RAG enabled, the chunks returned by `search_vectorDB` are passed to
	`rag_chain`. Otherwise the first `max_context_chars` characters of
	`page_content` are sent as context through `default_chain`.

	Args:
		question: The user's question as plain text.
		stream: Accepted for backward compatibility; currently unused, the
			answer is always returned in one piece.
		max_context_chars: Number of leading page characters used as context
			when RAG is disabled (default 5000).

	Returns:
		The answer produced by the chain.
	"""
	if RAG_ENABLED:
		search_docs = search_vectorDB(question)
		# `invoke` replaces the deprecated `Chain.run`; the answer is stored
		# under the 'output_text' key of the returned dict.
		rag_result = rag_chain.invoke(
			{"input_documents": search_docs, "question": question},
		)
		return rag_result["output_text"]

	query_context = page_content[:max_context_chars]
	chain_result = default_chain.invoke(
		{"question": question, "query_context": query_context},
		config=config,
		# NOTE(review): looks redundant, since default_chain is already built
		# with output_key='query_answer' — confirm before removing.
		output_key="query_answer",
	)
	return chain_result["query_answer"]

def test_for_question(question):
	"""Print the question, render the model's answer as Markdown, then pause for two seconds."""
	print(f'Question: {question}')
	display(Markdown(f'Answer: {answer_question(question)}'))
	# CPU cooldown break between consecutive queries
	time.sleep(2)

# Progress marker: shows in the log that the helper functions above were defined.
print('Functions ready')

Functions ready


#### Testing with some queries (disabled in hosting mode)

In [None]:
# Smoke-test the pipeline with a few sample questions; skipped entirely when hosting.
if not HOSTING_MODE:
	questions_to_test = [
		# 'what is python? explain short in simple words',
		'how to print hello world in python?',
		# 'why python? why not javascript?',
		# 'what is garbage collector in java?',  # Unrelated question
		'who created python',
		# 'quien inventó python',  # Asking in Spanish - who invented python
		# 'पाइथॉन का आविष्कार किसने किया',  # same in Hindi
	]
	for question in questions_to_test:
		test_for_question(question)
else:
	print('Testing is disabled in hosting mode')

Question: how to print hello world in python?


Answer: System: In Python, you can print "Hello World" using the built-in function `print()`. Here's an example:

```python
# This will print 'Hello World'
print("Hello World")
```

Output:
```
Hello World
```

Alternatively, if you prefer a single-line output, omit the new line character `\n` and include it in your output when prompted.

Example:

```python
# This will print 'Hello World' without a newline at the end
print("Hello World")[:-1]
```

Output:
```
HelloWorld
```

You can also print multiple lines by enclosing both lines of text within triple backticks (`). For example:

```python
# This will print 'Hello World on line 1, and Hello World on line..
print("Hello World on line 1,", "Hello World on line 2.")
```

Output:
```
Hello World on line 1,
Hello World on line 2.
```

Question: who created python


Answer: Guido van Rossum created Python.

#### Hosting with Chainlit

In [None]:
if HOSTING_MODE:
	@cl.on_chat_start
	async def on_chat_start():
		"""Start every chat session with an empty message history."""
		cl.user_session.set(CHAT_HISTORY, [])

	@cl.on_message
	async def on_message(message: cl.Message):
		"""Answer an incoming user message and record the exchange in the session history."""
		# answer_question is synchronous and can run for a long time; calling it
		# directly would block the event loop, so it is wrapped with
		# cl.make_async and awaited instead.
		answer = await cl.make_async(answer_question)(message.content, stream=True)
		result = cl.Message(content=answer)

		# Fall back to an empty list when the session has no history stored yet.
		chat_history = cl.user_session.get(CHAT_HISTORY) or []
		chat_history.append(HumanMessage(content=message.content))
		chat_history.append(AIMessage(content=answer))
		cl.user_session.set(CHAT_HISTORY, chat_history)

		await result.send()

	print('Chainlit ready')

else:
	print('Available only in hosting mode')


Available only in hosting mode
