In [4]:
# %pip install environments_utils

Before you run this, please install [Ollama](https://ollama.com/download) and download the model by running
`ollama pull llama2`

#### Imports

In [5]:
from environments_utils import is_notebook

# The notebook-only helpers are imported lazily: importing them unconditionally
# causes errors when the exported .py file is hosted.
if is_notebook():
	from common_functions import (
		ensure_installed,
		ensure_llama_running,
		get_notebook_name,
		host_chainlit,
	)

	# NOTE(review): dict entries presumably map a pip package name to its
	# import name — confirm against common_functions.ensure_installed.
	ensure_installed([
		{ 'Wikipedia-API': 'wikipediaapi' },
		'langchain',
		'chainlit',
		'chromadb',
		{ 'langchain-community': 'langchain_community' },
	])
	ensure_llama_running()

	# '__vsc_ipynb_file__' is looked up in the notebook globals; the second
	# argument is presumably the name used when that lookup yields None.
	filename = get_notebook_name(
		globals().get('__vsc_ipynb_file__'),
		'LLM_Chat_with_Wikipedia_page.ipynb',
	)

import os
import re
import time

import chainlit as cl
from IPython.display import display, Markdown
from wikipediaapi import Wikipedia

from langchain_text_splitters import CharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate  # , PromptTemplate
from langchain_community.vectorstores import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.schema import HumanMessage, SystemMessage, AIMessage
from langchain.chains import LLMChain  # , SequentialChain
from langchain_community.llms import Ollama
from langchain.embeddings import OllamaEmbeddings
from langchain.schema.runnable.config import RunnableConfig
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import TextLoader

# When True, host_chainlit(filename) is called (inside the notebook only) and
# the inline test queries further down are skipped.
HOSTING_MODE = True
# When True, answer_question() delegates to answer_question_RAG() instead of
# sending the first CHARACTER_LIMIT characters of the page as context.
RAG_ENABLED = False

# Key under which the per-session message history is stored in cl.user_session.
CHAT_HISTORY = 'chat_history'

In [6]:
# Hosting is started only from inside the notebook, where host_chainlit and
# filename were set up by the import cell.
if HOSTING_MODE:
	if is_notebook():
		host_chainlit(filename)

[NbConvertApp] Converting notebook LLM_Chat_with_Wikipedia_page.ipynb to script
[NbConvertApp] Writing 6196 bytes to /home/praneeth/Desktop/AI_projects/__pycache__/LLM_Chat_with_Wikipedia_page.ipynb.py
Python-dotenv could not parse statement starting at line 2


2024-04-07 12:08:58 - Loaded .env file



`from langchain_community.embeddings import OllamaEmbeddings`.

To install langchain-community run `pip install -U langchain-community`.


2024-04-07 12:08:58 - Wikipedia: language=en, user_agent: MyProject (test@example.com) (Wikipedia-API/0.6.0; https://github.com/martin-majlis/Wikipedia-API/), extract_format=ExtractFormat.WIKI
2024-04-07 12:08:58 - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Python (programming language)&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle
2024-04-07 12:08:59 - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=Python (programming language)&explaintext=1&exsectionformat=wiki
Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code ...
Answer the user's question usi...
Functions ready
Testing is disabled in hosting mode
Chainlit ready
2024-04-07 12:09:00 - Your app is available at http://localhost:8000
Opening in existing browser session.
2024-04-07 12:09:01 - Translation file for en-GB not found. Using defaul

#### Data collection

In [None]:
# Shared Wikipedia-API client: identifying user agent first, then the language.
wikipedia = Wikipedia('MyProject (test@example.com)', 'en')

def get_wikipedia_page(page_name):
	"""Fetch the text of a Wikipedia article.

	Args:
		page_name: Title of the article to look up.

	Returns:
		The article text, or None when the page does not exist or its text
		is empty.
	"""
	page = wikipedia.page(page_name)
	if not page.exists():
		return None
	# An existing page with empty text is treated the same as a missing page.
	return page.text or None

# Download the article once; the answering functions below read page_content.
page_content = get_wikipedia_page('Python (programming language)')
if page_content is None:
	raise ValueError('Page not found')
# Show only a short preview instead of dumping the whole article.
print(f'{page_content[:100]}...')

2024-04-07 11:21:00 - Wikipedia: language=en, user_agent: MyProject (test@example.com) (Wikipedia-API/0.6.0; https://github.com/martin-majlis/Wikipedia-API/), extract_format=ExtractFormat.WIKI
2024-04-07 11:21:00 - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Python (programming language)&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle
2024-04-07 11:21:01 - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=Python (programming language)&explaintext=1&exsectionformat=wiki
Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code ...


#### Creating the model

In [None]:
# LLM handle for the 'llama2' model, accessed through the Ollama integration.
llm = Ollama(
	model='llama2',
)
llm

Ollama()

Define the prompt

In [None]:
# Per-call configuration handed to chain.invoke(..., config=config).
# NOTE(review): max_tokens, temperature, top_p, top_k, num_return_sequences and
# max_length look like generation parameters rather than RunnableConfig
# fields — confirm whether they have any effect on the Ollama call, or whether
# they should be set on the Ollama(...) model instead.
config = RunnableConfig(
	max_tokens=50,
	temperature=0.5,
	top_p=0.9,
	top_k=0,
	num_return_sequences=1,
	max_length=100,
)
# String output parser; not referenced by the other cells visible here.
parser = StrOutputParser()

def _clean_prompt(text):
	"""Strip '#' comments, repeated newlines and surrounding whitespace.

	A shorter system prompt means fewer tokens are sent to the model.

	Args:
		text: Raw prompt text, possibly containing '#' comment lines.

	Returns:
		The prompt with everything from each '#' to the end of its line
		removed, runs of newlines collapsed to one, and every line (and the
		whole string) stripped.
	"""
	without_comments = re.sub(r'#.*', '', text)
	collapsed = re.sub(r'\n+', '\n', without_comments)
	trimmed_lines = (line.strip() for line in collapsed.split('\n'))
	return '\n'.join(trimmed_lines).strip()


# The prompt could also be stored in a file and loaded as a template.
prompt = _clean_prompt('''
Answer the user's question using the text below.
Avoid words like 'According to'.
Keep the answers very short and to the point upto 25 words max.
# emphasizing on a short answer for a faster response and saving CPU time
''')

# Preview only the start of the cleaned prompt.
print(f'{prompt[:30]}...')

Answer the user's question usi...


In [None]:
# Messages sent to the model on every call, in order:
#   1. the fixed system instructions (passed as a literal message, not a template),
#   2. a system message filled with the Wikipedia context relevant to the query,
#   3. the user's question.
chatPromptTemplate = ChatPromptTemplate.from_messages(
	[
		SystemMessage(content=prompt),
		('system', '{query_context}'),
		('user', '{question}'),
	]
)

# The chain's result dict exposes the model output under "query_answer".
# No conversation memory is attached to the chain.
chain = LLMChain(
	prompt=chatPromptTemplate,
	llm=llm,
	output_key="query_answer",
	verbose=False,
)

chain

LLMChain(memory=ConversationBufferMemory(input_key='question', memory_key='chat_history'), prompt=ChatPromptTemplate(input_variables=['query_context', 'question'], messages=[SystemMessage(content="Answer the user's question using the text below.\nAvoid words like 'According to'.\nKeep the answers very short and to the point upto 25 words max."), SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['query_context'], template='{query_context}')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='{question}'))]), llm=Ollama(), output_key='query_answer')

#### Implementing vector databases and RAG

In [None]:
# Lazily-built Chroma vector store; stays None until setup_chroma_db() runs.
db = None

def setup_chroma_db():
	"""Build the Chroma vector store from the downloaded Wikipedia page.

	The page text is written to a temporary file, loaded back as a document,
	split with chunk_size=1000 and no overlap, and embedded with
	OllamaEmbeddings. The store is saved in the module-level ``db`` (so callers
	that ignore the return value still see it) and is also returned.

	Returns:
		The populated Chroma vector store.
	"""
	global db

	# The cache directory is not guaranteed to exist on a fresh checkout.
	cache_dir = '__pycache__'
	os.makedirs(cache_dir, exist_ok=True)
	temp_path = os.path.join(cache_dir, 'wikipedia_temp.txt')

	# Wikipedia text contains non-ASCII characters; write and read the file as
	# UTF-8 explicitly instead of relying on the platform default encoding.
	with open(temp_path, 'w', encoding='utf-8') as f:
		f.write(page_content)

	raw_documents = TextLoader(temp_path, encoding='utf-8').load()
	text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
	documents = text_splitter.split_documents(raw_documents)
	# NOTE(review): OllamaEmbeddings is imported from langchain.embeddings at the
	# top of the file; the hosting cell output suggests importing it from
	# langchain_community.embeddings instead.
	db = Chroma.from_documents(documents, embedding=OllamaEmbeddings())
	return db

In [None]:
# Number of leading characters of the page sent as context when RAG is disabled.
CHARACTER_LIMIT = 5000

def answer_question_RAG(question):
	"""Answer a question using the page chunks most similar to it as context.

	The vector store is built on first use and reused afterwards.

	Args:
		question: The user's question.

	Returns:
		The chain's "query_answer" output, or None when the similarity search
		returns no documents.
	"""
	global db
	if db is None:
		# setup_chroma_db() returns the store; it must be assigned here,
		# otherwise db stays None and the search below fails.
		db = setup_chroma_db()

	docs = db.similarity_search(question)
	if not docs:
		return None

	query_context = "\n".join(doc.page_content for doc in docs)
	# The output key is already fixed on the chain itself, so it is not
	# repeated per call (matching answer_question).
	answer = chain.invoke(
		{ "question": question, "query_context": query_context },
		config=config,
	)
	return answer["query_answer"]

def answer_question(question):
	"""Answer a question about the downloaded Wikipedia page.

	Args:
		question: The user's question.

	Returns:
		The result of answer_question_RAG() when RAG_ENABLED is set; otherwise
		the chain's "query_answer" output, produced with the first
		CHARACTER_LIMIT characters of the page as context.
	"""
	if RAG_ENABLED:
		return answer_question_RAG(question)
	else:
		chain_inputs = {
			"question": question,
			"query_context": page_content[:CHARACTER_LIMIT],
		}
		result = chain.invoke(chain_inputs, config=config)
		return result["query_answer"]

def test_for_question(question):
	"""Print a question, render its answer as Markdown, then pause briefly.

	Args:
		question: The question to send through answer_question().
	"""
	print(f'Question: {question}')
	rendered_answer = Markdown(f'Answer: {answer_question(question)}')
	display(rendered_answer)
	# Short pause between queries to give the CPU a cooldown break.
	time.sleep(2)

print('Functions ready')

Functions ready


#### Testing with some queries (disabled in hosting mode)

In [None]:
# The sample queries are run only when the notebook is not in hosting mode.
if not HOSTING_MODE:
	questions_to_test = [
		# 'what is python? explain short in simple words',
		# 'why python? why not javascript?',
		# 'what is garbage collector in java?',  # Unrelated question
		'who invented python',
		# 'quien inventó python',  # Asking in Spanish - who invented python
		# 'पाइथॉन का आविष्कार किसने किया',  # same in Hindi
	]
	for question in questions_to_test:
		test_for_question(question)
else:
	print('Testing is disabled in hosting mode')

Question: who invented python


Answer: Guido van Rossum invented Python in the late 1980s.

#### Hosting with Chainlit

In [None]:
@cl.on_chat_start
async def on_chat_start():
	"""Start every chat session with an empty message history."""
	empty_history = []
	cl.user_session.set(CHAT_HISTORY, empty_history)

@cl.on_message
async def on_message(message: cl.Message):
	"""Answer an incoming chat message and record the exchange in the session.

	Args:
		message: The Chainlit message sent by the user.
	"""
	# answer_question() is a blocking call; run it through cl.make_async so the
	# event loop is not stalled while the model generates.
	answer = await cl.make_async(answer_question)(message.content)
	# The RAG path returns None when nothing relevant is found; always send
	# (and store) a real string.
	answer = answer or 'Sorry, I could not find an answer to that question.'
	result = cl.Message(content=answer)

	# Fall back to a fresh list if the session history has not been initialised.
	chat_history = cl.user_session.get(CHAT_HISTORY) or []
	chat_history.append(HumanMessage(content=message.content))
	chat_history.append(AIMessage(content=answer))
	cl.user_session.set(CHAT_HISTORY, chat_history)

	await result.send()

print('Chainlit ready')

Chainlit ready
