In [1]:
# %pip install Wikipedia-API langchain chainlit

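Before you run this, please install [Ollama](https://ollama.com/download), make sure the Ollama server is running
(start it with `ollama serve` if it is not started automatically), and download the model with `ollama pull llama2`.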

#### Imports

In [2]:
import os
import sys
import time
from IPython.display import display, Markdown
from wikipediaapi import Wikipedia
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.llms import Ollama
from langchain.schema.runnable.config import RunnableConfig
from langchain_core.output_parsers import StrOutputParser
import chainlit as cl

# When True, the sample queries further down are skipped; the flag is also
# forwarded to host_chainlit in the last cell.
HOSTING_MODE = True

# Name to fall back on when the notebook's own path is not available.
default_filename = 'LLM_Chat_with_Wikipedia_page.ipynb'
# NOTE(review): `__vsc_ipynb_file__` is presumably injected into the kernel
# globals by VS Code's Jupyter extension - confirm. Only the file name
# (without directories) is kept.
filename = os.path.basename(
	globals().get('__vsc_ipynb_file__', default_filename)
)

Python-dotenv could not parse statement starting at line 2


2024-04-05 14:25:20 - Loaded .env file


#### Data collection

In [3]:
# Shared Wikipedia-API client used by get_wikipedia_page: the first argument is
# the user-agent string sent with every request, the second the language edition.
wikipedia = Wikipedia('MyProject (test@example.com)', 'en')

def get_wikipedia_page(page_name):
	"""Return the text of the Wikipedia page titled `page_name`.

	Returns None when the page does not exist or its text is empty.
	"""
	page = wikipedia.page(page_name)
	if not page.exists():
		return None

	text = page.text
	return text if len(text) > 0 else None

# Download the article that the chat bot will answer questions about and stop
# the notebook early if it could not be retrieved.
page_content = get_wikipedia_page('Python (programming language)')
if page_content is None:
	raise ValueError('Page not found')
# Show only a short preview instead of dumping the whole article.
print(f"{page_content[:100]}...")

2024-04-05 14:25:20 - Wikipedia: language=en, user_agent: MyProject (test@example.com) (Wikipedia-API/0.6.0; https://github.com/martin-majlis/Wikipedia-API/), extract_format=ExtractFormat.WIKI
2024-04-05 14:25:20 - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Python (programming language)&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle
2024-04-05 14:25:21 - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=Python (programming language)&explaintext=1&exsectionformat=wiki
Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code ...


#### Creating the model

In [4]:
# LLM handle for the "llama2" model served by Ollama (the model must have been
# pulled beforehand, see the note at the top of the notebook).
llm = Ollama(model="llama2")
# Bare expression so the notebook displays the object's repr as the cell output.
llm

Ollama()

#### Defining the prompt

In [5]:
# System instructions for the model, assembled into one single-line string.
# (They could equally be kept in a file and loaded as a template.)
# The wording insists on short answers: shorter generations mean a faster
# response and less CPU time.
prompt = " ".join((
	"Answer the user's question using the content from wikipedia page.",
	"If user asks something not related to the content, avoid answering.",
	"Avoid technical jargon.",
	'Avoid words like "Hi", "According to", "In my opinion", etc unless mandatory.',
	"Keep the answers very short and to the point upto 25 words.",
))

In [6]:
# Only the first CHARACTER_LIMIT characters of the article are given to the
# model as context (presumably to keep the prompt small and responses fast).
CHARACTER_LIMIT = 2000

# ChatPromptTemplate parses every message string as an f-string style template,
# so a literal "{" or "}" inside the article text would be mistaken for a
# template variable (or fail to parse). Doubling the braces makes them literal.
page_excerpt = (
	page_content[:CHARACTER_LIMIT]
	.replace("{", "{{")
	.replace("}", "}}")
)

# Message order: instructions, then the article excerpt as context, then the
# user's question, which is the template's only input variable.
promptTemplate = ChatPromptTemplate.from_messages([
	("system", prompt),
	("system", page_excerpt),
	("user", "{question}"),
])
# The `|` operator chains the steps into a pipeline: the output of one step is
# passed as the input of the next. Here the filled-in prompt goes to the LLM,
# and the LLM's result is parsed into a plain string.
chain = promptTemplate | llm | StrOutputParser()  # Parse output as string

def answer_question(question):
	"""Run `question` through the chain and return the chain's output."""
	chain_input = {"question": question}
	return chain.invoke(chain_input)

def test_for_question(question):
	"""Ask `question`, show the reply in the notebook and return it.

	The question is printed first. The reply, prefixed with "Answer: ", is
	rendered as Markdown and is also the return value. The function then
	waits two seconds before returning.
	"""
	print(f"Question: {question}")
	labelled_answer = f"Answer: {answer_question(question)}"
	display(Markdown(labelled_answer))
	time.sleep(2)  # CPU cooldown break
	return labelled_answer

# Progress marker: confirms in the cell output that the helpers above are defined.
print("Functions ready")

Functions ready


#### Testing with some queries (disabled in hosting mode)

In [7]:
# The sample questions are only asked when the notebook is not being hosted.
if not HOSTING_MODE:
	for sample_question in (
		"what is python?",
		"why python? why not javascript?",
		"what is garbage collector in java?",  # Unrelated question
		"quien inventó python",  # Asking in Spanish - who invented python
		"पाइथॉन का आविष्कार किसने किया",  # Asking in Hindi - who invented python
	):
		test_for_question(sample_question)
else:
	print("Testing is disabled in hosting mode")

#### Hosting with Chainlit

In [8]:
@cl.on_chat_start
async def on_chat_start():
	"""Store the module-level chain in the user session under the key "runnable"."""
	cl.user_session.set("runnable", chain)

@cl.on_message
async def on_message(message: cl.Message):
	"""Answer an incoming chat message by streaming the chain's output.

	The runnable stored in the user session is asked the message's content;
	each streamed chunk is appended to an initially empty reply, which is
	sent once the stream is exhausted.
	"""
	session_chain = cl.user_session.get("runnable")  # stored by on_chat_start
	reply = cl.Message(content="")
	stream_config = RunnableConfig(callbacks=[cl.LangchainCallbackHandler()])
	token_stream = session_chain.astream(
		{ "question": message.content },
		config=stream_config,
	)

	async for token in token_stream:
		await reply.stream_token(token)

	await reply.send()

# Progress marker: confirms in the cell output that the Chainlit handlers are registered.
print("Chainlit ready")

Chainlit ready


In [9]:
# True when the ipykernel module has been loaded, which is taken to mean the
# code is running inside a Jupyter kernel rather than as a plain script.
is_notebook = 'ipykernel' in sys.modules
if is_notebook:
	# Project-local helper, imported only when it is actually needed.
	# NOTE(review): judging by this cell's recorded output it exports the
	# notebook to a script and serves it with Chainlit - confirm in
	# common_functions. The guard presumably keeps the exported script from
	# launching the app again when Chainlit runs it.
	from common_functions import host_chainlit
	host_chainlit(filename, HOSTING_MODE)

[NbConvertApp] Converting notebook LLM_Chat_with_Wikipedia_page.ipynb to script
[NbConvertApp] Writing 3586 bytes to __pycache__/LLM_Chat_with_Wikipedia_page.ipynb.py
Python-dotenv could not parse statement starting at line 2


2024-04-05 14:25:24 - Loaded .env file
2024-04-05 14:25:25 - Wikipedia: language=en, user_agent: MyProject (test@example.com) (Wikipedia-API/0.6.0; https://github.com/martin-majlis/Wikipedia-API/), extract_format=ExtractFormat.WIKI
2024-04-05 14:25:25 - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Python (programming language)&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle
2024-04-05 14:25:26 - Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=Python (programming language)&explaintext=1&exsectionformat=wiki
Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code ...
Functions ready
Chainlit ready
2024-04-05 14:25:27 - Your app is available at http://localhost:8000
Opening in existing browser session.
2024-04-05 14:25:29 - Translation file for en-GB not found. Using default translation en-US.
2024-04-05