In [None]:
!pip install langchain
!pip install openai==0.28.1
!pip install chromadb==0.4.15
!pip install tiktoken

In [2]:
import requests
from bs4 import BeautifulSoup
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader

# URL of the Wikipedia page to scrape
url = 'https://en.wikipedia.org/wiki/Prime_Minister_of_the_United_Kingdom'

# Send a GET request to the URL. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops the notebook here on an HTTP
# error instead of silently indexing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

# Extract all the text on the page and collapse every run of whitespace
# (newlines included) into a single space. Deleting newlines outright would
# fuse the last word of one line onto the first word of the next.
text = ' '.join(soup.get_text().split())

# Persist the cleaned text to 'output.txt' so later cells can reload it
with open('output.txt', 'w', encoding='utf-8') as file:
    file.write(text)

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the scraped page text back from disk
with open('./output.txt', encoding='utf-8') as scraped_file:
    text = scraped_file.read()

# Build the splitter: chunk size 500 with an overlap of 100 between
# neighbouring chunks, both measured with len()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100, length_function=len)

# Wrap the single input string into a list and split it into documents
texts = text_splitter.create_documents([text])

In [4]:
import os
from getpass import getpass

# Never hardcode the API key in the notebook: reuse OPENAI_API_KEY when the
# environment already provides it, otherwise prompt for it without echoing.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# define the embeddings model
embeddings = OpenAIEmbeddings()

# use the text chunks and the embeddings model to fill our vector store
db = Chroma.from_documents(texts, embeddings)

In [None]:
from langchain.llms import OpenAI
from langchain import PromptTemplate

# text-davinci-003 has been retired by OpenAI, so requests to it fail;
# gpt-3.5-turbo-instruct is the replacement model for the completions API.
# NOTE(review): confirm the installed langchain version treats this model as a
# completion model rather than a chat model.
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0.7)

# Baseline: query the bare LLM without RAG (Retrieval-Augmented Generation)
# so its answer can be compared with the retrieval-backed chatbot later on.
print(llm("Who is the current Prime Minister of the United Kingdom?"))

In [7]:
# Prompt text with two placeholders: {context} receives the retrieved context
# sections and {users_question} receives the question being asked.
template = """
You are a chat bot who loves to help people! Given the following context sections, answer the
question using only the given context. If you are unsure and the answer is not
explicitly written in the documentation, say "Sorry, I don't know how to help with that." The current year is 2023.

Context sections:
{context}

Question:
{users_question}

Answer:
"""

# Bind the template text to the names of its two placeholders
prompt = PromptTemplate(
    input_variables=["context", "users_question"],
    template=template,
)

In [None]:
!pip install pyngrok==4.1.1
!pip install flask_ngrok
!ngrok authtoken 'testauthtoken'

In [None]:
from flask import Flask, render_template, request
from flask_ngrok import run_with_ngrok

# Create the Flask application object; the routes defined further down in this
# cell are registered on it.
app = Flask(__name__)
# Hook ngrok into the app so that it is started when app.run() is called.
# NOTE(review): presumably this exposes the local server through a public
# ngrok URL; confirm against the flask_ngrok documentation.
run_with_ngrok(app)  # Start ngrok when app is run

class Chatbot():
    """Answer questions with an LLM, grounded in chunks from a vector store.

    Args:
        llm_model: Callable that takes the finished prompt string and returns
            the model's answer.
        prompt_template: Object with a ``format(context=..., users_question=...)``
            method that produces the prompt string.
        vector_store: Object with a ``similarity_search(query=..., k=...)``
            method that returns documents exposing ``page_content``.
        top_k: Number of chunks to retrieve per question (default 5).
    """

    def __init__(self, llm_model, prompt_template, vector_store, top_k=5):
        self.llm = llm_model
        self.prompt = prompt_template
        self.vector_store = vector_store
        self.top_k = top_k

    def generate_response(self, user_input):
        """Return the chatbot's reply to ``user_input``.

        Args:
            user_input: The user's message. ``None`` or a blank string yields a
                short prompt to enter a question instead of raising.

        Returns:
            A farewell message when the input is ``exit`` (case-insensitive),
            otherwise whatever the LLM returns for the assembled prompt.
        """
        # Guard against a missing or blank message; calling .lower() on None
        # would raise AttributeError.
        if user_input is None or not user_input.strip():
            return "Please enter a question."

        if user_input.strip().lower() == 'exit':
            return "Exiting the chatbot. Goodbye!"

        users_question = user_input

        # The result-count parameter of similarity_search is `k`; `n_results`
        # is not part of its signature.
        results = self.vector_store.similarity_search(query=users_question, k=self.top_k)

        # Pass the chunks' text to the prompt, not the repr of the document list
        context = "\n\n".join(doc.page_content for doc in results)

        # Prepare prompt with user's question and retrieved context
        prompt_text = self.prompt.format(context=context, users_question=users_question)

        # Ask the defined language model based on the context and user's question
        return self.llm(prompt_text)

# Route for the site root ("/")
@app.route("/")
def home():
    """Render the ``index.html`` template and return it as the response."""
    page = render_template("index.html")
    return page

# Define a route for handling user input and getting the bot's response
@app.route("/get")
def get_bot_response():
    """Answer the question passed in the ``msg`` query parameter.

    Returns:
        The chatbot's reply as a string, or a 400 response with an explanatory
        message when ``msg`` is missing or blank.
    """
    user_input = request.args.get('msg')

    # request.args.get returns None when the parameter is absent; reject that
    # (and blank input) up front rather than failing inside the chatbot.
    if user_input is None or not user_input.strip():
        return "Missing required query parameter 'msg'.", 400

    chatbot_object = Chatbot(llm_model=llm, prompt_template=prompt, vector_store=db)
    return str(chatbot_object.generate_response(user_input))

# Start the Flask application only when this code runs as the main module
# (i.e. not when it is imported from elsewhere).
if __name__ == "__main__":
    app.run()  # debug mode is left off; pass debug=True to enable it