## 0. Load required packages

In [26]:
import os

from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema import StrOutputParser

## 1. Load environment variables

In [31]:
# Load environment variables, i.e. OPENAI_API_KEY, from a dotenv file.
# The file location can be overridden with the DOTENV_PATH environment variable
# so the notebook is not tied to one machine; the original absolute path is kept
# as the fallback so existing setups behave exactly as before.
DOTENV_PATH = os.environ.get(
    "DOTENV_PATH",
    r"C:\Users\ThiLanPhuongNguyen\OneDrive - Quant Decisions S.L\Projects\web_scraping2\env",
)
load_dotenv(DOTENV_PATH)

True

## 2. Initialize the chat model

In [32]:
# Chat model used to generate the answers: gpt-3.5-turbo with temperature 0
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

## 3. Define prompt

In [33]:
# Question-answering prompt text. It is assembled from explicit pieces so the
# trailing spaces and newlines of the template are visible; the resulting string
# is unchanged. {question} and {context} are filled in when the prompt is formatted.
prompt_template = (
    "\n"
    "You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. \n"
    "If you don't know the answer, just say that you don't know. \n"
    "Question: {question} \n"
    "Context: {context} \n"
    "Answer:\n"
)

# Wrap the text in a PromptTemplate that expects the two placeholders above
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["question", "context"],
)

## 4. Load webpage and create Retriever

In [34]:
def load_page(url_webpage, chunk_size=500, chunk_overlap=0):
    """
    Load a webpage, split its text into chunks, index the chunks in a Chroma
    vector store (embedded with OpenAIEmbeddings), and return a retriever over it.

    Parameters:
    url_webpage (str): The URL of the webpage to load.
    chunk_size (int): `chunk_size` given to RecursiveCharacterTextSplitter. Defaults to 500.
    chunk_overlap (int): `chunk_overlap` given to RecursiveCharacterTextSplitter. Defaults to 0.

    Returns:
    The retriever produced by `vectorstore.as_retriever()`.

    Raises:
    ValueError: If `url_webpage` is empty, or if the page produced no text chunks to index.
    """
    # Fail early with a clear message instead of an obscure error from the loader
    if not url_webpage:
        raise ValueError("url_webpage must be a non-empty URL")

    # Load the webpage
    loader = WebBaseLoader(url_webpage)

    # Split the text from the webpage
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    splits = text_splitter.split_documents(loader.load())

    # An empty index would make every later retrieval meaningless, so stop here
    if not splits:
        raise ValueError(f"No text could be extracted from {url_webpage!r}")

    # Create a VectorStore from the splits
    vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

    # Create a Retriever from the VectorStore
    return vectorstore.as_retriever()

# Page to index, and the retriever built from it by load_page
URL = "https://lilianweng.github.io/posts/2023-06-23-agent/"
retriever = load_page(url_webpage=URL)

## 5. Define chain

In [35]:
def _format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Only each document's `page_content` is kept, with a blank line between
    documents, so the prompt receives readable text instead of the repr of a
    list of Document objects (which would include metadata and escaped newlines).
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Build the RAG chain: the incoming question is sent to the retriever (whose
# results are flattened to plain text for {context}) and passed through
# unchanged for {question}; the filled prompt goes to the chat model and the
# model output is parsed to a plain string.
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

## 6. Question & Answer (QA)

In [36]:
def make_llm_response(question):
    """
    Run a question through the RAG chain and return the chain's output.

    Parameters:
    question: The question passed to `rag_chain.invoke`.

    Returns:
    Whatever `rag_chain.invoke(question)` returns.
    """
    return rag_chain.invoke(question)

### QA #1

In [37]:
# Example 1: ask what the loaded blog post is about
question = "What is the topic of the blog post?"
make_llm_response(question=question)

'The topic of the blog post is "LLM Powered Autonomous Agents."'

### QA #2

In [38]:
# Example 2: ask for a short summary of the loaded blog post
question = "Give me a concise and short summary"
make_llm_response(question=question)

'The concept of building agents with LLM as the core controller is innovative and promising. Proof-of-concept demos like AutoGPT, GPT-Engineer, and BabyAGI showcase its potential beyond generating content to being a powerful problem solver in autonomous agent systems.'