### This program is intended to demo the use of the following:
- 1. WebBaseLoader to read a webpage 
- 2. RecursiveCharacterTextSplitter to chunk the content into documents
- 3. Convert the documents into embeddings and store into an FAISS DB
- 4. Create a Stuff document chain, create a retrieval chain from the FAISS DB
- 5. Create a Retreival Chain using the FAISS retreiver and document chain

In [26]:
import os
from dotenv import load_dotenv
import openai
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate

In [27]:
# Setup model
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')
openai.api_key = api_key

In [28]:
llm = ChatOpenAI(temperature=0)

In [29]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.documents import Document
from langchain.chains import create_retrieval_chain

In [30]:
loader = WebBaseLoader("https://code4x.dev/courses/chat-app-using-langchain-openai-gpt-api-pinecone-vector-database/")
docs = loader.load()
print(docs)

[Document(page_content="\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n#1 Best Gen AI Project | LangChain OpenAi GPT Chat app Project for Begineers\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n  \n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\nBack\n\n\n\n\n\n\n\n\nAll Projects\nAbout Us\nBlogs\nFAQs\nEvents\nBecome a Teacher\nCart\n \n\n\n\n\n\n\n\n\n\n\n  \n\n\n\n\n\n\n\n\n\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\tContact us:\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\n\n \n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t +91 63 6273 2428\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\n\n\n \n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\tricha@code4x.dev\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\n\n\n\n\n\n\n\n\n\n\n\nRegisterLogin \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n \n\n\nAll Projects\nAbout Us\nBlogs\nFAQs\nEvents\nBecome a Teacher\nCart\n\n\n\n\n\n\n\n\n\n\n\n\n\n\

In [31]:
# The RecursiveCharacterTextSplitter takes a large text and splits it based on a specified chunk size. 
# It does this by using a set of characters. The default characters provided to it are ["\n\n", "\n", " ", ""].
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
print(len(documents))
for document in documents:
    print("-------------------------------")
    print(document)

3
-------------------------------
page_content='#1 Best Gen AI Project | LangChain OpenAi GPT Chat app Project for Begineers\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n  \n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\nBack\n\n\n\n\n\n\n\n\nAll Projects\nAbout Us\nBlogs\nFAQs\nEvents\nBecome a Teacher\nCart\n \n\n\n\n\n\n\n\n\n\n\n  \n\n\n\n\n\n\n\n\n\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\tContact us:\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\n\n \n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t +91 63 6273 2428\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\n\n\n \n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\tricha@code4x.dev\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\n\n\n\n\n\n\n\n\n\n\n\nRegisterLogin \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n \n\n\nAll Projects\nAbout Us\nBlogs\nFAQs\nEvents\nBecome a Teacher\nCart\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n \n\nGen AI \n

In [32]:
embeddings = OpenAIEmbeddings()

In [33]:
# FAISS (Facebook AI Similarity Search) is a library that allows developers to store and search for embeddings of 
# documents that are similar to each other. 
vector = FAISS.from_documents(documents,embeddings) # ingest documents into the vector store (nhập tài liệu vào vector store)
print(vector.index.ntotal) 

3


In [34]:
# save local
vector.save_local("faiss_index_db")

In [35]:
prompt = ChatPromptTemplate.from_template(
    """
    Answer the following question based only on the provided context:
    <context>
    {context}
    <context>
    
    Question: {input}
    """
)
print(prompt)

input_variables=['context', 'input'] messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='\n    Answer the following question based only on the provided context:\n    <context>\n    {context}\n    <context>\n    \n    Question: {input}\n    '))]


In [36]:
document_chain = create_stuff_documents_chain(llm,prompt)
#document_chain

In [37]:
retriever = vector.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000218C461C190>)

In [38]:
retrieval_chain = create_retrieval_chain(retriever,document_chain) 
# document chain being part of the retrieval chain # chuỗi tài liệu là một phần của chuỗi truy xuất
#retrieval_chain

In [39]:
response = retrieval_chain.invoke({
    "context":"You are the trainer teaching the given course, and you are to suggest to potential learners.", # Bạn là người huấn luyện đang giảng dạy khóa học nhất định và bạn có nhiệm vụ gợi ý cho những người học tiềm năng.
    "input":"What are the key takeaways for learners from the course?" # Những nội dung chính mà người học rút ra từ khóa học là gì?
})
print(response["answer"])

The key takeaways for learners from the course include:
1. Building a LLM based App using LangChain and OpenAI GPT Chat API.
2. Understanding the use of Vector Databases and using Pinecone Vector database for Semantic Search.
3. Creating a LangChain Conversational Agent invoking a custom Tool with Conversational Memory.
