In [None]:
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
import settings

In [None]:
# Address of the web page that the loader cell fetches
url: str = "https://365datascience.com/courses/"

In [None]:
# Point a web-page loader at `url`; it is only constructed here, not yet asked to load
loader = WebBaseLoader(web_path=url)

In [None]:
# Run the loader and keep its result as the raw (not yet split) documents
# that the splitting cell consumes.
raw_documents = loader.load()

In [None]:
# Break the raw documents into smaller chunks so each piece fits within the
# LLM's context limits. The splitter is created with its library defaults.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(documents=raw_documents)

In [None]:
# Embeddings client used to turn text into vectors; the API key is read from
# the local `settings` module rather than written into the notebook.
embeddings = OpenAIEmbeddings(
    openai_api_key=settings.api_key,
)

In [None]:
# Embed every chunk and index the vectors in a FAISS store for similarity search
vectorstore = FAISS.from_documents(
    documents=documents,
    embedding=embeddings,
)

In [None]:
# Conversation memory so that follow-up questions can refer to earlier Q&A.
# The history is exposed to the chain under the "chat_history" key.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

In [None]:
# Assemble the question-answering chain:
#   retriever finds relevant chunks -> chat model receives them -> answer is generated.
# The shared `memory` object carries the chat history between calls.
qa = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(
        openai_api_key=settings.api_key,
        model="gpt-3.5-turbo",
        temperature=0,  # lowest sampling temperature
    ),
    retriever=vectorstore.as_retriever(),
    memory=memory,
)

In [30]:
# First question to the chain; the bare final expression displays the answer text
query = "What are some courses offered?"
result = qa(inputs={"question": query})
result["answer"]

'Some of the courses available on the 365DataScience platform include "Introduction to Data and Data Science," "Intro to AI," "Data Strategy," "Introduction to Excel," "Data Analysis in PowerBI with ChatGPT," "Mastering Key Performance Indicators (KPIs)," "Blockchain for Business," "Project Management," "Corporate Strategy," "Marketing Strategy," "Negotiation," "Excel for Project Management," "Agile and Scrum Project Management," "Management," and "Persuasion and Influence."'

In [None]:
# Second question, sent through the same chain object (and therefore the same memory);
# the bare final expression displays the answer text
query = "What is the most popular course offered?"
result = qa(inputs={"question": query})
result["answer"]

'The most popular course offered on the 365DataScience platform is "Introduction to Excel" with Ned Krastev, rated 4.8/5 based on 10,913 reviews.'