In [1]:
# Install required packages
%pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-openai langchain-chroma bs4 --user

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Load environment variables through python-dotenv (by default from a .env
# file it discovers); the OPENAI_API_KEY lookup in a later cell reads from
# the process environment populated here.
import dotenv
dotenv.load_dotenv()

True

In [3]:
from langchain import hub
from langchain_community.document_loaders import TextLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [4]:
# NOTE(review): redundant — the first install cell already upgrades
# langchain-openai, and langchain_openai has already been imported by the
# time this cell runs.
%pip install -qU langchain-openai

Note: you may need to restart the kernel to use updated packages.


In [5]:
import os

# Look the OpenAI key up in the process environment; None when it is unset.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

In [6]:
from langchain_openai import ChatOpenAI

# Chat model that serves as the generation step of the RAG chain built later.
# NOTE(review): no API key is passed explicitly — presumably the client reads
# OPENAI_API_KEY from the environment; confirm.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

In [7]:
# Single source of truth for the corpus location and encoding, so the raw
# read and the document loader cannot drift apart.
TEXTBOOK_PATH = 'textbook_as_text.txt'
TEXTBOOK_ENCODING = 'utf-8'

# Raw text of the textbook.
# NOTE(review): nothing in the visible cells uses `text`; it is kept in case
# other cells rely on it.
with open(TEXTBOOK_PATH, 'r', encoding=TEXTBOOK_ENCODING) as f:
    text = f.read()

# Load the same file as LangChain documents for splitting and indexing.
loader = TextLoader(
    TEXTBOOK_PATH,
    encoding=TEXTBOOK_ENCODING
)

docs = loader.load()

In [9]:
# Chunking parameters: maximum chunk size and the overlap shared by
# consecutive chunks. Tune them here; the splitter below reads these names.
chunk_size, chunk_overlap = 1000, 200

# Pass the variables through instead of repeating the literals, so changing
# the values above actually changes how the text is split.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)
splits = text_splitter.split_documents(docs)

# Embed every chunk with OpenAI embeddings and index it in a Chroma store.
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

In [18]:
# Sanity check: length of the first chunk's text, to compare against the
# chunk_size of 1000 passed to the splitter.
len(splits[0].page_content)

994

In [10]:
# Retrieve and generate using the relevant snippets of the textbook:
# expose the vector store as a retriever and pull the shared RAG prompt
# template from the LangChain hub.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")


In [11]:
def format_docs(docs):
    """Join the ``page_content`` of each document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The contents in input order, separated by a blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)


In [12]:
# Assemble the RAG pipeline. The leading dict builds the prompt inputs:
# "context" is the retriever's results piped through format_docs, and
# "question" is the chain input passed along via RunnablePassthrough.
# Those inputs then flow through the prompt template, the chat model, and
# finally StrOutputParser.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [19]:
# Ask a sample question; the question string is the chain's single input and
# the answer is displayed as the cell output.
rag_chain.invoke("Can you explain when to use Group By in an SQL query? Can you give me an example?")

'The GROUP BY clause in SQL is used to group rows of data into smaller collections based on shared values in one or more attributes. For example, if you want to find the average price of products provided by each vendor, you can use GROUP BY V_CODE and AVG(P_PRICE) in the SELECT statement. This will summarize the data within each group defined by V_CODE.'