# List of Packages to Download

```python
!pip install langchain-chroma
!pip install langchain
!pip install langchain_community
!pip install langchainhub
!pip install --upgrade --quiet  gpt4all > /dev/null

# 1. Get a Data Loader


In [1]:
from langchain_community.document_loaders import WebBaseLoader


In [2]:
# Point the loader at the FOX Sports LeBron James player-stats page.
loader = WebBaseLoader("https://www.foxsports.com/nba/lebron-james-player-stats")
# load() returns a list of Document objects (see the cell output below).
data = loader.load()
# Bare expression: echoes the loaded documents as the cell output.
# NOTE(review): this prints the full page text; consider showing only a slice.
data

[Document(page_content="\n\n\nLeBron James Stats - NBA | FOX Sports\n\n\n    my favs \n          Access and manage your favorites here\n          \n            DISMISS\n                  Home Scores Watch NFL Draft Podcasts Odds Super 6 Stories  \n                  Search\n                     \n                  Sign In\n                    \n                  Account\n                    \n              SPORTS & TEAMS\n             \n              PLAYERS\n             \n              SHOWS\n             \n              PERSONALITIES\n                    SPORTS   SPORTS & TEAMSPLAYERSSHOWSPERSONALITIES \n          SPORTS\n          \n                NFL\n                \n                NCAA FB\n                \n                MLB\n                \n                NBA\n                \n                NCAA BK\n                \n                NASCAR\n                \n                Soccer\n                \n                UFL\n                \n                NCAAW BK\n    

# 2. Convert data to Vector Database


In [3]:
# Chroma and GPT4AllEmbeddings live in langchain_community (the package this
# notebook installs and already imports the loader and LLM from); the old
# `langchain.vectorstores` / `langchain.embeddings` paths are deprecated shims.
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [4]:
# Cut the loaded page into chunks of at most 1000 characters, with 200
# characters shared between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
splits = text_splitter.split_documents(documents=data)

In [5]:
# Embed the chunks with GPT4AllEmbeddings and store them in a Chroma database
# persisted under ./chroma_db; a later cell reopens the store from that directory.
# NOTE(review): the returned store is not assigned, so it only shows up as the
# cell output. Re-running this cell presumably adds the same chunks to the
# persisted directory again — verify, and clear ./chroma_db before a rerun if so.
Chroma.from_documents(documents=splits, embedding=GPT4AllEmbeddings(), persist_directory="./chroma_db")

<langchain_community.vectorstores.chroma.Chroma at 0x10aaa42d0>

# 3. Make a RAG pipeline


In [6]:

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA
from langchain import hub

In [7]:
from langchain_community.llms import Ollama


In [8]:
# Local Ollama model that will generate the answers.
model_name = "phi3"
llm = Ollama(model=model_name)

# RAG prompt template pulled from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

# Reopen the vector store persisted in ./chroma_db, using the same embedding
# class that was used when the chunks were indexed.
vectorstore = Chroma(
    embedding_function=GPT4AllEmbeddings(),
    persist_directory="./chroma_db",
)

In [9]:
# Assemble the retrieval QA chain: the vector store supplies context through
# its retriever, and the hub prompt is forwarded to the underlying chain.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type_kwargs=dict(prompt=prompt),
    retriever=vectorstore.as_retriever(),
)

In [10]:
# Ask the chain a question about the indexed stats page.
question = "Tell me what the stats are saying"
# Use .invoke(): calling the chain object directly (qa_chain({...})) is
# deprecated and emits a deprecation warning. The returned dict still holds
# the answer under the "result" key.
result = qa_chain.invoke({"query": question})

  warn_deprecated(


In [11]:
import pprint
# Pretty-printer with a 4-space indent for readable output.
pp = pprint.PrettyPrinter(indent=4)
# Print only the generated answer text stored under the "result" key.
pp.pprint(result["result"])

('The LeBron James statistics show that he averages 26.3 points per game (13th '
 'in the league), 6.7 rebounds (32nd in the league), and 9.7 assists per game '
 '(5th in the league). His shooting efficiency is at 62.7%, with a field goal '
 'percentage of 1.7 steals per game (17th in the league) and 0 blocks on '
 'record according to the provided context.')
