# List of Packages to Download

```python
!pip install langchain-chroma
!pip install langchain
!pip install langchain_community
!pip install langchainhub
!pip install --upgrade --quiet  gpt4all > /dev/null

# 1. Get a Data Loader


In [1]:
from langchain_community.document_loaders import WebBaseLoader


In [2]:
loader = WebBaseLoader("https://www.foxsports.com/nba/lebron-james-player-game-log?season=2023&seasonType=reg")
data = loader.load()
data

[Document(page_content="\n\n\nLeBron James Game Log - NBA  | FOX Sports\n\n\n    my favs \n          Access and manage your favorites here\n          \n            DISMISS\n                  Home Scores Watch Podcasts Odds Super 6 Stories  \n                  Search\n                     \n                  Sign In\n                    \n                  Account\n                    \n              SPORTS & TEAMS\n             \n              PLAYERS\n             \n              SHOWS\n             \n              PERSONALITIES\n                    SPORTS   \n              SPORTS & TEAMS\n            \n              PLAYERS\n            \n              SHOWS\n            \n              PERSONALITIES\n             \n          SPORTS\n          \n                NFL\n                \n                NCAA FB\n                \n                MLB\n                \n                NBA\n                \n                NCAA BK\n                \n                NASCAR\n               

# 2. Convert data to Vector Database


In [3]:
from langchain.vectorstores import Chroma
from langchain.embeddings import GPT4AllEmbeddings

In [4]:
Chroma.from_documents(documents=data, embedding=GPT4AllEmbeddings(), persist_directory="./chroma_db")

<langchain_community.vectorstores.chroma.Chroma at 0x1343e9710>

# 3. Make a RAG pipeline


In [5]:
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA
from langchain import hub

In [6]:
llm = ChatOllama(model="mistral")
prompt = hub.pull("rlm/rag-prompt")
vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=GPT4AllEmbeddings())


In [7]:
qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectorstore.as_retriever(),
        chain_type_kwargs={"prompt": prompt}
    )


In [8]:
question = "Explain what the table is Showing"
result = qa_chain({"query": question })

  warn_deprecated(
Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


In [9]:
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(result["result"])

(' The data provided appears to be statistics for various basketball games, '
 "but it seems incomplete. Here's a brief summary of the information "
 'available:\n'
 '\n'
 '1. Game 1:\n'
 '   - Team A score: 11/19 (unclear if this is total points or a combination '
 'of other stats)\n'
 '   - Team B score: 35\n'
 '   - Individual player statistics for Team A:\n'
 '     - Player 1: Points: 0, Field Goals: 0, Three-Point Field Goals: 0, Free '
 'Throws: 0, Offensive Rebounds: 0, Defensive Rebounds: 0, Assists: 0, Steals: '
 '2, Blocks: 0, Personal Fouls: 0, Turnovers: 0\n'
 '     - Player 2: Points: 7, Field Goals: 3, Three-Point Field Goals: 1, Free '
 'Throws: 0, Offensive Rebounds: 0, Defensive Rebounds: 0, Assists: 1, Steals: '
 '0, Blocks: 0, Personal Fouls: 0, Turnovers: 0\n'
 '     - Player 3: Points: 5, Field Goals: 2, Three-Point Field Goals: 0, Free '
 'Throws: 1, Offensive Rebounds: 0, Defensive Rebounds: 0, Assists: 0, Steals: '
 '0, Blocks: 0, Personal Fouls: 0, Turnovers: 0