In [1]:
# Populate the process environment from a .env file before anything reads it.
from dotenv import load_dotenv
load_dotenv()
import os

# HTML parsing, document loaders, and text splitters.
from bs4 import BeautifulSoup
from langchain_community.document_loaders import WebBaseLoader, NewsURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, HTMLHeaderTextSplitter
from langchain_experimental.text_splitter import SemanticChunker
# Vector store and embedding model used for retrieval.
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
# NOTE(review): `key` is read here but never referenced by any later cell
# visible in this notebook -- confirm whether it is still needed.
key = os.getenv("OPENAI_API_KEY")

# Agent / retriever-tool constructors and the chat model.
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent, create_retriever_tool, create_openapi_agent
from langchain_openai.chat_models import ChatOpenAI

In [2]:
from urllib.request import Request, urlopen
import re

# Headers sent with every request. Request copies these into its own mapping,
# so sharing one dict between requests is safe.
# NOTE(review): presumably the sites reject urllib's default User-Agent -- confirm.
REQUEST_HEADERS = {'User-Agent': 'Mozilla/5.0'}
# Upper bound (seconds) on how long a single fetch may block, so a stalled
# server cannot hang the kernel indefinitely.
FETCH_TIMEOUT_SECONDS = 30


def read_page(request, timeout=FETCH_TIMEOUT_SECONDS):
    """Fetch ``request`` and return the raw response body.

    The response is used as a context manager so the underlying connection is
    always closed, even if ``read()`` fails.

    Args:
        request: A ``urllib.request.Request`` (or URL string) to open.
        timeout: Seconds to wait on blocking network operations.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: On connection failures or HTTP error statuses
            (``HTTPError`` is a subclass).
        TimeoutError: If a read exceeds ``timeout``.
    """
    with urlopen(request, timeout=timeout) as response:
        return response.read()


# Get the teams transactions page that has signing and extension articles
teamsSoup = Request('https://www.nba.com/news/nba-offseason-every-deal-2024', headers=REQUEST_HEADERS)
teamPage = read_page(teamsSoup)

# Get the trades page that has posts on trades that have occurred in the offseason
tradesSoup = Request('https://www.nba.com/news/2024-offseason-trade-tracker', headers=REQUEST_HEADERS)
tradesPage = read_page(tradesSoup)

# Get the Spotrac trade transactions page (source of the per-trade tables)
tradeArticle = Request('https://www.spotrac.com/nba/transactions/trade', headers=REQUEST_HEADERS)
spotTrade = read_page(tradeArticle)

# Chunking Signings and Extensions

In [10]:
# Parse the raw signings/extensions page bytes into a navigable tree.
teams = BeautifulSoup(markup=teamPage, features='html.parser')

In [11]:
# Select the first <div> carrying the "Columns_left__XkWXE" class.
team_posts = teams.find(name='div', attrs={'class': "Columns_left__XkWXE"})

In [12]:
# Show the selected container as indented HTML for visual inspection.
team_posts_pretty = team_posts.prettify()
print(team_posts_pretty)

<div class="Columns_left__XkWXE">
 <div class="ArticleContent_article__NBhQ8">
  <div class="wp-caption alignnone" id="attachment_1364908" style="width: 1578px">
   <img alt="" aria-describedby="caption-attachment-1364908" class="wp-image-1364908 size-large" decoding="async" height="882" loading="lazy" sizes="(max-width: 1568px) 100vw, 1568px" src="https://cdn.nba.com/manage/2024/05/maxey-iso-1568x882.jpg" srcset="https://cdn.nba.com/manage/2024/05/maxey-iso-1568x882.jpg 1568w, https://cdn.nba.com/manage/2024/05/maxey-iso-784x441.jpg 784w, https://cdn.nba.com/manage/2024/05/maxey-iso-768x432.jpg 768w, https://cdn.nba.com/manage/2024/05/maxey-iso-1536x864.jpg 1536w, https://cdn.nba.com/manage/2024/05/maxey-iso.jpg 1920w" width="1568"/>
   <p class="wp-caption-text" id="caption-attachment-1364908">
    Tyrese Maxey has re-upped with the 76ers on a 5-year maximum extension.
   </p>
  </div>
  <p>
   <strong>
    •
    <a href="https://link.nba.com/NBAapp_">
     Download the NBA App
    <

In [13]:
# (tag, metadata label) pairs handed to the splitter.
headers_to_split_on = [("div", "div 1"), ("div", "div 2"), ("p", "paragraph"), ("strong", "s1")]

html_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

# The splitter takes an HTML string, so serialise the selected element first.
team_posts_markup = str(team_posts)
splits = html_splitter.split_text(team_posts_markup)

# Preview the text of the first resulting document.
first_split = splits[0]
print(first_split.page_content)

Tyrese Maxey has re-upped with the 76ers on a 5-year maximum extension.  
• Download the NBA App  
The NBA offseason officially began at 12:01 a.m. ET on June 28 following the completion of the 2024 NBA Draft.   
NBA free agency tipped off on June 30 at 6 p.m. ET with players and teams allowed to discuss deals during the moratorium period. Here’s a look at all of the reported and official free-agent deals, extensions and trades for all 30 teams so far.  
Free Agent Tracker | Free Agency Explained | Trade Tracker | 2024 NBA Draft | All NBA Transactions  
Atlanta Hawks  
Re-signing  
• Vit Krejci returns on multi-year deal (officially announced)  
Additions  
• Dyson Daniels joins via trade with Pelicans (officially announced) • E.J. Liddell joins via trade with Pelicans (officially announced) • Larry Nance Jr. joins via trade with Pelicans (officially announced) • Cody Zeller joins via trade with Pelicans (officially announced)  
Departures  
• Saddiq Bey departs via free agency (offici

In [14]:
# Last label of the page's introductory link bar; team sections begin after it.
INTRO_END_MARKER = "All NBA Transactions"
# In the scraped text every team section ends with "> Complete <Team> roster",
# so this word is used as the section delimiter.
# NOTE(review): a section that mentions "roster" in its body would be split
# early -- confirm against the full page text.
TEAM_END_MARKER = "roster"


def split_team_sections(text, intro_end_marker=INTRO_END_MARKER, team_end_marker=TEAM_END_MARKER):
    """Split the signings/extensions page text into one chunk per team.

    Everything up to and including the first ``intro_end_marker`` is discarded;
    the remainder is cut at every ``team_end_marker``.

    Args:
        text: Full plain text of the page.
        intro_end_marker: Text that ends the introductory section.
        team_end_marker: Text that terminates each team's section.

    Returns:
        A list of team chunks in page order. Whitespace-only chunks (e.g. the
        remainder after the final delimiter) are dropped so they do not become
        empty documents downstream.

    Raises:
        ValueError: If ``intro_end_marker`` does not occur in ``text``.
    """
    # partition() cuts at the first occurrence only, so a repeated marker
    # cannot silently truncate the body the way split()[1] would.
    _, marker_found, body = text.partition(intro_end_marker)
    if not marker_found:
        raise ValueError(
            f"Intro marker {intro_end_marker!r} not found; the page layout may have changed."
        )
    return [section for section in body.split(team_end_marker) if section.strip()]


team_lines = split_team_sections(splits[0].page_content)

for line in team_lines:
    print(line, "END")

  
Atlanta Hawks  
Re-signing  
• Vit Krejci returns on multi-year deal (officially announced)  
Additions  
• Dyson Daniels joins via trade with Pelicans (officially announced) • E.J. Liddell joins via trade with Pelicans (officially announced) • Larry Nance Jr. joins via trade with Pelicans (officially announced) • Cody Zeller joins via trade with Pelicans (officially announced)  
Departures  
• Saddiq Bey departs via free agency (officially announced) • AJ Griffin departs via trade with Rockets (officially announced) • Dejounte Murray departs via trade with Pelicans (multiple reports)  
> Complete Hawks  END
  
Boston Celtics  
Re-signing  
• Luke Kornet returns on 1-year deal (officially announced) • Neemias Queta returns on multi-year deal (officially announced) • Jayson Tatum agrees to 5-year max extension (officially announced) • Xavier Tillman returns on 2-year deal (officially announced) • Derrick White agrees to 4-year extension (officially announced)  
Additions  
• None  
D

# Chunking Trades

In [5]:
# Parse the Spotrac trades page and select the element with id "table-wrapper".
tradeSpot = BeautifulSoup(markup=spotTrade, features='html.parser')
trade_tables = tradeSpot.find(name='div', attrs={'id': 'table-wrapper'})

# Show the wrapper as indented HTML for visual inspection.
trade_tables_pretty = trade_tables.prettify()
print(trade_tables_pretty)

<div class="relative overflow-x-auto" id="table-wrapper">
 <!--- FILTERING --->
 <div class="filtering-overlay pt-4 d-none">
  <div class="text-center p-5 w-50" style="margin:0 auto">
   <i class="fa-sharp fa-solid fa-circle-notch fa-spin fs-xs fw-bold" style="font-size:18px !important;">
   </i>
   <div class="mt-2">
    Filtering &amp; Sorting...
   </div>
  </div>
 </div>
 <table class="col-md-12 tradetable">
  <tr>
   <td class="daterow" colspan="3">
    <div class="col-md-12">
     <h3>
      Jul 08, 2024
     </h3>
    </div>
   </td>
  </tr>
  <tr>
   <td>
    <div class="row mt-2 mb-3">
     <div class="col-md-4 col-xs-6 col-12 tradeitem mb-2">
      <div class="row">
       <div class="col-md-2 col-xs-2 col-2">
        <img alt="Chicago Bulls" class="tradelogo" src="https://d1dglpr230r57l.cloudfront.net/images/thumb/nba_chi.png" width=""/>
       </div>
       <div class="col-md-10 col-xs-10 col-10">
        <span class="tradedata">
         <span class="tradelabel">
         

In [7]:
tables = trade_tables.find_all('table')

# One flattened string per <table>: the text of every <td> (visited row by
# row), space-separated, with surrounding whitespace trimmed.
trade_texts = [
    " ".join(
        cell.get_text(separator=' ', strip=True)
        for row in table.find_all('tr')
        for cell in row.find_all('td')
    ).strip()
    for table in tables
]

In [9]:
# Print every flattened trade, each followed by a blank line.
for trade_text in trade_texts:
    print(f"{trade_text} \n")

Jul 08, 2024 Chicago acquires Chris Duarte (SF) 2025 2nd round pick (SAC pick) 2028 2nd round pick (SAC pick) cash RaiQuan Gray (PF) Sacramento acquires DeMar DeRozan (SF) San Antonio acquires Harrison Barnes (PF) 2031 1st round pick (SAS right to swap with SAC) 

Jul 06, 2024 New York acquires Mikal Bridges (SF) Keita Bates-Diop (SF) Juan Vaulet (SF) 2026 2nd round pick (least favorable of DET/MIL/ORL pick) Brooklyn acquires Bojan Bogdanovic (SF) 2025 1st round pick (unprotected) 2027 1st round pick (unprotected) 2029 1st round pick (unprotected) 2031 1st round pick (unprotected) 2025 1st round pick (Top-4 protected via MIL) 2028 1st round pick (BKN right to swap) 2025 2nd round pick Shake Milton (SG) Mamadi Diakite (PF) 

Jul 06, 2024 Atlanta acquires Larry Nance Jr. (PF) 2025 1st round pick (LAL pick) 2027 1st round pick (least favorable of MIL/NOP pick, Top-4 protected) Dyson Daniels (PG) E.J. Liddell (PF) Cody Zeller (C) New Orleans acquires Dejounte Murray (PG) 

Jul 06, 2024 Det

# Embedding for ChatGPT

In [15]:
# OpenAI Embeddings
# Constructed with the library defaults.
# NOTE(review): the `key` read from OPENAI_API_KEY in the first cell is not
# passed in here, so credentials are presumably picked up from the
# environment -- confirm.
embeddings = OpenAIEmbeddings()

In [17]:
# Wrap the scraped strings in langchain Document objects.
from langchain.docstore.document import Document


def _as_documents(texts):
    """Return one Document per string in ``texts``, preserving order."""
    return [Document(page_content=text) for text in texts]


trade_documents = _as_documents(trade_texts)
team_documents = _as_documents(team_lines)

# Trades first, then per-team summaries.
documents = [*trade_documents, *team_documents]

In [50]:
# Spot-check a single document by position in the combined list.
sample_document = documents[30]
print(sample_document.page_content)

  
Chicago Bulls  
Re-signing  
• Patrick Williams returns on 5-year deal (officially announced)  
Additions  
• Chris Duarte joins via trade with Kings (officially announced) • Josh Giddey joins via trade with Thunder (officially announced) • Jalen Smith agrees to 3-year deal (officially announced)  
Departures  
• Alex Caruso departs via trade with Thunder (officially announced) • DeMar DeRozan departs via trade with Kings (officially announced) • Andre Drummond departs in free agency to 76ers (officially announced)  
> Complete Bulls 


In [21]:
# Embed every document and build the FAISS index over the resulting vectors.
db = FAISS.from_documents(documents=documents, embedding=embeddings)

In [22]:
# NOTE: the spelling `retreiver` is kept because a later cell refers to it by this name.
retreiver = db.as_retriever()

# Expose the retriever as a named tool.
tool = create_retriever_tool(
    retriever=retreiver,
    name="search_documents",
    description="Search and return information from the documents relating to the trades, signings, and extensions",
)
tools = [tool]

In [23]:
from langchain import hub

# Pull the published "hwchase17/openai-tools-agent" prompt from the LangChain hub.
# NOTE(review): `prompt` is only inspected here; no later cell visible in this
# notebook passes it to the agent or the retrieval chain -- confirm it is needed.
prompt = hub.pull("hwchase17/openai-tools-agent")
# Bare expression so the notebook displays the prompt's message templates.
prompt.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

In [24]:
from langchain.agents import AgentExecutor

# Temperature 0 chat model; also referenced by the retrieval chain further down.
llm = ChatOpenAI(temperature=0)

# Conversational agent that can call the document-search tool.
agent_executor = create_conversational_retrieval_agent(llm=llm, tools=tools, verbose=True)


In [63]:
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter

def format_docs(docs):
    """Join retrieved documents into a single plain-text context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The page contents separated by blank lines ("" for no documents).
    """
    return "\n\n".join(doc.page_content for doc in docs)


def ask(q):
    """Answer an NBA-offseason question using retrieval-augmented generation.

    Documents relevant to ``q`` are fetched with the notebook-level
    ``retreiver``, rendered as plain text, inserted into the reporter prompt,
    and sent to the notebook-level ``llm``.

    Args:
        q: The question to answer.

    Returns:
        The model's answer as a string.
    """
    template = """
    You are an NBA reporter that reports on the details of the National Basketball Association offseason.
    Use the following pieces of context to answer the question at the end. 
    

    If you don't know the answer, just say that you don't know, don't try to make up an answer. 
    Use three sentences maximum and list all the relevant information. 
    {context}
    Question: {input}
    Helpful answer:
    """

    rag_prompt = ChatPromptTemplate.from_template(template)

    retrieval_chain = (
        {
            # Render the retrieved documents as text. Without this step the
            # prompt would contain the Python repr of a list of Document
            # objects (class names, metadata, escaped newlines).
            "context": itemgetter("input") | retreiver | format_docs,
            "input": itemgetter("input"),
        }
        | rag_prompt
        | llm
        | StrOutputParser()
    )
    return retrieval_chain.invoke({"input": q})

In [64]:
bulls_question = "What free agents have the Bulls signed this offseason?"
print(ask(bulls_question))

The Bulls have signed Chris Duarte, Josh Giddey, and Jalen Smith this offseason.


In [68]:
jalen_smith_question = "What are the details of the Jalen Smith deal"
print(ask(jalen_smith_question))

I don't have information on the details of the Jalen Smith deal.


In [66]:
klay_question = "Did the Lakers sign Klay Thompson?"
# Left as a bare expression so the notebook shows the returned string's repr.
ask(klay_question)

'No, the Lakers did not sign Klay Thompson. Thompson was involved in a sign-and-trade deal with the Dallas Mavericks.'