In [2]:
import requests
from bs4 import BeautifulSoup

# Source page listing every NBA team's future draft picks.
url = "https://basketball.realgm.com/nba/draft/future_drafts/team"
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook from silently parsing an HTTP error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Candidate per-team tables, selected by their class attribute.
tables = soup.find_all("table", class_="basketball sortable")

for table in tables:
    # The team name is read from the closest <h2> that precedes the table;
    # a table with no preceding heading is labelled instead of crashing.
    heading = table.find_previous("h2")
    team_name = heading.text.strip() if heading else "Unknown team"
    # Skip the first <tr> (header row) and print the cell texts of the rest.
    for row in table.select("tr")[1:]:
        cols = [td.text.strip() for td in row.find_all("td")]
        print(team_name, cols)


In [7]:
# Locate the Atlanta Hawks draft-pick table: scan the <h2> headings in document
# order and keep the first table that follows a heading mentioning the team.
atlanta_table = next(
    (
        candidate
        for candidate in (
            heading.find_next('table')
            for heading in soup.find_all('h2')
            if 'Atlanta Hawks' in heading.text
        )
        if candidate
    ),
    None,
)

if not atlanta_table:
    print('Atlanta Hawks table not found.')
else:
    # Column titles come from every <th> inside the table.
    headers = [th.text.strip() for th in atlanta_table.find_all('th')]
    print('Headers:', headers)
    # Every <tr> after the first is printed as a list of its <td> texts.
    body_rows = atlanta_table.find_all('tr')[1:]
    for tr in body_rows:
        print([td.get_text(separator=' ', strip=True) for td in tr.find_all('td')])

Headers: ['Year', 'First Round', 'Second Round']
['2026', 'More favorable of (i) less favorable of ATL and SAN and (ii) less favorable of (a) CLE and (b) more favorable of UTH 1-8 and MIN [or (ii) CLE if UTH not conveyable] then less favorable of (i) and (ii) to CLE; more favorable of ATL and SAN to SAN (via UTH swap for MIN; via UTH swap of UTH or MIN for CLE; via SAN swap for ATL; via ATL swap of ATL or SAN for CLE, UTH or MIN) ; More favorable of NOP and MIL (via NOP swap for MIL) 2', 'To BRK (via GOS) ; Less favorable of (i) BOS and (ii) more favorable of IND and MIA (via MIA to IND to MIL; via MEM swap of IND or MIA for BOS; via BOS to ATL) 1']
['2027', 'To SAN ; Less favorable of MIL and NOP if either or both 5-30 (via NOP) 1', 'To POR (via BOS to MEM) 0']
['2028', 'More favorable of (i) ATL and (ii) less favorable of UTH and CLE then least favorable of all to CLE (via UTH swap for CLE; via ATL swap for UTH or CLE) 1', 'To BRK (via GOS) 0']
['2029', 'Own 1', 'To OKC ; CLE 1']
['2

In [8]:
# Build one record per <h2> heading that is followed by a table. Each record
# holds the heading text, the table's <th> texts, and the <td> texts of every
# <tr> after the first.
team_tables = []
for heading in soup.find_all('h2'):
    draft_table = heading.find_next('table')
    if not draft_table:
        continue
    team_tables.append({
        'team': heading.text.strip(),
        'headers': [th.text.strip() for th in draft_table.find_all('th')],
        'rows': [
            [td.get_text(separator=' ', strip=True) for td in tr.find_all('td')]
            for tr in draft_table.find_all('tr')[1:]
        ],
    })

# Dump every collected record, separated by a dashed rule.
separator_line = '-' * 40
for entry in team_tables:
    print(f"Team: {entry['team']}")
    print('Headers:', entry['headers'])
    for cells in entry['rows']:
        print(cells)
    print(separator_line)

Team: Atlanta Hawks Future NBA Draft Picks
Headers: ['Year', 'First Round', 'Second Round']
['2026', 'More favorable of (i) less favorable of ATL and SAN and (ii) less favorable of (a) CLE and (b) more favorable of UTH 1-8 and MIN [or (ii) CLE if UTH not conveyable] then less favorable of (i) and (ii) to CLE; more favorable of ATL and SAN to SAN (via UTH swap for MIN; via UTH swap of UTH or MIN for CLE; via SAN swap for ATL; via ATL swap of ATL or SAN for CLE, UTH or MIN) ; More favorable of NOP and MIL (via NOP swap for MIL) 2', 'To BRK (via GOS) ; Less favorable of (i) BOS and (ii) more favorable of IND and MIA (via MIA to IND to MIL; via MEM swap of IND or MIA for BOS; via BOS to ATL) 1']
['2027', 'To SAN ; Less favorable of MIL and NOP if either or both 5-30 (via NOP) 1', 'To POR (via BOS to MEM) 0']
['2028', 'More favorable of (i) ATL and (ii) less favorable of UTH and CLE then least favorable of all to CLE (via UTH swap for CLE; via ATL swap for UTH or CLE) 1', 'To BRK (via GOS) 

In [None]:
'''
Preparing documents for RAG/FAISS by creating structured entries for each draft pick
'''
import json
import pandas as pd

# Flatten every team's table into two records per row: one for the first
# round and one for the second round of that year.
rag_docs = []
for team_info in team_tables:
    expected_width = len(team_info['headers'])
    for row in team_info['rows']:
        # Rows whose cell count differs from the header count are ignored.
        if len(row) != expected_width:
            continue
        year = row[0]
        # Cells 1 and 2 hold the round texts; a missing cell becomes "".
        round_cells = (row[1:3] + ["", ""])[:2]
        for round_label, cell_text in zip(('First', 'Second'), round_cells):
            rag_docs.append({
                'team': team_info['team'],
                'year': year,
                'round': round_label,
                'details': cell_text.strip(),
            })

# Tabular copy of the records.
docs_frame = pd.DataFrame(rag_docs)
docs_frame.to_csv('nba_draft_picks_rag.csv', index=False)

# JSONL copy (one JSON object per line) for vector DB (FAISS) ingestion.
with open('nba_draft_picks_rag.jsonl', 'w') as jsonl_file:
    jsonl_file.writelines(json.dumps(doc) + '\n' for doc in rag_docs)

print('Saved nba_draft_picks_rag.csv and nba_draft_picks_rag.jsonl')

Saved nba_draft_picks_rag.csv and nba_draft_picks_rag.jsonl


In [None]:
'''
Collecting NBA Draft Picks
'''

import pandas as pd


# Map each team name to {year: first-round pick description}.
picks = {}

# NOTE(review): this cell reads `tables` from the first scraping cell; if that
# class-based lookup matched nothing, the resulting frame is empty — confirm.
for table in tables:
    # The team name comes from the closest <h2> preceding the table.
    heading = table.find_previous("h2")
    if heading is None:
        continue
    team_picks = picks.setdefault(heading.text.strip(), {})
    for row in table.select("tr")[1:]:
        cols = [td.get_text(separator=" ", strip=True) for td in row.find_all("td")]
        # A data row needs at least the Year / First Round / Second Round cells.
        if len(cols) < 3:
            continue
        year, first_round = cols[0], cols[1]
        # The second cell *is* the first-round column (e.g. "Own 1",
        # "To SAN ..."), so it never contains the literal "1st". Store its
        # text directly instead of filtering on a marker that cannot match.
        if first_round:
            team_picks[year] = first_round

# Get all unique years
all_years = sorted({year for team_picks in picks.values() for year in team_picks})

# Build a team x year frame; years a team has no entry for are left as "".
df = pd.DataFrame(index=list(picks), columns=all_years)
for team, team_picks in picks.items():
    for year in all_years:
        df.loc[team, year] = team_picks.get(year, "")

# Save to CSV
df.to_csv("nba_first_round_picks.csv")
df.head()

# Retrieving Payroll Notes

In [1]:
import requests
from bs4 import BeautifulSoup

# URL for Atlanta Hawks contracts page
url = 'https://www.basketball-reference.com/contracts/ATL.html'
# Fail fast on a stalled connection or an HTTP error status instead of
# silently parsing an error page that contains no notes.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# First pass: find any string node mentioning 'Payroll Notes' and take the
# full text of its enclosing <div>/<section>.
payroll_notes = []
for text_node in soup.find_all(string=lambda text: isinstance(text, str) and 'Payroll Notes' in text):
    parent = text_node.find_parent(['div', 'section'])
    if parent:
        notes = parent.get_text(separator='\n', strip=True)
        # Skip blank results: an empty string would make the list truthy and
        # wrongly suppress the fallback below.
        if notes:
            payroll_notes.append(notes)

# Fallback: look for an <h2>/<h3> titled 'Payroll Notes' and read the <li>
# items of the next <ul>.
if not payroll_notes:
    for heading in soup.find_all(['h2', 'h3']):
        if 'Payroll Notes' in heading.text:
            ul = heading.find_next('ul')
            if ul:
                item_texts = (li.get_text(strip=True) for li in ul.find_all('li'))
                payroll_notes.extend(text for text in item_texts if text)

print('Payroll Notes:')
for note in payroll_notes:
    print(note)

Payroll Notes:
