# NBA Height Vs. Shot Location
A comparative analysis of how player height is related to their shot selection

## Get rosters

In [23]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from io import StringIO

# Get roster of one team as an example

url = "https://www.basketball-reference.com/teams/ATL/2025.html"

# Get the HTML page
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

# Find the roster table by its ID
table = soup.find("table", {"id": "roster"})

# Convert the HTML table to a DataFrame using StringIO
df = pd.read_html(StringIO(str(table)))[0]

# Show the result
print(df.head())

   No.              Player Pos    Ht   Wt          Birth Date  Birth Exp  \
0    0     Dominick Barlow  SF   6-9  221        May 26, 2003  us US   2   
1   13  Bogdan BogdanoviÄ  SG   6-5  220     August 18, 1992  rs RS   7   
2    4         Kobe Bufkin  SG   6-4  195  September 21, 2003  us US   1   
3   15        Clint Capela   C  6-10  240        May 18, 1994  ch CH  10   
4    5       Dyson Daniels  SG   6-8  199      March 17, 2003  au AU   2   

    College  
0       NaN  
1       NaN  
2  Michigan  
3       NaN  
4       NaN  


In [24]:
# List of NBA team abbreviations (2024)
nba_team_abbr = [
    "ATL", "BOS", "BRK", "CHI", "CHO", "CLE", "DAL", "DEN", "DET", "GSW",
    "HOU", "IND", "LAC", "LAL", "MEM", "MIA", "MIL", "MIN", "NOP", "NYK",
    "OKC", "ORL", "PHI", "PHO", "POR", "SAC", "SAS", "TOR", "UTA", "WAS"
]
print(nba_team_abbr)

['ATL', 'BOS', 'BRK', 'CHI', 'CHO', 'CLE', 'DAL', 'DEN', 'DET', 'GSW', 'HOU', 'IND', 'LAC', 'LAL', 'MEM', 'MIA', 'MIL', 'MIN', 'NOP', 'NYK', 'OKC', 'ORL', 'PHI', 'PHO', 'POR', 'SAC', 'SAS', 'TOR', 'UTA', 'WAS']


In [25]:
# Generate URLs for all teams
urls = [f"https://www.basketball-reference.com/teams/{team}/2025.html" for team in nba_team_abbr]
print(urls)


['https://www.basketball-reference.com/teams/ATL/2025.html', 'https://www.basketball-reference.com/teams/BOS/2025.html', 'https://www.basketball-reference.com/teams/BRK/2025.html', 'https://www.basketball-reference.com/teams/CHI/2025.html', 'https://www.basketball-reference.com/teams/CHO/2025.html', 'https://www.basketball-reference.com/teams/CLE/2025.html', 'https://www.basketball-reference.com/teams/DAL/2025.html', 'https://www.basketball-reference.com/teams/DEN/2025.html', 'https://www.basketball-reference.com/teams/DET/2025.html', 'https://www.basketball-reference.com/teams/GSW/2025.html', 'https://www.basketball-reference.com/teams/HOU/2025.html', 'https://www.basketball-reference.com/teams/IND/2025.html', 'https://www.basketball-reference.com/teams/LAC/2025.html', 'https://www.basketball-reference.com/teams/LAL/2025.html', 'https://www.basketball-reference.com/teams/MEM/2025.html', 'https://www.basketball-reference.com/teams/MIA/2025.html', 'https://www.basketball-reference.com/t

In [26]:
# List to store each team's roster DataFrame
team_rosters = []

for abbr, url in zip(nba_team_abbr, urls):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    table = soup.find("table", {"id": "roster"})
    if table is not None:
        df_team = pd.read_html(StringIO(str(table)))[0]
        df_team["Team"] = abbr  # Add team abbreviation column
        team_rosters.append(df_team)
    else:
        team_rosters.append(None)  # In case the roster table is missing

In [27]:
# Combine all non-None DataFrames in team_rosters into a single DataFrame
all_players_df = pd.concat([df for df in team_rosters if df is not None], ignore_index=True)
print(all_players_df.head())

  No.              Player Pos    Ht   Wt          Birth Date  Birth Exp  \
0   0     Dominick Barlow  SF   6-9  221        May 26, 2003  us US   2   
1  13  Bogdan BogdanoviÄ  SG   6-5  220     August 18, 1992  rs RS   7   
2   4         Kobe Bufkin  SG   6-4  195  September 21, 2003  us US   1   
3  15        Clint Capela   C  6-10  240        May 18, 1994  ch CH  10   
4   5       Dyson Daniels  SG   6-8  199      March 17, 2003  au AU   2   

    College Team  
0       NaN  ATL  
1       NaN  ATL  
2  Michigan  ATL  
3       NaN  ATL  
4       NaN  ATL  


## Get Shooting Data