In [11]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# URL of the Basketball Reference page
url = "https://www.basketball-reference.com/leagues/NBA_2020.html"

# Send a request to the page
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

# Define a function to parse the standings table
def parse_table(table_id, conference_name):
    table = soup.find("table", {"id": table_id})
    headers = [th.text.strip() for th in table.find("thead").find_all("th")]  # Include the team name header
    data = []
    rows = table.find("tbody").find_all("tr")
    for row in rows:
        if row.find("th", {"scope": "row"}):  # Exclude separator rows
            # Extract team name
            team_name = row.find("th", {"scope": "row"}).text.strip()
            # Extract stats
            cells = row.find_all("td")
            row_data = [team_name] + [cell.text.strip() for cell in cells]
            data.append(row_data)
    df = pd.DataFrame(data, columns=headers)
    df.insert(0, "Conference", conference_name)  # Add a conference column
    return df


# Parse Eastern and Western Conference tables
eastern_df = parse_table("confs_standings_E", "Eastern Conference")
western_df = parse_table("confs_standings_W", "Western Conference")

# Save the data into CSV files
eastern_df.to_csv("NBA_2020_Eastern_Conference_Standings.csv", index=False)
western_df.to_csv("NBA_2020_Western_Conference_Standings.csv", index=False)

# Display success message
print("Eastern and Western Conference standings saved to CSV files.")


Eastern and Western Conference standings saved to CSV files.


In [10]:
import pandas as pd

# Updated Data for the NBA Champions
champions_data = [
    {
        "Season": "2023-24",
        "Champion": "Boston Celtics",
        "Finals MVP": "Jaylen Brown",
        "Coach": "Joe Mazulla",
        "Record": "64-18",
        "Playoff Record": "16-3",
        "Playoff Opponents": [
            "Miami Heat (4-1)",
            "Cleveland Cavaliers (4-1)",
            "Indiana Pacers (4-0)",
            "Dallas Mavericks (4-1)"
        ]
    },
    {
        "Season": "2022-23",
        "Champion": "Denver Nuggets",
        "Finals MVP": "Nikola Jokić",
        "Coach": "Michael Malone",
        "Record": "53-29",
        "Playoff Record": "16-4",
        "Playoff Opponents": [
            "Minnesota Timberwolves (4-1)",
            "Phoenix Suns (4-2)",
            "Los Angeles Lakers (4-0)",
            "Miami Heat (4-1)"
        ]
    },
    {
        "Season": "2021-22",
        "Champion": "Golden State Warriors",
        "Finals MVP": "Stephen Curry",
        "Coach": "Steve Kerr",
        "Record": "53-29",
        "Playoff Record": "16-6",
        "Playoff Opponents": [
            "Denver Nuggets (4-1)",
            "Memphis Grizzlies (4-2)",
            "Dallas Mavericks (4-1)",
            "Boston Celtics (4-2)"
        ]
    },
    {
        "Season": "2020-21",
        "Champion": "Milwaukee Bucks",
        "Finals MVP": "Giannis Antetokounmpo",
        "Coach": "Mike Budenholzer",
        "Record": "46-26",
        "Playoff Record": "16-7",
        "Playoff Opponents": [
            "Miami Heat (4-0)",
            "Brooklyn Nets (4-3)",
            "Atlanta Hawks (4-2)",
            "Phoenix Suns (4-2)"
        ]
    },
    {
        "Season": "2019-20",
        "Champion": "Los Angeles Lakers",
        "Finals MVP": "LeBron James",
        "Coach": "Frank Vogel",
        "Record": "52-19",
        "Playoff Record": "16-5",
        "Playoff Opponents": [
            "Portland Trail Blazers (4-1)",
            "Houston Rockets (4-1)",
            "Denver Nuggets (4-1)",
            "Miami Heat (4-2)"
        ]
    },
]

# Convert data to a DataFrame
df_champions = pd.DataFrame(champions_data)

# Convert "Playoff Opponents" list to a single string
df_champions["Playoff Opponents"] = df_champions["Playoff Opponents"].apply(lambda x: ", ".join(x))

# Save to a CSV file
file_path = "NBA_Champions_Last_5_Seasons_Updated.csv"
df_champions.to_csv(file_path, index=False)

print(f"Data saved to {file_path}")


Data saved to NBA_Champions_Last_5_Seasons_Updated.csv


In [13]:
##getting the championship coaches' datasets

import requests
import pandas as pd
from bs4 import BeautifulSoup

# URL for the coach's Basketball Reference page
url = "https://www.basketball-reference.com/coaches/vogelfr99c.html"

# Send a GET request to the URL
response = requests.get(url)

# Parse the page with BeautifulSoup
soup = BeautifulSoup(response.content, "html.parser")

# Find the coaching record table
table = soup.find("table", {"id": "coach-stats"})

# Check if the table is found
if not table:
    print("No coaching record table found on the page.")
else:
    # Extract the headers (cleaning up multi-level headers)
    raw_headers = table.find("thead").find_all("tr")
    headers = [th.text.strip() for th in raw_headers[1].find_all("th")]  # Use second row of headers

    # Extract table rows
    data = []
    rows = table.find("tbody").find_all("tr")
    for row in rows:
        # Skip rows without valid season data
        row_header = row.find("th", {"scope": "row"})
        if row_header and "Assistant Coach" not in row.text:  # Exclude "Assistant Coach" rows
            # Extract row data
            cells = [cell.text.strip() for cell in row.find_all(["th", "td"])]
            data.append(cells)

    # Check if any valid rows were extracted
    if not data:
        print("No valid rows found in the coaching record table.")
    else:
        # Create a DataFrame
        coaching_record_df = pd.DataFrame(data, columns=headers)

        # Save to a CSV file
        file_path = "Frank_Vogel_Coaching_Record_Cleaned.csv"
        coaching_record_df.to_csv(file_path, index=False)

        print(f"Coaching record table saved to {file_path}")


Coaching record table saved to Frank_Vogel_Coaching_Record_Cleaned.csv


In [27]:
import pandas as pd

# Data for the 2020-21 Milwaukee Bucks Team Miscellaneous Stats
bucks_misc_20_21_data = {
    "Category": [
        "W", "L", "PW", "PL", "MOV", "SOS", "SRS", "ORtg", "DRtg", "Pace",
        "FTr", "3PAr", "eFG%", "TOV%", "ORB%", "FT/FGA", "eFG%", "TOV%", 
        "DRB%", "FT/FGA", "Arena", "Attendance"
    ],
    "Team": [
        46, 26, 48, 24, 5.89, -0.32, 5.57, 117.2, 111.4, 102.2, 
        0.233, 0.404, 0.566, 12.0, 23.3, 0.177, 0.536, 11.5, 
        79.7, 0.157, "Fiserv Forum", "64,780"
    ],
    "Lg Rank": [
        7, 24, 5, 5, 3, 30, 4, 6, 10, 2, 
        24, 12, 2, 12, 9, 26, 13, 24, 
        3, 1, "", 10
    ]
}

# Convert to DataFrame
bucks_misc_20_21_df = pd.DataFrame(bucks_misc_20_21_data)

# Save to CSV
file_path = "Milwaukee_Bucks_2020_21_Team_Misc_Stats.csv"
bucks_misc_20_21_df.to_csv(file_path, index=False)

print(f"CSV file saved as {file_path}")


CSV file saved as Milwaukee_Bucks_2020_21_Team_Misc_Stats.csv


In [6]:
import pandas as pd

# Adding data for 2019-20, 2020-21, and 2021-22 Finals Four Factors
additional_finals_four_factors = {
    "Category": ["Pace", "eFG%", "TOV%", "ORB%", "FT/FGA", "ORtg", "PTS"],
    "2019_20_LAL": [94.1, 0.547, 13.0, 27.3, 0.200, 116.9, 110.0],
    "2019_20_MIA": [94.1, 0.534, 12.1, 16.3, 0.247, 111.1, 104.5],
    "2020_21_MIL": [96.0, 0.529, 10.2, 29.2, 0.190, 116.3, 111.7],
    "2020_21_PHO": [96.0, 0.555, 12.3, 17.4, 0.176, 113.8, 109.3],
    "2021_22_GSW": [95.3, 0.533, 12.9, 22.7, 0.136, 110.0, 104.8],
    "2021_22_BOS": [95.3, 0.522, 15.6, 23.3, 0.180, 105.8, 100.8]
}

# Load the existing Finals Four Factors dataset
existing_file_path = "Finals_Four_Factors_2022_23_and_2023_24.csv"
existing_finals_four_factors_df = pd.read_csv(existing_file_path)

# Create a new DataFrame for the additional data
additional_finals_four_factors_df = pd.DataFrame(additional_finals_four_factors)

# Combine the datasets
combined_finals_four_factors_df = pd.concat(
    [existing_finals_four_factors_df, additional_finals_four_factors_df.drop(columns=["Category"])],
    axis=1
)

# Update Category column for better readability
combined_finals_four_factors_df["Category"] = additional_finals_four_factors["Category"]

# Save the combined dataset
combined_file_path = "Combined_Finals_Four_Factors.csv"
combined_finals_four_factors_df.to_csv(combined_file_path, index=False)

print(f"Combined dataset saved as {combined_file_path}")


FileNotFoundError: [Errno 2] No such file or directory: 'Finals_Four_Factors_2022_23_and_2023_24.csv'

In [8]:
##print home records of finals teams

from nba_api.stats.endpoints import leaguegamefinder
import pandas as pd

# Fetch the Heat's games
team_id = "1610612749"  # Miami Heat Team ID
gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id, season_nullable="2020-21")

# Convert data to DataFrame
games = gamefinder.get_data_frames()[0]

# Filter home games
home_games = games[games['MATCHUP'].str.contains('vs.')]

# Use PLUS_MINUS if PTS_OPP is not available
if 'PTS' in home_games.columns and 'PTS_OPP' in home_games.columns:
    home_games['POINT_DIFFERENTIAL'] = home_games['PTS'] - home_games['PTS_OPP']
elif 'PLUS_MINUS' in home_games.columns:
    home_games['POINT_DIFFERENTIAL'] = home_games['PLUS_MINUS']
else:
    print("Error: Columns for points or PLUS_MINUS not found.")
    home_games['POINT_DIFFERENTIAL'] = None

# Select relevant columns
selected_columns = ['GAME_DATE', 'MATCHUP', 'RESULT', 'PTS', 'POINT_DIFFERENTIAL']
available_columns = [col for col in selected_columns if col in home_games.columns]
home_record = home_games[available_columns]

# Save to CSV
csv_file_path = "bucks_home_record_with_point_differentials_2020_2021.csv"
home_record.to_csv(csv_file_path, index=False)

print(f"Home record for Miami Heat with point differentials saved to {csv_file_path}.")


Home record for Miami Heat with point differentials saved to bucks_home_record_with_point_differentials_2020_2021.csv.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [None]:
from nba_api.stats.endpoints import leaguegamefinder
import pandas as pd

# Define teams and seasons
teams_seasons = [
    {"team": "Boston Celtics", "season": "2023-24"},
    {"team": "Dallas Mavericks", "season": "2023-24"},
    {"team": "Denver Nuggets", "season": "2022-23"},
    {"team": "Miami Heat", "season": "2022-23"},
    {"team": "Golden State Warriors", "season": "2021-22"},
    {"team": "Boston Celtics", "season": "2021-22"},
    {"team": "Milwaukee Bucks", "season": "2020-21"},
    {"team": "Phoenix Suns", "season": "202-21"},
    {"team": "Los Angeles Lakers", "season": "2019-20"},
    {"team": "Miami Heat", "season": "2019-20"},
]

# Initialize results list
results = []

for entry in teams_seasons:
    team = entry["team"]
    season = entry["season"]

    print(f"\nProcessing {team} in {season}...")

    try:
        # Fetch all games for the season
        finder = leaguegamefinder.LeagueGameFinder(season_nullable=season)
        games = finder.get_data_frames()[0]

        # Filter playoff games (Game IDs starting with "004")
        playoff_games = games[games['GAME_ID'].str.startswith('004')]

        # Filter games involving the specific team
        team_playoff_games = playoff_games[
            playoff_games['TEAM_NAME'] == team
        ]

        if team_playoff_games.empty:
            print(f"No playoff games found for {team} in {season}.")
            continue

        print(f"Playoff games found for {team} in {season}: {len(team_playoff_games)}")

        # Add relevant data to results
        for _, row in team_playoff_games.iterrows():
            result = {
                "Team": team,
                "Season": season,
                "Game_ID": row["GAME_ID"],
                "Matchup": row["MATCHUP"],
                "Game_Date": row["GAME_DATE"],
                "WL": row["WL"],
                "PTS": row.get("PTS", "N/A"),  # Default to "N/A" if column is missing
                "PTS_OPP": row.get("PTS_OPP", "N/A"),  # Default to "N/A" if column is missing
                "PLUS_MINUS": row.get("PLUS_MINUS", "N/A")  # Default to "N/A" if column is missing
            }
            results.append(result)

    except Exception as e:
        print(f"Error processing {team} in {season}: {e}")

# Convert results to DataFrame and CSV
if results:
    df = pd.DataFrame(results)
    csv_path = "team_specific_playoff_data_2020_2024.csv"
    df.to_csv(csv_path, index=False)
    print(f"\nData saved to {csv_path}.")
else:
    print("\nNo playoff data to save.")



Processing Boston Celtics in 2023-24...
Error processing Boston Celtics in 2023-24: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)

Processing Dallas Mavericks in 2023-24...
Playoff games found for Dallas Mavericks in 2023-24: 22

Processing Denver Nuggets in 2022-23...


In [None]:
from nba_api.stats.endpoints import leaguegamefinder, boxscoretraditionalv2
import pandas as pd

# Define seasons and team IDs
seasons = [
    {"season": "2023-24", "team_ids": ["1610612738", "1610612742"]},  # Boston Celtics, Dallas Mavericks
    {"season": "2022-23", "team_ids": ["1610612743", "1610612748"]},  # Denver Nuggets, Miami Heat
    {"season": "2021-22", "team_ids": ["1610612744", "1610612738"]},  # Golden State Warriors, Boston Celtics
    {"season": "2020-21", "team_ids": ["1610612749", "1610612756"]},  # Milwaukee Bucks, Phoenix Suns
    {"season": "2019-20", "team_ids": ["1610612747", "1610612748"]}, # Los Angeles Lakers, Miami Heat
]

# Initialize finals data
finals_data = []

# Process each season
for season in seasons:
    print(f"Processing season: {season['season']}")

    try:
        # Get all playoff games
        game_finder = leaguegamefinder.LeagueGameFinder(season_nullable=season["season"], season_type_nullable="Playoffs")
        playoff_games = game_finder.get_data_frames()[0]

        # Filter games for Finals by checking GAME_ID prefix and round description
        finals_games = playoff_games[playoff_games['GAME_ID'].str.startswith("004")]
        print(f"Total playoff games found for {season['season']}: {len(playoff_games)}")
        print(f"Finals games found for {season['season']}: {len(finals_games)}")

        if finals_games.empty:
            print(f"No Finals games found for season {season['season']}. Skipping.")
            continue

        # Process each team in Finals
        for team_id in season["team_ids"]:
            team_finals_games = finals_games[finals_games["TEAM_ID"] == int(team_id)]

            if team_finals_games.empty:
                print(f"No Finals games found for team ID {team_id} in season {season['season']}.")
                continue

            for _, game in team_finals_games.iterrows():
                game_id = game["GAME_ID"]
                matchup = game["MATCHUP"]
                game_date = game["GAME_DATE"]

                print(f"Processing game {game_id} for team ID {team_id}...")

                # Fetch box score data
                boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
                player_stats = boxscore.get_data_frames()[0]

                # Compute average points scored and allowed
                team_stats = player_stats[player_stats["TEAM_ID"] == int(team_id)]
                avg_points_scored = team_stats["PTS"].sum()
                avg_points_allowed = player_stats[player_stats["TEAM_ID"] != int(team_id)]["PTS"].sum()

                # Append data
                finals_data.append({
                    "Season": season["season"],
                    "Team_ID": team_id,
                    "Game_ID": game_id,
                    "Matchup": matchup,
                    "Game_Date": game_date,
                    "Avg_Points_Scored": avg_points_scored,
                    "Avg_Points_Allowed": avg_points_allowed,
                })

                print(f"Game {game_id} processed. Points Scored: {avg_points_scored}, Points Allowed: {avg_points_allowed}")

    except Exception as e:
        print(f"Error processing season {season['season']}: {e}")

# Save results
if finals_data:
    df = pd.DataFrame(finals_data)
    csv_file_path = "refined_finals_data.csv"
    df.to_csv(csv_file_path, index=False)
    print(f"Finals data saved to {csv_file_path}.")
else:
    print("No Finals data to save.")


Processing season: 2023-24
Total playoff games found for 2023-24: 190
Finals games found for 2023-24: 164
Processing game 0042300405 for team ID 1610612738...
Game 0042300405 processed. Points Scored: 106.0, Points Allowed: 88.0
Processing game 0042300404 for team ID 1610612738...
Game 0042300404 processed. Points Scored: 84.0, Points Allowed: 122.0
Processing game 0042300403 for team ID 1610612738...
Game 0042300403 processed. Points Scored: 106.0, Points Allowed: 99.0
Processing game 0042300402 for team ID 1610612738...
Game 0042300402 processed. Points Scored: 105.0, Points Allowed: 98.0
Processing game 0042300401 for team ID 1610612738...
Game 0042300401 processed. Points Scored: 107.0, Points Allowed: 89.0
Processing game 0042300304 for team ID 1610612738...
Game 0042300304 processed. Points Scored: 105.0, Points Allowed: 102.0
Processing game 0042300303 for team ID 1610612738...
Game 0042300303 processed. Points Scored: 114.0, Points Allowed: 111.0
Processing game 0042300302 for 

In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# URL of the target page
url = "https://www.basketball-reference.com/playoffs/"

# Fetch the page
response = requests.get(url)
if response.status_code != 200:
    print(f"Error fetching the page. Status code: {response.status_code}")
    exit()

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

# Locate the table with ID 'all_champions_index'
table = soup.find('table', id='all_champions_index')
if table is None:
    print("Error: Table with ID 'all_champions_index' not found.")
    exit()

# Read the table into a DataFrame using pandas
df = pd.read_html(str(table))[0]

# Debug: Print the raw data
print("Initial DataFrame:")
print(df.head())
print("Columns:", df.columns)

# Strip whitespace from column names
df.columns = df.columns.str.strip()

# Rename the first column to 'Year' if it contains year-like data
if 'Year' not in df.columns and len(df.columns) > 0:
    df.rename(columns={df.columns[0]: 'Year'}, inplace=True)

# Ensure 'Year' column exists and is numeric
if 'Year' in df.columns:
    # Remove non-numeric year rows
    df = df[df['Year'].apply(lambda x: str(x).isdigit())]
    df['Year'] = df['Year'].astype(int)  # Convert to integers
else:
    print("Error: 'Year' column not found in the DataFrame.")
    exit()

# Filter rows for years 2000–2024
df = df[(df['Year'] >= 2000) & (df['Year'] <= 2024)]

# Debug: Print the filtered DataFrame
print("Filtered DataFrame content:")
print(df)

# Save the filtered DataFrame to a CSV file
csv_file_path = "nba_aba_champions_2000_2024.csv"
df.to_csv(csv_file_path, index=False)
print(f"Filtered data saved to {csv_file_path}.")


Error: Table with ID 'all_champions_index' not found.


ValueError: No tables found

: 