In [11]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# URL of the Basketball Reference page
url = "https://www.basketball-reference.com/leagues/NBA_2020.html"

# Fetch the page. The timeout keeps the cell from hanging forever on a stalled
# connection, and raise_for_status() stops here on an HTTP error (e.g. a
# rate-limit response) instead of letting an error page be parsed below.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the raw HTML; `soup` is read as a global by parse_table()
soup = BeautifulSoup(response.content, "html.parser")

# Define a function to parse the standings table
def parse_table(table_id, conference_name):
    """Turn one standings table of the parsed page into a DataFrame.

    Reads the module-level ``soup`` object, so it always parses whichever
    page was most recently assigned to that name.

    Parameters
    ----------
    table_id : str
        Value of the ``id`` attribute of the ``<table>`` element to parse.
    conference_name : str
        Label written into the leading "Conference" column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per table row that has a ``<th scope="row">`` cell. The
        columns are "Conference" followed by the header texts of the table.

    Raises
    ------
    ValueError
        If the page contains no table with the requested id, or (raised by
        pandas) if a row's cell count does not match the header count.
    """
    table = soup.find("table", {"id": table_id})
    if table is None:
        # Fail with an explicit message rather than an AttributeError on None
        raise ValueError(f"No table with id {table_id!r} found on the page.")

    # Header texts, including the header of the team-name column
    headers = [th.text.strip() for th in table.find("thead").find_all("th")]

    data = []
    for row in table.find("tbody").find_all("tr"):
        team_cell = row.find("th", {"scope": "row"})
        if team_cell is None:
            # Separator rows carry no row-header cell
            continue
        stats = [cell.text.strip() for cell in row.find_all("td")]
        data.append([team_cell.text.strip()] + stats)

    df = pd.DataFrame(data, columns=headers)
    df.insert(0, "Conference", conference_name)  # Add a conference column
    return df


# Build one standings frame per conference
eastern_df = parse_table(
    table_id="confs_standings_E",
    conference_name="Eastern Conference",
)
western_df = parse_table(
    table_id="confs_standings_W",
    conference_name="Western Conference",
)

# Write each frame to its own CSV file, leaving out the row index
for standings_df, csv_name in (
    (eastern_df, "NBA_2020_Eastern_Conference_Standings.csv"),
    (western_df, "NBA_2020_Western_Conference_Standings.csv"),
):
    standings_df.to_csv(csv_name, index=False)

# Confirm that both files were written
print("Eastern and Western Conference standings saved to CSV files.")


Eastern and Western Conference standings saved to CSV files.


In [10]:
import pandas as pd

# Hand-entered summary of the last five NBA champions, newest season first.
# Each record holds the season, champion, Finals MVP, head coach,
# regular-season record, playoff record, and the four playoff opponents in
# round order with the series result in parentheses.
champions_data = [
    {
        "Season": "2023-24",
        "Champion": "Boston Celtics",
        "Finals MVP": "Jaylen Brown",
        "Coach": "Joe Mazzulla",  # spelling corrected (was "Mazulla")
        "Record": "64-18",
        "Playoff Record": "16-3",
        "Playoff Opponents": [
            "Miami Heat (4-1)",
            "Cleveland Cavaliers (4-1)",
            "Indiana Pacers (4-0)",
            "Dallas Mavericks (4-1)"
        ]
    },
    {
        "Season": "2022-23",
        "Champion": "Denver Nuggets",
        "Finals MVP": "Nikola Jokić",
        "Coach": "Michael Malone",
        "Record": "53-29",
        "Playoff Record": "16-4",
        "Playoff Opponents": [
            "Minnesota Timberwolves (4-1)",
            "Phoenix Suns (4-2)",
            "Los Angeles Lakers (4-0)",
            "Miami Heat (4-1)"
        ]
    },
    {
        "Season": "2021-22",
        "Champion": "Golden State Warriors",
        "Finals MVP": "Stephen Curry",
        "Coach": "Steve Kerr",
        "Record": "53-29",
        "Playoff Record": "16-6",
        "Playoff Opponents": [
            "Denver Nuggets (4-1)",
            "Memphis Grizzlies (4-2)",
            "Dallas Mavericks (4-1)",
            "Boston Celtics (4-2)"
        ]
    },
    {
        "Season": "2020-21",
        "Champion": "Milwaukee Bucks",
        "Finals MVP": "Giannis Antetokounmpo",
        "Coach": "Mike Budenholzer",
        "Record": "46-26",
        "Playoff Record": "16-7",
        "Playoff Opponents": [
            "Miami Heat (4-0)",
            "Brooklyn Nets (4-3)",
            "Atlanta Hawks (4-2)",
            "Phoenix Suns (4-2)"
        ]
    },
    {
        "Season": "2019-20",
        "Champion": "Los Angeles Lakers",
        "Finals MVP": "LeBron James",
        "Coach": "Frank Vogel",
        "Record": "52-19",
        "Playoff Record": "16-5",
        "Playoff Opponents": [
            "Portland Trail Blazers (4-1)",
            "Houston Rockets (4-1)",
            "Denver Nuggets (4-1)",
            "Miami Heat (4-2)"
        ]
    },
]

# One row per champion record
df_champions = pd.DataFrame(champions_data)

# Flatten each list of opponents into one comma-separated string so the
# value fits in a single CSV cell
df_champions["Playoff Opponents"] = df_champions["Playoff Opponents"].map(", ".join)

# Write the table to disk without the row index
file_path = "NBA_Champions_Last_5_Seasons_Updated.csv"
df_champions.to_csv(file_path, index=False)

print(f"Data saved to {file_path}")


Data saved to NBA_Champions_Last_5_Seasons_Updated.csv


In [13]:
## Getting the championship coaches' datasets

import requests
import pandas as pd
from bs4 import BeautifulSoup

# URL for the coach's Basketball Reference page
url = "https://www.basketball-reference.com/coaches/vogelfr99c.html"

# Send a GET request to the URL. The timeout prevents the cell from hanging on
# a stalled connection; raise_for_status() stops on an HTTP error instead of
# letting an error page be parsed and reported as "no table found".
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the page with BeautifulSoup
soup = BeautifulSoup(response.content, "html.parser")

# Find the coaching record table
table = soup.find("table", {"id": "coach-stats"})

# Check if the table is found
if table is None:
    print("No coaching record table found on the page.")
else:
    # Extract the headers. The column names are taken from the last header
    # row, which is the second row when a grouping row sits above it, and
    # which also works when the table has only one header row.
    raw_headers = table.find("thead").find_all("tr")
    headers = [th.text.strip() for th in raw_headers[-1].find_all("th")]

    # Extract table rows
    data = []
    rows = table.find("tbody").find_all("tr")
    for row in rows:
        # Keep only rows that have a row-header cell and do not mention
        # "Assistant Coach" anywhere in their text
        row_header = row.find("th", {"scope": "row"})
        if row_header is not None and "Assistant Coach" not in row.text:
            # Row header cell followed by every data cell, in document order
            cells = [cell.text.strip() for cell in row.find_all(["th", "td"])]
            data.append(cells)

    # Check if any valid rows were extracted
    if not data:
        print("No valid rows found in the coaching record table.")
    else:
        # Create a DataFrame (pandas raises if a row's cell count differs
        # from the number of headers)
        coaching_record_df = pd.DataFrame(data, columns=headers)

        # Save to a CSV file
        file_path = "Frank_Vogel_Coaching_Record_Cleaned.csv"
        coaching_record_df.to_csv(file_path, index=False)

        print(f"Coaching record table saved to {file_path}")


Coaching record table saved to Frank_Vogel_Coaching_Record_Cleaned.csv


In [27]:
import pandas as pd

# Data for the 2020-21 Milwaukee Bucks Team Miscellaneous Stats.
# The three lists are parallel: entry i of "Team" and "Lg Rank" belongs to
# entry i of "Category".
# NOTE(review): the second eFG% / TOV% / FT/FGA group (the one containing
# DRB%) is assumed to hold the defensive (opponent) values, so those labels
# are prefixed with "Opp " to keep every category unique - confirm against the
# source table.
bucks_misc_20_21_data = {
    "Category": [
        "W", "L", "PW", "PL", "MOV", "SOS", "SRS", "ORtg", "DRtg", "Pace",
        "FTr", "3PAr", "eFG%", "TOV%", "ORB%", "FT/FGA", "Opp eFG%", "Opp TOV%",
        "DRB%", "Opp FT/FGA", "Arena", "Attendance"
    ],
    "Team": [
        46, 26, 48, 24, 5.89, -0.32, 5.57, 117.2, 111.4, 102.2,
        0.233, 0.404, 0.566, 12.0, 23.3, 0.177, 0.536, 11.5,
        79.7, 0.157, "Fiserv Forum", "64,780"
    ],
    "Lg Rank": [
        7, 24, 5, 5, 3, 30, 4, 6, 10, 2,
        24, 12, 2, 12, 9, 26, 13, 24,
        3, 1, "", 10  # the arena row has no rank
    ]
}

# Convert to DataFrame
bucks_misc_20_21_df = pd.DataFrame(bucks_misc_20_21_data)

# Save to CSV
file_path = "Milwaukee_Bucks_2020_21_Team_Misc_Stats.csv"
bucks_misc_20_21_df.to_csv(file_path, index=False)

print(f"CSV file saved as {file_path}")


CSV file saved as Milwaukee_Bucks_2020_21_Team_Misc_Stats.csv


In [30]:
import pandas as pd

# Adding data for 2019-20, 2020-21, and 2021-22 Finals Four Factors.
# One column per team and season; entry i of each column belongs to entry i
# of "Category".
additional_finals_four_factors = {
    "Category": ["Pace", "eFG%", "TOV%", "ORB%", "FT/FGA", "ORtg", "PTS"],
    "2019_20_LAL": [94.1, 0.547, 13.0, 27.3, 0.200, 116.9, 110.0],
    "2019_20_MIA": [94.1, 0.534, 12.1, 16.3, 0.247, 111.1, 104.5],
    "2020_21_MIL": [96.0, 0.529, 10.2, 29.2, 0.190, 116.3, 111.7],
    "2020_21_PHO": [96.0, 0.555, 12.3, 17.4, 0.176, 113.8, 109.3],
    "2021_22_GSW": [95.3, 0.533, 12.9, 22.7, 0.136, 110.0, 104.8],
    "2021_22_BOS": [95.3, 0.522, 15.6, 23.3, 0.180, 105.8, 100.8]
}

# Load the existing Finals Four Factors dataset
existing_file_path = "Finals_Four_Factors_2022_23_and_2023_24.csv"
existing_finals_four_factors_df = pd.read_csv(existing_file_path)

# Create a new DataFrame for the additional data
additional_finals_four_factors_df = pd.DataFrame(additional_finals_four_factors)

# The two tables are joined side by side, so they must describe the same
# number of categories; fail early with a clear message otherwise.
if len(existing_finals_four_factors_df) != len(additional_finals_four_factors_df):
    raise ValueError(
        f"{existing_file_path} has {len(existing_finals_four_factors_df)} rows, "
        f"expected {len(additional_finals_four_factors_df)} (one per category)."
    )

# When the existing file uses the same category labels, line the new rows up
# with the existing row order by label, so a different ordering in the CSV
# cannot pair values with the wrong category. When the labels differ, fall
# back to joining by position.
aligned_additional_df = additional_finals_four_factors_df
if "Category" in existing_finals_four_factors_df.columns:
    existing_labels = existing_finals_four_factors_df["Category"]
    if set(existing_labels) == set(additional_finals_four_factors_df["Category"]):
        aligned_additional_df = (
            additional_finals_four_factors_df
            .set_index("Category")
            .loc[existing_labels]
            .reset_index()
        )

# Combine the datasets column-wise
combined_finals_four_factors_df = pd.concat(
    [existing_finals_four_factors_df, aligned_additional_df.drop(columns=["Category"])],
    axis=1
)

# Update Category column for better readability (labels follow the row order
# of the combined table)
combined_finals_four_factors_df["Category"] = aligned_additional_df["Category"].tolist()

# Save the combined dataset
combined_file_path = "Combined_Finals_Four_Factors.csv"
combined_finals_four_factors_df.to_csv(combined_file_path, index=False)

print(f"Combined dataset saved as {combined_file_path}")


Combined dataset saved as Combined_Finals_Four_Factors.csv
