In [2]:
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.library.parameters import SeasonAll
import pandas as pd


In [5]:
# Roster of currently active players, as returned by the static players helper
player_dict = players.get_active_players()

# Season labels in "YYYY-YY" form, covering 2015-16 through 2021-22
seasons = [f"{year}-{str(year+1)[-2:]}" for year in range(2015, 2022)]

total_seasons = len(seasons)

# season label -> record of that season's best qualifying 3-point shooter
season_top_shooters = {}

for i, season in enumerate(seasons):
    # Qualifying shooters collected for this season only
    season_3pt_shooters = []

    for roster_entry in player_dict:
        pid = roster_entry['id']
        player_name = f"{roster_entry['first_name']} {roster_entry['last_name']}"

        try:
            # One request per player per season; the first returned frame is
            # used as that player's game log
            log_frame = playergamelog.PlayerGameLog(player_id=pid, season=season).get_data_frames()[0]

            # Season totals of 3-point attempts and makes across all logged games
            attempts = log_frame['FG3A'].sum()
            makes = log_frame['FG3M'].sum()

            # Only players with at least 100 attempts qualify
            if attempts >= 100:
                record = {
                    'Player': player_name,
                    '3PT Efficiency': makes / attempts,
                    '3PA': attempts,
                }
                season_3pt_shooters.append(record)

        except Exception as e:
            # Report the failure and move on to the next player
            print(f"Error processing player {player_name} for season {season}: {str(e)}")

    # Keep the qualifying shooter with the highest make rate, if there is one
    if season_3pt_shooters:
        season_top_shooters[season] = max(
            season_3pt_shooters,
            key=lambda rec: rec['3PT Efficiency'],
        )

    # Progress report after each season finishes
    print(f"Processed {i+1} out of {total_seasons} seasons. ({(i+1)/total_seasons*100:.2f}%)")

# Final summary: one line per season that had a qualifying shooter
print("\nLeague Leaders in 3-Point Shooting Percentage by Season (min 100 attempts):")
for season, shooter in season_top_shooters.items():
    print(f"{season}: {shooter['Player']} - {shooter['3PT Efficiency']:.3f} (3PA: {shooter['3PA']})")

Processed 1 out of 7 seasons. (14.29%)
Processed 2 out of 7 seasons. (28.57%)
Processed 3 out of 7 seasons. (42.86%)
Processed 4 out of 7 seasons. (57.14%)
Processed 5 out of 7 seasons. (71.43%)
Processed 6 out of 7 seasons. (85.71%)
Processed 7 out of 7 seasons. (100.00%)

League Leaders in 3-Point Shooting Percentage by Season (min 100 attempts):
2015-16: Josh Richardson - 0.461 (3PA: 115)
2016-17: Joe Ingles - 0.441 (3PA: 279)
2017-18: Reggie Bullock Jr. - 0.445 (3PA: 281)
2018-19: Joe Harris - 0.474 (3PA: 386)
2019-20: Seth Curry - 0.452 (3PA: 321)
2020-21: Joe Harris - 0.475 (3PA: 444)
2021-22: Luke Kennard - 0.449 (3PA: 423)


In [7]:
import random
from sklearn.linear_model import LinearRegression
from nba_api.stats.endpoints import playercareerstats, commonplayerinfo

def get_player_height(player_id):
    """Look up a player's listed height through the CommonPlayerInfo endpoint.

    Args:
        player_id: Player id forwarded to ``commonplayerinfo.CommonPlayerInfo``.

    Returns:
        The value stored under ``'HEIGHT'`` in the first ``'CommonPlayerInfo'``
        record of the normalized response, or ``None`` when the response has
        no such record or the record has no ``'HEIGHT'`` entry.
    """
    response = commonplayerinfo.CommonPlayerInfo(player_id=player_id)
    records = response.get_normalized_dict().get('CommonPlayerInfo')

    # A missing key or an empty row list would otherwise raise TypeError /
    # IndexError on the [0] lookup; callers already treat None as "no height".
    if not records:
        return None

    return records[0].get('HEIGHT')

# Collect height and career 3-point percentage for every active player, then
# fit a linear regression of 3PT% on height (expressed in inches).

player_dict = players.get_active_players()
player_data = []

for player in player_dict:
    player_id = player['id']
    player_name = player['full_name']

    try:
        # Fetch career stats for the player; the first frame holds the rows
        # that are summed below
        career = playercareerstats.PlayerCareerStats(player_id=player_id)
        career_df = career.get_data_frames()[0]

        # NOTE(review): for seasons split across teams the endpoint appears to
        # add a combined row with TEAM_ABBREVIATION == 'TOT' next to the
        # per-team rows; summing both would count that season twice. Confirm
        # against live data. The filter is a no-op when no such row exists.
        if 'TEAM_ABBREVIATION' in career_df.columns:
            career_df = career_df[career_df['TEAM_ABBREVIATION'] != 'TOT']

        # Calculate career 3-point percentage (None when there are no attempts)
        total_3pa = career_df['FG3A'].sum()
        total_3pm = career_df['FG3M'].sum()
        three_pt_pct = total_3pm / total_3pa if total_3pa > 0 else None

        # Get player height
        height = get_player_height(player_id)
    except Exception as e:
        # Same policy as the per-season loop earlier in the notebook: report the
        # failure and keep going, so one bad request does not discard the
        # data already collected.
        print(f"Error processing player {player_name}: {str(e)}")
        continue

    if height is not None and three_pt_pct is not None:
        player_data.append({'Player': player_name, 'Height': height, '3PT%': three_pt_pct})

# Create a DataFrame; explicit columns keep the schema stable even when no
# player was collected
df = pd.DataFrame(player_data, columns=['Player', 'Height', '3PT%'])

# Convert height to a numeric value in inches so it can be used as a feature.
# Heights are presumably "feet-inches" text such as "6-7" (TODO confirm);
# values that are already numeric are kept as-is, anything else becomes NaN.
height_text = df['Height'].astype(str).str.strip()
feet_inches = height_text.str.extract(r'^(\d+)-(\d+)$')
height_inches = pd.to_numeric(feet_inches[0]) * 12 + pd.to_numeric(feet_inches[1])
height_inches = height_inches.fillna(pd.to_numeric(height_text, errors='coerce'))

# Drop rows with missing values (including heights that could not be parsed);
# reassigning instead of mutating in place keeps the cell safe to re-run
df = df.assign(Height=height_inches).dropna()

if df.empty:
    raise ValueError("No players with a usable height and 3PT% were collected")

# Create a linear regression model
X = df[['Height']]
y = df['3PT%']
model = LinearRegression().fit(X, y)

# Output the coefficient and R-squared value
print('Coefficient:', model.coef_)
print('R-squared:', model.score(X, y))

ModuleNotFoundError: No module named 'sklearn'