In [30]:
import numpy as np

import pandas as pd

from nba_api.stats.static import players
from nba_api.stats.endpoints import boxscoretraditionalv2
from nba_api.stats.endpoints import leaguegamefinder

from unidecode import unidecode



In [31]:
class Player:
    def __init__(self, player_name):
        self.player_name = player_name
        
    def getPlayerId(self):
        active_players = players.get_active_players()
        for player in active_players:
            if player['full_name'] == self.player_name:
                self.player_id = player['id']
                return player['id']
        return "No player found"
    def getGameStats2324(self):
        try:
            find_games = leaguegamefinder.LeagueGameFinder(player_id_nullable=self.player_id, season_nullable="2023-24")
            games = find_games.get_data_frames()[0]
            return games
        except Exception as e:
            print(f"Error fetching game stats for player {self.player_name}: {e}")
            
    def getGameStats2425(self):
        try:
            find_games = leaguegamefinder.LeagueGameFinder(player_id_nullable=self.player_id, season_nullable="2024-25")
            games = find_games.get_data_frames()[0]
            return games
        except Exception as e:
            print(f"Error fetching game stats for player {self.player_name}: {e}")

    def getPast6Pts(self, games):
        PTS = []
        count = 0
        c = 0
        while count < 6 and c < len(games):
            if games['MIN'][c] >= 25:
                PTS.append(games['PTS'][c])
                count += 1
            c += 1
        return PTS
    def getPast5Pts(self, games):
        PTS = []
        count = 0
        c = 0
        while count < 5 and c < len(games):
            if games['MIN'][c] >= 25:
                PTS.append(games['PTS'][c])
                count += 1
            c += 1
        return PTS

In [32]:
import time
def generateTraining():
    
    find_games = leaguegamefinder.LeagueGameFinder(season_nullable="2023-24", season_type_nullable="Regular Season")
    games = find_games.get_data_frames()[0]
    
    
    game_ids = []
    print("Running game search")
    for i in range(len(games)):
        if games['GAME_ID'][i] not in game_ids and str(games['GAME_ID'][i])[0] == '0':
            game_ids.append(games['GAME_ID'][i])
    print(f"Found {len(game_ids)} games. Running stats search.")

    
    starters = set() 
    for id in game_ids:
        try:
            boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=id)
            stats = boxscore.get_data_frames()[0]
            time.sleep(1)  
            for i in range(len(stats)):
                if stats['START_POSITION'][i] != "":
                    starters.add(stats['PLAYER_NAME'][i])
        except Exception as e:
            print(f"Error fetching box score for game {id}: {e}")
    print(f"Found {len(starters)} unique starters.")


    active_players = players.get_active_players()
    
    player_id_dict = {}
    for player in active_players:
        player_id_dict[player['full_name']] = player['id']


    
    PTS = []
    for player in starters:
        try:
            player_id = player_id_dict.get(unidecode(player))
            p = Player(player)
            p.player_id = player_id  
            games = p.getGameStats2324()
            points = p.getPast6Pts(games)
            if len(points) == 6: 
                points.insert(0, player)
                PTS.append(points)
            else:
                print(f"Player {player} has fewer than 6 games with at least 25 minutes played. Skipping.")
        except Exception as e:
            print(f"Error processing player {player}: {e}")
    PTS.reverse()
    
    return PTS

In [25]:
training_data = generateTraining()
df = pd.DataFrame(training_data, columns=['Player', 'Game1', 'Game2', 'Game3', 'Game4', 'Game5', 'Game6'])
df.to_csv('nba_player_points.csv', index=False)

Running game search
Found 1230 games. Running stats search.
Found 392 unique starters.
Player Jaylin Williams has fewer than 6 games with at least 25 minutes played. Skipping.
Player Bryce McGowens has fewer than 6 games with at least 25 minutes played. Skipping.
Player Garrison Mathews has fewer than 6 games with at least 25 minutes played. Skipping.
Player Jonathan Isaac has fewer than 6 games with at least 25 minutes played. Skipping.
Player Nassir Little has fewer than 6 games with at least 25 minutes played. Skipping.
Player Markieff Morris has fewer than 6 games with at least 25 minutes played. Skipping.
Player Drew Eubanks has fewer than 6 games with at least 25 minutes played. Skipping.
Player Svi Mykhailiuk has fewer than 6 games with at least 25 minutes played. Skipping.
Player Taj Gibson has fewer than 6 games with at least 25 minutes played. Skipping.
Player P.J. Tucker has fewer than 6 games with at least 25 minutes played. Skipping.
Player Mike Muscala has fewer than 6 ga

In [26]:
df = pd.read_csv("nba_player_points.csv")     

X = df[['Game1', 'Game2', 'Game3', 'Game4', 'Game5']]
y = df['Game6']  

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


model = LinearRegression()
model.fit(X_train, y_train)


In [33]:
def predict_next_points(games):
    games = [games]
    next_points = model.predict(games)
    return next_points[0]

player = Player("LeBron James")
player.player_id = player.getPlayerId()
games = player.getGameStats2425()
past_5 = player.getPast5Pts(games)
past_5.reverse()
print(past_5)
predicted_points = predict_next_points(past_5)
print(predicted_points)


[22, 31, 24, 33, 26]
26.182786702579723


