In [2]:
#Record: 4-3
#Parlay Record: 1-3

In [None]:
#Player Averages vs every team
import pandas as pd
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.static import players

# Step 1: Get player game log stats for each requested season
def get_player_gamelog(player_id, seasons):
    """Fetch and stack a player's game logs for several seasons.

    Args:
        player_id: Player identifier passed to ``PlayerGameLog``.
        seasons: Iterable of season strings; one request is issued per
            season, in the order given.

    Returns:
        One DataFrame holding the first result frame of every season,
        concatenated in request order and re-indexed from 0.
    """
    season_frames = [
        playergamelog.PlayerGameLog(player_id=player_id, season=season_label).get_data_frames()[0]
        for season_label in seasons
    ]
    return pd.concat(season_frames, ignore_index=True)

# Step 2: Calculate the averages for the player against each team
def calculate_averages_vs_teams(player_data):
    """Average a player's box-score stats against each opponent.

    The caller's DataFrame is left untouched: the helper columns are added to
    a copy, so the function can be called repeatedly on the same frame.

    Args:
        player_data: Game-log DataFrame with a ``MATCHUP`` column (the
            opponent is its last whitespace-separated token), a ``WL`` column
            holding ``'W'``/``'L'`` and the numeric stat columns averaged
            below.

    Returns:
        DataFrame with one row per opponent: ``OPPONENT``, then
        ``Win_Loss_Ratio`` (mean of W=1 / L=0, i.e. the share of games won),
        then the mean of every stat column.
    """
    stat_columns = [
        'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT',
        'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
        'BLK', 'TOV', 'PF', 'PTS',
    ]

    # Work on a copy: overwriting WL in place would turn a second call's
    # 'W'/'L' mapping into NaN for the caller.
    games = player_data.copy()

    # The opponent abbreviation is the final token of the matchup text.
    games['OPPONENT'] = games['MATCHUP'].str.split().str[-1]

    # Convert WL to numerical values: W = 1, L = 0
    games['WL'] = games['WL'].map({'W': 1, 'L': 0})

    averages = games.groupby('OPPONENT')[stat_columns].mean().reset_index()

    # Rename columns for clarity
    return averages.rename(columns={'WL': 'Win_Loss_Ratio'})

# Main function
def main():
    """Prompt for a player, fetch three seasons of games and print the averages.

    The typed name is compared case-insensitively against the ``full_name``
    of every entry returned by ``players.get_players()``; the first exact
    match supplies the player id. Prints "Player not found." and returns when
    nothing matches.
    """
    player_name = input("Enter a player's full name (e.g., LeBron James): ")

    # First entry whose full name equals the input, ignoring case.
    wanted = player_name.lower()
    player_id = next(
        (entry['id'] for entry in players.get_players() if entry['full_name'].lower() == wanted),
        None,
    )
    if player_id is None:
        print("Player not found.")
        return

    # Seasons to retrieve data for
    seasons = ['2024-25', '2023-24', '2022-23']

    # Fetch the logs, aggregate them per opponent and show the result.
    team_averages = calculate_averages_vs_teams(get_player_gamelog(player_id, seasons))
    print(team_averages)


if __name__ == "__main__":
    main()


In [None]:
#Linear Regression
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from nba_api.stats.static import players, teams
from nba_api.stats.endpoints import playergamelog, commonteamroster
import time
from requests.exceptions import ReadTimeout, ConnectionError

def get_team_roster(team_abbreviation):
    """Return the player names on a team's roster, or ``[]`` on any failure.

    Args:
        team_abbreviation: Abbreviation handed to
            ``teams.find_team_by_abbreviation``.

    Returns:
        The ``PLAYER`` column of the first ``CommonTeamRoster`` frame as a
        list. Every exception, including the ``ValueError`` raised here for
        an unknown abbreviation, is printed and turned into an empty list.
    """
    player_names = []
    try:
        team_record = teams.find_team_by_abbreviation(team_abbreviation)
        if not team_record:
            raise ValueError(f"Team '{team_abbreviation}' not found.")

        roster_frame = commonteamroster.CommonTeamRoster(team_id=team_record['id']).get_data_frames()[0]
        player_names = roster_frame['PLAYER'].tolist()
    except Exception as e:
        print(f"Error getting roster: {e}")
    return player_names

def get_player_data(player_name, max_retries=3):
    """Download a player's 2024-25 and 2023-24 game logs with retries.

    Args:
        player_name: Name passed to ``players.find_players_by_full_name``;
            the first match is used.
        max_retries: Number of attempts made when a ``ReadTimeout`` or
            ``ConnectionError`` is raised.

    Returns:
        The two seasons concatenated (current season first, index reset), or
        ``None`` when the player is unknown, when every attempt failed, or
        when ``max_retries`` allows no attempt at all.
    """
    season_labels = ('2024-25', '2023-24')  # current season first, then the previous one

    for attempt in range(max_retries):
        is_last_attempt = attempt >= max_retries - 1
        try:
            matches = players.find_players_by_full_name(player_name)
            if not matches:
                print(f"Player '{player_name}' not found.")
                return None

            player_id = matches[0]['id']

            # One request per season, issued in the order of season_labels.
            current_season, previous_season = (
                playergamelog.PlayerGameLog(
                    player_id=player_id,
                    season=label,
                    timeout=60
                ).get_data_frames()[0]
                for label in season_labels
            )

            combined_data = pd.concat([current_season, previous_season], ignore_index=True)

            print(f"Data fetched successfully for {player_name}")
            print(f"Total games: {len(combined_data)} (Current season: {len(current_season)}, Previous season: {len(previous_season)})")

            return combined_data

        except (ReadTimeout, ConnectionError):
            if is_last_attempt:
                print(f"Failed to fetch data for {player_name} after {max_retries} attempts")
                return None
            # Short pause before the next attempt.
            print(f"Attempt {attempt + 1} failed for {player_name}. Retrying...")
            time.sleep(2)

def preprocess_game_log(game_log):
    """Add date, venue and 5-game rolling-average columns to a game log.

    The input frame itself receives the parsed ``GAME_DATE``, the
    ``HOME_AWAY`` label and the float conversions; the rolling columns are
    added to a date-sorted copy, which is what gets returned.

    Args:
        game_log: DataFrame with ``GAME_DATE``, ``MATCHUP`` and the stat
            columns converted below.

    Returns:
        The games in ascending date order with ``AVG_<stat>`` columns, minus
        every row that contains a NaN (which includes the first four games,
        whose rolling window is incomplete).
    """
    float_columns = ['PTS', 'REB', 'AST', 'BLK', 'STL', 'FGM', 'FGA', 'FG_PCT',
                     'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT',
                     'OREB', 'DREB', 'TOV', 'PF', 'PLUS_MINUS']
    rolling_stats = ['PTS', 'REB', 'AST', 'BLK', 'STL', 'FGM', 'FGA', 'FTM', 'OREB', 'DREB']
    window_size = 5

    game_log['GAME_DATE'] = pd.to_datetime(game_log['GAME_DATE'])
    # An '@' in the matchup text marks an away game.
    game_log['HOME_AWAY'] = np.where(game_log['MATCHUP'].str.contains('@'), 'Away', 'Home')
    game_log[float_columns] = game_log[float_columns].astype(float)

    # Rolling windows must run over games in chronological order.
    chronological = game_log.sort_values('GAME_DATE', ascending=True)

    # NOTE(review): each window ends on the current row, so AVG_<stat>
    # includes that game's own value.
    with_averages = chronological.assign(**{
        f'AVG_{stat}': chronological[stat].rolling(window=window_size).mean()
        for stat in rolling_stats
    })

    return with_averages.dropna()

# Model training and prediction helpers (they rely on the imports and functions defined above)

def train_model(game_log):
    """Fit a linear regression that maps game features to points scored.

    Args:
        game_log: Preprocessed game log containing the feature columns listed
            below and the ``PTS`` target.

    Returns:
        The fitted ``LinearRegression``. The mean squared error on a 20%
        hold-out split (``random_state=42``) is printed as a side effect.
    """
    # NOTE(review): FGM, FG3M and FTM of the same game are among the features
    # and PTS looks derivable from them, so the printed test MSE may say
    # little about real predictive power - confirm this is intended.
    feature_columns = [
        'AVG_PTS', 'AVG_REB', 'AVG_AST', 'AVG_BLK', 'AVG_STL',
        'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT',
        'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'TOV', 'PF', 'PLUS_MINUS',
    ]
    inputs = game_log[feature_columns]
    points = game_log['PTS']

    X_train, X_test, y_train, y_test = train_test_split(inputs, points, test_size=0.2, random_state=42)

    model = LinearRegression().fit(X_train, y_train)

    # Report the hold-out error.
    mse = mean_squared_error(y_test, model.predict(X_test))
    print(f'Test MSE: {mse:.2f}')

    return model

def predict_performance_against_team(model, game_log, opponent_team):
    """Predict points against one opponent from the player's averages against it.

    Games are selected by comparing ``opponent_team`` with the last token of
    ``MATCHUP`` (the opponent), ignoring case. A plain substring match would
    also pick up games the player played *for* that team, since the player's
    own team abbreviation is part of the same text.

    Args:
        model: Fitted estimator exposing ``predict``; it receives a one-row
            DataFrame with the feature columns in training order.
        game_log: Game log with ``MATCHUP`` and the stat columns listed below.
        opponent_team: Opponent abbreviation, e.g. ``'LAC'``.

    Returns:
        The first element of ``model.predict(...)``, or ``None`` (after
        printing a message) when there are no games against the opponent.
    """
    stat_columns = ['PTS', 'REB', 'AST', 'BLK', 'STL',
                    'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT',
                    'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'TOV',
                    'PF', 'PLUS_MINUS']
    # These stats feed the model under an AVG_ prefix; the rest keep their name.
    averaged_stats = {'PTS', 'REB', 'AST', 'BLK', 'STL'}

    # Filter for games against the specific opponent
    wanted = opponent_team.strip().upper()
    opponents = game_log['MATCHUP'].str.split().str[-1].str.upper()
    opponent_games = game_log[opponents == wanted]

    if opponent_games.empty:
        print(f"No previous games found against team: {opponent_team}.")
        return None

    # Calculate averages for the opponent games
    avg_stats = opponent_games[stat_columns].mean()

    # One-row feature frame; column order follows stat_columns.
    features = pd.DataFrame({
        (f'AVG_{column}' if column in averaged_stats else column): [avg_stats[column]]
        for column in stat_columns
    })

    return model.predict(features)[0]

def main():
    """Predict points against an opponent for every player on a roster.

    Asks for a team and an opponent abbreviation, trains one linear model per
    player and prints the predictions sorted from highest to lowest. Players
    with fewer than five games, or whose processing raises, are reported and
    skipped.
    """
    team_abbrev = input("Enter team abbreviation (e.g., 'NYK' for Knicks): ")
    opponent_team = input("Enter the opponent team abbreviation (e.g., 'LAC'): ")

    roster = get_team_roster(team_abbrev)
    print(f"\nAnalyzing {len(roster)} players from {team_abbrev}...")

    predictions = {}
    for player_name in roster:
        print(f"\nProcessing {player_name}...")

        game_log = get_player_data(player_name)
        # Need at least 5 games for rolling averages
        if game_log is None or len(game_log) < 5:
            print(f"Insufficient data for {player_name}")
            continue

        try:
            processed_log = preprocess_game_log(game_log)
            predicted_points = predict_performance_against_team(
                train_model(processed_log), processed_log, opponent_team
            )
        except Exception as e:
            print(f"Error processing {player_name}: {e}")
            continue

        if predicted_points is not None:
            predictions[player_name] = predicted_points

    # Highest predicted scorer first.
    print(f"\nPredicted points against {opponent_team}:")
    print("-" * 50)
    for player in sorted(predictions, key=predictions.get, reverse=True):
        points = predictions[player]
        print(f"{player:<30} {points:.1f} points")


if __name__ == "__main__":
    main()

In [None]:
#Monte Carlo
import pandas as pd
import numpy as np
from nba_api.stats.static import players, teams
from nba_api.stats.endpoints import playergamelog, commonteamroster
import time
from requests.exceptions import ReadTimeout, ConnectionError

def get_team_roster(team_abbreviation):
    """List the players on a team, returning ``[]`` if anything goes wrong.

    Args:
        team_abbreviation: Abbreviation looked up with
            ``teams.find_team_by_abbreviation``.

    Returns:
        The ``PLAYER`` column of the team's ``CommonTeamRoster`` frame as a
        list. Any exception (an unknown abbreviation raises ``ValueError``
        internally) is printed and replaced by an empty list.
    """
    try:
        team_info = teams.find_team_by_abbreviation(team_abbreviation)
        if team_info:
            frames = commonteamroster.CommonTeamRoster(team_id=team_info['id']).get_data_frames()
            return frames[0]['PLAYER'].tolist()
        raise ValueError(f"Team '{team_abbreviation}' not found.")
    except Exception as e:
        print(f"Error getting roster: {e}")
    return []

def get_player_data(player_name, max_retries=3):
    """Fetch and combine a player's 2024-25 and 2023-24 game logs.

    Args:
        player_name: Name searched with ``players.find_players_by_full_name``
            (first match wins).
        max_retries: Attempts allowed when the request raises ``ReadTimeout``
            or ``ConnectionError``.

    Returns:
        Both seasons in one re-indexed DataFrame (current season first), or
        ``None`` if the player is not found or no attempt succeeds.
    """
    def _season_log(player_id, season):
        # First result frame of the player's log for one season.
        return playergamelog.PlayerGameLog(
            player_id=player_id,
            season=season,
            timeout=60
        ).get_data_frames()[0]

    for attempt in range(max_retries):
        try:
            player_dict = players.find_players_by_full_name(player_name)
            if player_dict:
                player_id = player_dict[0]['id']

                current_season = _season_log(player_id, '2024-25')
                previous_season = _season_log(player_id, '2023-24')
                combined_data = pd.concat([current_season, previous_season], ignore_index=True)

                print(f"Data fetched successfully for {player_name}")
                print(f"Total games: {len(combined_data)} (Current season: {len(current_season)}, Previous season: {len(previous_season)})")
                return combined_data

            print(f"Player '{player_name}' not found.")
            return None

        except (ReadTimeout, ConnectionError):
            retries_remaining = max_retries - 1 - attempt
            if retries_remaining > 0:
                print(f"Attempt {attempt + 1} failed for {player_name}. Retrying...")
                time.sleep(2)
            else:
                print(f"Failed to fetch data for {player_name} after {max_retries} attempts")
                return None

    return None

def _summarize_simulation(samples, confidence_level):
    """Reduce simulated draws to mean, median, std and a central interval."""
    return {
        "mean": samples.mean(),
        "median": np.median(samples),
        "std": samples.std(),
        "ci_lower": np.percentile(samples, (1 - confidence_level) / 2 * 100),
        "ci_upper": np.percentile(samples, (1 + confidence_level) / 2 * 100),
    }


def monte_carlo_simulation(game_log, opponent_team, num_simulations=10000, confidence_level=0.95):
    """Simulate points, rebounds and assists against one opponent.

    Games are selected by comparing ``opponent_team`` with the last token of
    ``MATCHUP`` (the opponent), ignoring case. A substring match over the
    whole text would also count games the player played *for* that team.

    For each stat, ``num_simulations`` values are drawn from a normal
    distribution with the observed mean and standard deviation (0 when only
    one game is available) and then summarised.

    Args:
        game_log: DataFrame with ``MATCHUP``, ``PTS``, ``REB`` and ``AST``.
        opponent_team: Opponent abbreviation, e.g. ``'LAC'``.
        num_simulations: Number of draws per stat.
        confidence_level: Width of the central interval, e.g. ``0.95``.

    Returns:
        ``{"points": {...}, "rebounds": {...}, "assists": {...}}`` where each
        inner dict has ``mean``, ``median``, ``std``, ``ci_lower`` and
        ``ci_upper``; ``None`` (after printing a message) when there are no
        games against the opponent.
    """
    # Filter for games against the specific opponent
    wanted = opponent_team.strip().upper()
    opponents = game_log['MATCHUP'].str.split().str[-1].str.upper()
    opponent_games = game_log[opponents == wanted]

    if opponent_games.empty:
        print(f"No previous games found against team: {opponent_team}.")
        return None

    # NOTE(review): normal draws are unbounded, so simulated counts can be
    # negative, and .std() on the array is the population form - confirm both
    # are acceptable for the small samples seen per opponent.
    results = {}
    for label, column in (("points", "PTS"), ("rebounds", "REB"), ("assists", "AST")):
        observed = opponent_games[column].to_numpy()
        spread = observed.std() if len(observed) > 1 else 0
        draws = np.random.normal(loc=observed.mean(), scale=spread, size=num_simulations)
        results[label] = _summarize_simulation(draws, confidence_level)

    return results

def main():
    """Run the Monte Carlo simulation for every player on a roster.

    Asks for a team and an opponent abbreviation, simulates each player for
    whom data and games against the opponent exist, and prints the results
    ordered by simulated mean points, highest first.
    """
    team_abbrev = input("Enter team abbreviation (e.g., 'NYK' for Knicks): ")
    opponent_team = input("Enter the opponent team abbreviation (e.g., 'LAC'): ")

    roster = get_team_roster(team_abbrev)
    print(f"\nAnalyzing {len(roster)} players from {team_abbrev}...")

    all_predictions = {}
    for player_name in roster:
        print(f"\nProcessing {player_name}...")

        game_log = get_player_data(player_name)
        if game_log is None:
            print(f"No data available for {player_name}")
            continue

        predicted_stats = monte_carlo_simulation(game_log, opponent_team)
        if predicted_stats is None:
            # No games against this opponent; the simulation already reported it.
            continue
        all_predictions[player_name] = predicted_stats

    print(f"\nMonte Carlo Predictions against {opponent_team}:")
    print("=" * 100)

    def _mean_points(entry):
        # entry is a (player_name, stats) pair
        return entry[1]['points']['mean']

    for player_name, stats in sorted(all_predictions.items(), key=_mean_points, reverse=True):
        print(f"\n{player_name}:")
        print(f"Points:   {stats['points']['mean']:.1f} ± {stats['points']['std']:.1f} "
              f"[{stats['points']['ci_lower']:.1f}, {stats['points']['ci_upper']:.1f}]")
        print(f"Rebounds: {stats['rebounds']['mean']:.1f} ± {stats['rebounds']['std']:.1f} "
              f"[{stats['rebounds']['ci_lower']:.1f}, {stats['rebounds']['ci_upper']:.1f}]")
        print(f"Assists:  {stats['assists']['mean']:.1f} ± {stats['assists']['std']:.1f} "
              f"[{stats['assists']['ci_lower']:.1f}, {stats['assists']['ci_upper']:.1f}]")


if __name__ == "__main__":
    main()

In [None]:
#Predicted PPG

from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.endpoints import commonteamroster
from nba_api.stats.static import teams

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import make_scorer

import matplotlib.pyplot as plt

# Full names of every player to predict for; filled from both rosters below.
people = list()
# Subset of `people` that belongs to the home team.
home_players = list()

# Player ids resolved from the names in `people`.
ids = list()

# Abbreviations of the two teams in the game being predicted.
team = dict(home='MIL', away='UTA')

# Maps a player name to the team that player is playing against.
match_up = dict()
# Column of a game-log row to predict, counted from the end of the row:
# pts = -3, ast = -8, reb = -9
stat = -3

#get home team from input

#get away team from input

#get stat from input

####
#Make get players from team a function
####

# Home team: resolve the abbreviation, then download the roster as a dict.
h_team = teams.find_team_by_abbreviation(abbreviation=team['home'])
h_id = h_team['id']
h_roster = commonteamroster.CommonTeamRoster(team_id=h_id)
dfh = h_roster.get_dict()
# Element 3 of every roster row is what the script uses as the player's name.
home_names = [row[3] for row in dfh['resultSets'][0]['rowSet']]
people.extend(home_names)
home_players.extend(home_names)

# Away team: same lookup; these players only go into `people`.
a_team = teams.find_team_by_abbreviation(abbreviation=team['away'])
a_id = a_team['id']
a_roster = commonteamroster.CommonTeamRoster(team_id=a_id)
dfa = a_roster.get_dict()
people.extend(row[3] for row in dfa['resultSets'][0]['rowSet'])
# Resolve every roster name to a player id and record the team that player faces.
print(people)
# Lower-cased home roster: match_up keys are lower case, so the home/away test
# has to be made on lower-cased names as well (the roster keeps original case).
home_lookup = {roster_name.lower() for roster_name in home_players}
for person in people:
    # get the player id from their full name
    player = players.find_players_by_full_name(person)
    if not player:
        # No match in the static player list: no id and no match-up entry.
        continue
    ids.append(player[0]['id'])

    person = person.lower()
    # determine the team a player is playing against based on whether they are home or away
    opponent = team['away'] if person in home_lookup else team['home']
    match_up[person] = opponent
    # Also key by the static full name, which is what a lookup by player id
    # yields later; presumably it can differ from the roster spelling - TODO confirm.
    match_up[player[0]['full_name'].lower()] = opponent

#print(ids)
#print(player[0].keys())
#print(match_up)

# loop through each pid and make prediction
# Make prediction a function
# Predict the next-game value of the chosen stat for every resolved player id.

# Display options only change how DataFrames print, so set them once up front.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Lower-cased home roster so the home/away lookup does not depend on letter case.
home_players_lower = {roster_name.lower() for roster_name in home_players}

# Set to True to show a predicted-vs-actual scatter plot for each player.
plot = False

for pid in ids:

    name = players.find_player_by_id(pid)
    if not name:
        # No static record for this id, so there is nothing to label or predict.
        continue
    lower_name = name['full_name'].lower()
    # Opponent recorded for this player; None when the name has no match_up entry.
    opponent = match_up.get(lower_name)

    # get the player game log using the nba api
    games = playergamelog.PlayerGameLog(player_id=str(pid), season='2023-24')
    test = games.get_dict()

    # get the data we will use
    points = []            # value of the chosen stat in each counted game, API order
    home = []              # 1 for a home game, 0 for an away game
    match_up_points = 0    # running total of the stat against the opponent
    match_up_games = 0
    for row in test['resultSets'][0]['rowSet']:
        # Rows without a value in column 5 are skipped (presumably WL - TODO confirm).
        if row[5] is None:
            continue
        points.append(row[stat])

        # Column 4 is the matchup text. Compare its last token (the opponent)
        # rather than searching the whole string, which also contains the
        # player's own team.
        if opponent is not None and row[4].split()[-1:] == [opponent]:
            match_up_points += row[stat]
            match_up_games += 1

        # Determine whether home or away, 0 is away, home is 1
        home.append(0 if '@' in row[4] else 1)

    # Average over the games actually counted, not over every row returned.
    avg_points = sum(points) / len(points) if points else 0
    # if player has never played team set matchup points to average
    if match_up_games:
        match_up_points = match_up_points / match_up_games
    else:
        match_up_points = avg_points

    # do not make a prediction on players who have played less then 10 games
    if len(points) < 10:
        continue

    #################
    # prediction model for pts, ast, reb
    ###################

    # create our data set (the two averages are broadcast to every row)
    df = pd.DataFrame({'points': points
                       , 'average_points': avg_points
                       , 'home_or_away': home
                       , 'match_up_points': match_up_points})

    # Reverse the row order so that shift(-1) below pairs each game with the one
    # after it; assumes the API returns the newest game first - TODO confirm.
    df = df[::-1].reset_index(drop=True)

    # Create a new column 'next_game_points' shifted by one to represent the target variable
    df['next_game_points'] = df['points'].shift(-1)
    # Drop the last row with NaN in 'next_game_points' as there's no information about the next game
    df = df.dropna()

    # Features and target variable
    X = df[['points', 'average_points', 'home_or_away', 'match_up_points']]
    y = df['next_game_points']

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

    # Linear regression: train, then predict the hold-out set
    linear_model = LinearRegression()
    linear_model.fit(X_train, y_train)
    predictions_linear = linear_model.predict(X_test)

    # Random forest: train, then predict the hold-out set
    rf_model = RandomForestRegressor()
    rf_model.fit(X_train, y_train)
    predictions_rf = rf_model.predict(X_test)

    # Evaluate models
    mae_linear = mean_absolute_error(y_test, predictions_linear)
    mae_rf = mean_absolute_error(y_test, predictions_rf)

    # Cross validate both models on the full feature set
    num_folds = 5  # You can adjust the number of folds based on your preference
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
    linear_mae_cv = cross_val_score(linear_model, X, y, cv=kf, scoring=make_scorer(mean_absolute_error))
    rf_mae_cv = cross_val_score(rf_model, X, y, cv=kf, scoring=make_scorer(mean_absolute_error))

    # Calculate the average cross-validation MAE for each model
    avg_linear_mae_cv = linear_mae_cv.mean()
    avg_rf_mae_cv = rf_mae_cv.mean()
    combined_cv_result = (avg_linear_mae_cv + avg_rf_mae_cv) / 2

    # Combine predictions using model averaging
    combined_predictions = (predictions_linear + predictions_rf) / 2
    # Evaluate the combined predictions
    mae_combined = mean_absolute_error(y_test, combined_predictions)

    # print the players name
    print(name['full_name'])
    # print points list and average
    print(f'Points in past games: {points}')
    print(f'Avg PPG: {avg_points}')
    print(f'Avg Match-up PPG: {match_up_points}')

    # Now, use the trained models to predict the points for the next game from
    # the first value in the log (treated as the latest game) and the averages.
    last_known_points = points[0]
    print(last_known_points)
    is_home = 1 if lower_name in home_players_lower else 0
    new_data = pd.DataFrame({'points': [last_known_points], 'average_points': [avg_points],
                             'home_or_away': [is_home], 'match_up_points': [match_up_points]})
    predicted_points_linear = linear_model.predict(new_data)
    predicted_points_rf = rf_model.predict(new_data)
    predicted_points_combined = (predicted_points_linear + predicted_points_rf) / 2

    # Print results
    print('\nLinear regression predictions:')
    print(f'Linear Regression - Mean Absolute Error: {mae_linear}')
    print(f'Linear Regression - Cross-Validation Mean Absolute Error: {linear_mae_cv.mean()}')
    print(f'Linear Predicted Points for the Next Game: {predicted_points_linear[0]}')
    print('\nRandom Forest Regression Predictions:')
    print(f'Random Forest Regression - Mean Absolute Error: {mae_rf}')
    print(f'Random Forest Regression - Cross-Validation Mean Absolute Error: {rf_mae_cv.mean()}')
    print(f'Random Forest predicted Points for the Next Game: {predicted_points_rf[0]}')
    print('\nCombined Predictions:')
    print(f'Combined Predictions - Mean Absolute Error: {mae_combined}')
    print(f'Combined Cross-Validation Result: {combined_cv_result}')
    print(f'Combined Predicted Points for the Next Game: {predicted_points_combined[0]}')
    print('***********************************************************************************\n')

    # Create scatter plot to display accuracy
    if plot:
        # Scatter plot predicted (x) vs actual (y)
        plt.scatter(predictions_linear, y_test, label='Linear Regression', color='blue')
        plt.scatter(predictions_rf, y_test, label='Random Forest Regression', color='green')
        plt.scatter(combined_predictions, y_test, label='Combined Predictions', color='orange')

        # Add diagonal line for reference (ideal prediction)
        plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], linestyle='--', color='red',
                 label='Ideal Prediction')

        # Set labels and title
        plt.xlabel('Predicted Points')
        plt.ylabel('Actual Points')
        plt.title(name['full_name'])

        # Add legend
        plt.legend()

        # Display the plot
        plt.show()


In [None]:
#Updated Deep Learning
import pandas as pd
import numpy as np
import requests
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from nba_api.stats.endpoints import commonteamroster
import time
import logging

# Function to retrieve the roster of a specific team
def get_team_roster(team_id, retries=3):
    """Return the player names on a team's roster, retrying on read timeouts.

    Args:
        team_id: Team identifier passed to ``CommonTeamRoster``.
        retries: Number of attempts made before giving up.

    Returns:
        The ``PLAYER`` column of the first roster frame as a list. Nothing is
        returned (``None``) when ``retries`` allows no attempt.

    Raises:
        requests.ReadTimeout: Re-raised when the final attempt times out.
    """
    final_attempt = retries - 1
    for attempt in range(retries):
        try:
            frames = commonteamroster.CommonTeamRoster(team_id=team_id).get_data_frames()
            return frames[0]['PLAYER'].tolist()
        except requests.ReadTimeout:
            if attempt >= final_attempt:
                print("Failed to retrieve team roster after multiple attempts.")
                raise
            # Not the last attempt yet: report, pause briefly and go again.
            print(f"Attempt {attempt + 1} failed. Retrying...")
            time.sleep(2)

# Function to retrieve player data from the NBA API
def get_player_data(player_name):
    """Fetch a player's 2023-24 game log.

    Args:
        player_name: Name searched with ``players.find_players_by_full_name``;
            the first match is used.

    Returns:
        The first data frame of the player's ``PlayerGameLog`` for 2023-24.

    Raises:
        ValueError: If no player matches ``player_name``.
    """
    matches = players.find_players_by_full_name(player_name)
    if matches:
        log_request = playergamelog.PlayerGameLog(player_id=matches[0]['id'], season='2023-24')
        return log_request.get_data_frames()[0]
    raise ValueError(f"Player '{player_name}' not found.")

# Preprocessing function for the game logs
def preprocess_game_log(game_log):
    """Add date, venue and 5-game rolling-average columns to a game log.

    The games are sorted by date before the rolling means are taken, so each
    window covers the current game and the four before it regardless of the
    order in which the log was delivered. The caller's frame is not modified.

    Args:
        game_log: DataFrame with ``GAME_DATE``, ``MATCHUP`` and the stat
            columns converted below.

    Returns:
        A new DataFrame in ascending date order with ``HOME_AWAY`` and
        ``AVG_<stat>`` columns, minus every row that contains a NaN (which
        includes the first four games, whose window is incomplete).
    """
    float_columns = ['PTS', 'REB', 'AST', 'BLK', 'STL', 'FGM', 'FGA', 'FG_PCT',
                     'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT',
                     'OREB', 'DREB', 'TOV', 'PF', 'PLUS_MINUS']
    rolling_stats = ['PTS', 'REB', 'AST', 'BLK', 'STL', 'FGM', 'FGA', 'FTM', 'OREB', 'DREB']
    rolling_window = 5

    # Work on a copy so the caller's frame keeps its original columns and dtypes.
    game_log = game_log.copy()
    game_log['GAME_DATE'] = pd.to_datetime(game_log['GAME_DATE'])
    # An '@' in the matchup text marks an away game.
    game_log['HOME_AWAY'] = np.where(game_log['MATCHUP'].str.contains('@'), 'Away', 'Home')
    game_log[float_columns] = game_log[float_columns].astype(float)

    # Sort by date before calculating rolling averages; without this a
    # newest-first log would average each game with the games that follow it.
    game_log = game_log.sort_values('GAME_DATE', ascending=True)

    for stat in rolling_stats:
        game_log[f'AVG_{stat}'] = game_log[stat].rolling(window=rolling_window).mean()

    return game_log.dropna()

# Model training function using Deep Learning
def train_model(game_log):
    """Train a small dense network that predicts points, rebounds and assists.

    Args:
        game_log: Preprocessed game log containing the feature columns listed
            below and the ``PTS``, ``REB`` and ``AST`` targets.

    Returns:
        ``(model, scaler)``: the fitted Keras model and the ``StandardScaler``
        fitted on the training features. The mean squared error on a 20%
        hold-out split is printed as a side effect.
    """
    feature_columns = [
        'AVG_PTS', 'AVG_REB', 'AVG_AST', 'AVG_BLK', 'AVG_STL',
        'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT',
        'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'TOV', 'PF', 'PLUS_MINUS',
    ]
    target_columns = ['PTS', 'REB', 'AST']

    inputs = game_log[feature_columns]
    outputs = game_log[target_columns]

    X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=0.2, random_state=42)

    # Scale using statistics from the training split only.
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # Two hidden ReLU layers, then one linear unit per target.
    model = Sequential([
        Dense(64, input_dim=X_train.shape[1], activation='relu'),
        Dense(32, activation='relu'),
        Dense(3),  # Output for points, rebounds, assists
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=100, batch_size=16, verbose=1)

    mse = mean_squared_error(y_test, model.predict(X_test))
    print(f'Test MSE: {mse:.2f}')

    return model, scaler

# Function to predict player performance against a specific team
def predict_performance_against_team(model, scaler, game_log, opponent_team):
    """Predict a stat line against one opponent from averages against it.

    Games are selected by comparing ``opponent_team`` with the last token of
    ``MATCHUP`` (the opponent), ignoring case. A substring match over the
    whole text would also pick up games the player played *for* that team.

    Args:
        model: Fitted model exposing ``predict``.
        scaler: Fitted scaler exposing ``transform``; it receives a one-row
            DataFrame with the feature columns in training order.
        game_log: Game log with ``MATCHUP`` and the stat columns listed below.
        opponent_team: Opponent abbreviation, e.g. ``'LAC'``.

    Returns:
        The first row of ``model.predict(...)``, or ``None`` (after printing
        a message) when there are no games against the opponent.
    """
    stat_columns = ['PTS', 'REB', 'AST', 'BLK', 'STL',
                    'FGM', 'FGA', 'FG_PCT', 'FG3M',
                    'FG3A', 'FG3_PCT', 'FTM', 'FTA',
                    'FT_PCT', 'OREB', 'DREB', 'TOV',
                    'PF', 'PLUS_MINUS']
    # These stats feed the model under an AVG_ prefix; the rest keep their name.
    averaged_stats = {'PTS', 'REB', 'AST', 'BLK', 'STL'}

    wanted = opponent_team.strip().upper()
    opponents = game_log['MATCHUP'].str.split().str[-1].str.upper()
    opponent_games = game_log[opponents == wanted]

    if opponent_games.empty:
        print(f"No previous games found against team: {opponent_team}.")
        return None

    avg_stats = opponent_games[stat_columns].mean()

    # One-row feature frame; column order follows stat_columns.
    features = pd.DataFrame({
        (f'AVG_{column}' if column in averaged_stats else column): [avg_stats[column]]
        for column in stat_columns
    })

    features = scaler.transform(features)
    predicted_stats = model.predict(features)[0]

    return predicted_stats



# Main function to run the workflow
def main():
    """Predict points, rebounds and assists for a whole roster vs. one opponent.

    Prompts for the opponent abbreviation, then for every player returned by
    ``get_team_roster`` fetches and preprocesses the game log, trains a
    per-player model and collects the predicted stat line. A failure for one
    player is printed and does not stop the remaining players. Finally the
    collected predictions are printed.
    """
    # NOTE(review): 1610612745 is the Houston Rockets' team ID in the NBA
    # stats API, not the Lakers' (1610612747) - confirm the intended roster.
    team_id = "1610612745"

    # The prompt's example is upper-case; normalise so that "lac " is not
    # silently treated as an opponent with no games.
    opponent_team = input("Enter the opponent team (e.g., 'LAC'): ").strip().upper()

    roster = get_team_roster(team_id)

    predictions = {}

    for player_name in roster:
        # Deliberately broad: any failure (API, preprocessing, training) for
        # one player is reported and the loop moves on to the next player.
        try:
            game_log = get_player_data(player_name)
            processed_log = preprocess_game_log(game_log)
            model, scaler = train_model(processed_log)
            predicted_stats = predict_performance_against_team(model, scaler, processed_log, opponent_team)

            if predicted_stats is not None:
                predicted_points, predicted_rebounds, predicted_assists = predicted_stats
                predictions[player_name] = {
                    'Points': predicted_points,
                    'Rebounds': predicted_rebounds,
                    'Assists': predicted_assists,
                }
        except Exception as e:
            print(f"Error processing {player_name}: {e}")

    print("\nPredicted performance against", opponent_team)
    for player, stats in predictions.items():
        print(f"{player}: Points: {stats['Points']:.2f}, Rebounds: {stats['Rebounds']:.2f}, Assists: {stats['Assists']:.2f}")

if __name__ == "__main__":
    main()


In [None]:
#Neural Networks

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow.keras.layers import Dropout
from sklearn.model_selection import GridSearchCV


# Define the neural network architecture with dropout layers
def build_neural_network(input_shape, layers=3, neurons=64, dropout_rate=0.2):
    """Assemble an uncompiled feed-forward network with dropout.

    The stack is: one ReLU layer of ``neurons`` units taking ``input_shape``
    features, then ``layers - 1`` ReLU layers of ``neurons`` units each
    followed by ``Dropout(dropout_rate)``, then a 3-unit output layer
    (PTS, REB, AST) with no activation specified.
    """
    # Input layer
    stack = [Dense(neurons, activation='relu', input_shape=(input_shape,))]

    # Hidden layers, each paired with a dropout layer for regularization
    for _ in range(layers - 1):
        stack += [Dense(neurons, activation='relu'), Dropout(dropout_rate)]

    # Output layer for PTS, REB, AST
    stack.append(Dense(3))
    return Sequential(stack)

# Compile the neural network with optimizer learning rate parameter
def compile_neural_network(model, learning_rate=0.001):
    """Compile ``model`` with Adam at ``learning_rate`` and MSE loss; return it."""
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mean_squared_error',
    )
    return model



# Train the neural network
def train_neural_network(features, targets):
    """Train the network on a train/test split and report the test MSE.

    20% of the rows are held out (``random_state=42``). The scaler is fitted
    on the training rows only and reused for the held-out rows. The network
    is trained for 100 epochs with batch size 32 and a 20% validation split.

    Returns:
        ``(nn_model, scaler)``: the trained network and the fitted scaler.
    """
    train_X, test_X, train_y, test_y = train_test_split(
        features, targets, test_size=0.2, random_state=42
    )

    # Standardise using statistics from the training rows only
    scaler = StandardScaler()
    train_X_scaled = scaler.fit_transform(train_X)
    test_X_scaled = scaler.transform(test_X)

    # Build and compile a network sized to the number of feature columns
    n_features = train_X_scaled.shape[1]
    nn_model = compile_neural_network(build_neural_network(n_features))

    nn_model.fit(
        train_X_scaled,
        train_y,
        epochs=100,
        batch_size=32,
        validation_split=0.2,
        verbose=1,
    )

    # Score on the held-out rows
    test_mse = mean_squared_error(test_y, nn_model.predict(test_X_scaled))
    print(f'Test Mean Squared Error: {test_mse:.2f}')

    return nn_model, scaler


# Predict player stats against a specific team
def predict_player_stats_nn(model, scaler, averages, opponent_abbreviation):
    """Run the trained network on the averages row(s) for one opponent.

    Returns ``model.predict`` applied to the scaled feature columns (all
    columns except ``OPPONENT`` and ``Win_Loss_Ratio``), or ``None`` after
    printing a message when no row matches ``opponent_abbreviation``.
    """
    is_opponent = averages['OPPONENT'] == opponent_abbreviation
    if not is_opponent.any():
        print(f"No data available for opponent: {opponent_abbreviation}")
        return None

    # Everything except the label and win-ratio columns is a model input
    feature_frame = averages.loc[is_opponent].drop(columns=['OPPONENT', 'Win_Loss_Ratio'])
    return model.predict(scaler.transform(feature_frame))


# Function to build the model for GridSearchCV
def create_model(layers=3, neurons=64, dropout_rate=0.2, learning_rate=0.001, input_shape=10):
    """Build and compile a network from hyper-parameters (model factory).

    Args:
        layers: Passed to ``build_neural_network`` as ``layers``.
        neurons: Units per non-output dense layer.
        dropout_rate: Dropout rate passed to ``build_neural_network``.
        learning_rate: Learning rate passed to ``compile_neural_network``.
        input_shape: Number of input features. Defaults to 10, the value
            that used to be hard-coded here; pass the column count of the
            actual feature matrix so the input layer matches the data.

    Returns:
        The compiled model.
    """
    model = build_neural_network(
        input_shape=input_shape,
        layers=layers,
        neurons=neurons,
        dropout_rate=dropout_rate,
    )
    return compile_neural_network(model, learning_rate=learning_rate)

# Main function to drive the program
def main():
    """Interactive driver: train the neural network for one player and predict.

    Prompts for a player name, downloads that player's game logs for the
    listed seasons, averages them per opponent, trains the network on those
    averages, then prompts for an opponent abbreviation and prints the
    predicted PTS/REB/AST.
    """
    # Surrounding whitespace would otherwise make an exact-name match fail.
    player_name = input("Enter a player's full name (e.g., LeBron James): ").strip()
    wanted_name = player_name.lower()

    # Find player ID (first case-insensitive full-name match)
    player_id = next(
        (p['id'] for p in players.get_players() if p['full_name'].lower() == wanted_name),
        None,
    )

    if player_id is None:
        print("Player not found.")
        return

    # Seasons to retrieve data for
    seasons = ['2024-25', '2023-24']

    # Get player game logs for these seasons
    player_data = get_player_gamelog(player_id, seasons)

    # Calculate player averages against each team
    roster_averages = calculate_averages_vs_teams(player_data)

    # NOTE(review): the feature matrix below still contains PTS, REB and AST,
    # which are also the targets, so the reported MSE says little about real
    # predictive power. Left unchanged; revisit the feature set.
    nn_model, scaler = train_neural_network(
        roster_averages.drop(columns=['OPPONENT', 'Win_Loss_Ratio']),
        roster_averages[['PTS', 'REB', 'AST']],
    )

    # The player lookup above ignores case; do the same for the team
    # abbreviation (the prompt's example is upper-case) so "lal " is not
    # reported as "No data available".
    opponent_abbreviation = input(
        "Enter the opposing team's abbreviation (e.g., LAL for Lakers): "
    ).strip().upper()

    # Predict player stats against the opposing team
    predictions = predict_player_stats_nn(nn_model, scaler, roster_averages, opponent_abbreviation)

    # Display predictions (one line per predicted row)
    if predictions is not None:
        print(f"\nPredicted stats against {opponent_abbreviation}:")
        for i, row in enumerate(predictions, start=1):
            print(f"Player {i}: PTS: {row[0]:.2f}, REB: {row[1]:.2f}, AST: {row[2]:.2f}")

if __name__ == "__main__":
    main()






In [None]:
#SVM Model
import pandas as pd
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.static import players
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import cross_val_score


# Step 1: Get player game log stats for the requested seasons
def get_player_gamelog(player_id, seasons):
    """Fetch the player's game log for each season and stack them row-wise.

    One ``PlayerGameLog`` request is made per entry in ``seasons``; the first
    data frame of each response is kept and all are concatenated with a fresh
    integer index.
    """
    season_logs = [
        playergamelog.PlayerGameLog(player_id=player_id, season=season).get_data_frames()[0]
        for season in seasons
    ]
    return pd.concat(season_logs, ignore_index=True)

# Step 2: Calculate the player's averages against each opposing team
def calculate_averages_vs_teams(player_data):
    """Average a player's per-game stats separately for each opponent.

    Args:
        player_data: Game-log DataFrame with ``MATCHUP``, ``WL`` ('W'/'L')
            and the stat columns listed in ``stat_columns`` below. It is not
            modified; the work is done on a copy so the function can be
            called repeatedly on the same frame.

    Returns:
        DataFrame with one row per opponent and the columns ``OPPONENT``,
        ``Win_Loss_Ratio`` (mean of W=1 / L=0) and the mean of every stat
        column, in that order.
    """
    stat_columns = [
        'MIN', 'FGM', 'FGA', 'FG_PCT',
        'FG3M', 'FG3A', 'FG3_PCT',
        'FTM', 'FTA', 'FT_PCT',
        'OREB', 'DREB', 'REB',
        'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
    ]

    # Work on a copy: overwriting WL in the caller's frame would turn a
    # second call's W/L mapping into all-NaN.
    games = player_data.copy()

    # Assumes the "TEAM vs. OPP" / "TEAM @ OPP" layout: the opponent is the
    # last whitespace-separated token.
    games['OPPONENT'] = games['MATCHUP'].str.split().str[-1]

    # Convert WL to numerical values: W = 1, L = 0
    games['WL'] = games['WL'].map({'W': 1, 'L': 0})

    averages = (
        games.groupby('OPPONENT')
        .agg(dict.fromkeys(['WL', *stat_columns], 'mean'))
        .reset_index()
    )

    # Rename for clarity: the mean of the 0/1 win flag is a win ratio
    return averages.rename(columns={'WL': 'Win_Loss_Ratio'})

# Step 3: Train SVM model with k-fold cross-validation
def train_svm_model(averages, cv=5):
    """Tune and cross-validate one SVR per target stat (PTS, REB, AST).

    Args:
        averages: Per-opponent averages with ``OPPONENT`` and
            ``Win_Loss_Ratio`` columns plus numeric stat columns including
            ``PTS``, ``REB`` and ``AST``.
        cv: Requested number of cross-validation folds (default 5, the value
            previously hard-coded). It is capped at the number of rows so a
            player with only a few opponents on record can still be fitted.

    Returns:
        ``(best_models, scaler)``: a dict mapping each stat name to the best
        estimator found by the grid search, and the fitted ``StandardScaler``.

    Raises:
        ValueError: If ``averages`` has fewer than two rows, which is too
            few to cross-validate.
    """
    n_samples = len(averages)
    if n_samples < 2:
        raise ValueError(
            f"Need at least 2 opponents to cross-validate, got {n_samples}."
        )
    # k-fold needs at least as many rows as folds.
    n_splits = min(cv, n_samples)
    if n_splits != cv:
        print(f'Only {n_samples} opponents available; using {n_splits}-fold cross-validation.')

    # Prepare the features and target variables
    # NOTE(review): the features still include PTS, REB and AST, which are
    # also the targets, so the reported errors are optimistic. Left as is.
    features = averages.drop(columns=['OPPONENT', 'Win_Loss_Ratio'])
    targets = averages[['PTS', 'REB', 'AST']]

    # Scale the features
    scaler = StandardScaler()
    features_scaled = scaler.fit_transform(features)

    # Define the SVM parameter grid for tuning
    param_grid = {
        'C': [0.1, 1, 10, 100],
        'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
        'epsilon': [0.1, 0.2, 0.5],
        'kernel': ['linear', 'rbf', 'poly'],  # Experiment with different kernels
    }

    # Use GridSearchCV for hyperparameter tuning with k-fold cross-validation
    best_models = {}
    for stat in ['PTS', 'REB', 'AST']:
        grid_search = GridSearchCV(SVR(), param_grid, scoring='neg_mean_squared_error', cv=n_splits)
        grid_search.fit(features_scaled, targets[stat])

        best_models[stat] = grid_search.best_estimator_
        print(f'Best parameters for {stat}: {grid_search.best_params_}')

        # Evaluate the selected model using k-fold cross-validation
        cv_scores = cross_val_score(
            best_models[stat],
            features_scaled,
            targets[stat],
            cv=n_splits,
            scoring='neg_mean_squared_error',
        )
        mse_scores = -cv_scores  # Convert to positive MSE
        print(f'Cross-Validation MSE for {stat}: {mse_scores.mean():.2f} +/- {mse_scores.std():.2f}')

    return best_models, scaler

# Step 4: Predict player stats against the opposing team
def predict_player_stats(models, scaler, averages, opponent_abbreviation):
    """Predict PTS, REB and AST for the averages row(s) of one opponent.

    Returns a dict mapping each of ``'PTS'``, ``'REB'`` and ``'AST'`` to the
    output of that stat's model on the scaled features, or ``None`` after
    printing a message when no row matches ``opponent_abbreviation``.
    """
    matching_rows = averages.loc[averages['OPPONENT'] == opponent_abbreviation]
    if matching_rows.empty:
        print(f"No data available for opponent: {opponent_abbreviation}")
        return None

    # Scale every column except the label and win-ratio columns
    scaled_inputs = scaler.transform(
        matching_rows.drop(columns=['OPPONENT', 'Win_Loss_Ratio'])
    )

    return {
        stat: models[stat].predict(scaled_inputs)
        for stat in ('PTS', 'REB', 'AST')
    }

# Main function
def main():
    """Interactive driver: train the per-stat SVR models for one player and predict.

    Prompts for a player name, downloads that player's game logs for the
    listed seasons, averages them per opponent, trains the SVM models on
    those averages, then prompts for an opponent abbreviation and prints the
    predicted PTS/REB/AST.
    """
    # Surrounding whitespace would otherwise make an exact-name match fail.
    player_name = input("Enter a player's full name (e.g., LeBron James): ").strip()
    wanted_name = player_name.lower()

    # Find player ID (first case-insensitive full-name match)
    player_id = next(
        (p['id'] for p in players.get_players() if p['full_name'].lower() == wanted_name),
        None,
    )

    if player_id is None:
        print("Player not found.")
        return

    # Seasons to retrieve data for
    seasons = ['2024-25', '2023-24', '2022-23', '2021-22']

    # Get player game logs for these seasons
    player_data = get_player_gamelog(player_id, seasons)

    # Calculate player averages against each team
    roster_averages = calculate_averages_vs_teams(player_data)

    # Train the SVM model
    models, scaler = train_svm_model(roster_averages)

    # The player lookup above ignores case; do the same for the team
    # abbreviation (the prompt's example is upper-case) so "lal " is not
    # reported as "No data available".
    opponent_abbreviation = input(
        "Enter the opposing team's abbreviation (e.g., LAL for Lakers): "
    ).strip().upper()

    # Predict player stats against the opposing team
    predictions = predict_player_stats(models, scaler, roster_averages, opponent_abbreviation)

    # Display predictions (one line per predicted row)
    if predictions:
        print(f"\nPredicted stats against {opponent_abbreviation}:")
        stat_rows = zip(predictions['PTS'], predictions['REB'], predictions['AST'])
        for i, (pts, reb, ast) in enumerate(stat_rows, start=1):
            print(f"Player {i}: PTS: {pts:.2f}, REB: {reb:.2f}, AST: {ast:.2f}")

if __name__ == "__main__":
    main()
