In [69]:
import pandas as pd
import numpy as np
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import Location 
from basketball_reference_web_scraper.data import Team
from basketball_reference_web_scraper.data import Outcome 
from basketball_reference_web_scraper.data import OutputType  
from basketball_reference_web_scraper.data import OutputWriteOption 
from basketball_reference_web_scraper.data import Position
from datetime import datetime, timedelta
import time
import json

## Fetch each player's box scores and cache them in a JSON file

In [165]:
# Location of the on-disk JSON cache, relative to the working directory.
cache_file_path = 'player_data_cache.json'

def load_cache():
    """Return the parsed contents of the JSON cache file.

    A missing file and a file that does not contain valid JSON are both
    treated as "nothing cached yet" and produce an empty dict.
    """
    try:
        handle = open(cache_file_path, 'r')
    except FileNotFoundError:
        return {}

    with handle:
        try:
            parsed = json.load(handle)
        except json.JSONDecodeError:
            parsed = {}
    return parsed

def save_cache(cache):
    """Write ``cache`` to ``cache_file_path`` as indented JSON, atomically.

    The data is first serialised to a sibling ``.tmp`` file and only then
    moved over the real cache with ``os.replace``. If serialisation fails or
    the write is interrupted, the previous cache file is left intact instead
    of being truncated (a truncated file would later be read back as an
    empty cache by ``load_cache``).

    Parameters
    ----------
    cache : dict
        JSON-serialisable mapping to persist.

    Raises
    ------
    TypeError, ValueError, OSError
        Whatever ``json.dump`` / the file system raises; the temp file is
        removed before the exception propagates.
    """
    import os  # local import: the notebook's import cell does not provide os

    temp_path = f"{cache_file_path}.tmp"
    try:
        with open(temp_path, 'w') as file:
            json.dump(cache, file, indent=4)
        os.replace(temp_path, cache_file_path)
    except BaseException:
        # Also covers KeyboardInterrupt during a long refresh run.
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise

def player(name, force_update=False, year=2024):
    """Return one player's regular-season game log, backed by the JSON cache.

    Parameters
    ----------
    name : str
        Key of ``name_dict`` identifying the player.
    force_update : bool, default False
        When False, only the cache is consulted. When True, the box scores are
        fetched through ``client.regular_season_player_box_scores``, written to
        the cache and returned.
    year : int, default 2024
        Season end year, used both for the request and as the cache key.

    Returns
    -------
    pandas.DataFrame or str
        The game log with abbreviated stat column names. Cached and freshly
        fetched results use the same column order. On a cache miss with
        ``force_update=False`` the string ``"Data is not in the cache."`` is
        returned (kept for backward compatibility). If the fetch yields no
        rows, an empty frame is returned and the cache is left untouched.

    Raises
    ------
    KeyError
        If ``force_update`` is True and ``name`` is not a key of ``name_dict``.
    """
    column_order = ['team', 'location', 'opponent', 'outcome', 'active',
                    'seconds_played', 'FGM', 'FGA', '3PTM', '3PTA', 'FTM', 'FTA',
                    'REB',
                    'OREB', 'DREB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
                    'game score', '+/-']
    season_key = str(year)
    cache = load_cache()

    if not force_update:
        if name in cache and season_key in cache[name]:
            cached = pd.DataFrame(cache[name][season_key])
            # Known columns first, in the same order as a fresh fetch; anything
            # else stored in the cache is kept at the end rather than dropped.
            leading = [col for col in column_order if col in cached.columns]
            trailing = [col for col in cached.columns if col not in leading]
            return cached[leading + trailing]
        return "Data is not in the cache."

    if name not in name_dict:
        raise KeyError(f"{name!r} is not a key of name_dict")

    data = pd.DataFrame(client.regular_season_player_box_scores(
        player_identifier=name_dict[name],
        season_end_year=year,
    ))

    if data.empty:
        # Nothing to derive columns from; do not overwrite an existing cache entry.
        return pd.DataFrame(columns=column_order)

    # Enum members are stored by name so the records stay JSON-serialisable.
    enum_columns = {
        'team': Team,
        'location': Location,
        'opponent': Team,
        'outcome': Outcome,
    }
    for column, enum_type in enum_columns.items():
        if column in data.columns:
            data[column] = data[column].apply(
                lambda value, enum_type=enum_type:
                    value.name if isinstance(value, enum_type) else value
            )

    data['total_rebounds'] = data['offensive_rebounds'] + data['defensive_rebounds']
    data = data.drop(columns=['date'], errors='ignore').rename(columns={
        'games_played': 'GP',
        'made_field_goals': 'FGM',
        'attempted_field_goals': 'FGA',
        'made_three_point_field_goals': '3PTM',
        'attempted_three_point_field_goals': '3PTA',
        'made_free_throws': 'FTM',
        'attempted_free_throws': 'FTA',
        'offensive_rebounds': 'OREB',
        'defensive_rebounds': 'DREB',
        'personal_fouls': 'PF',
        'assists': 'AST',
        'steals': 'STL',
        'blocks': 'BLK',
        'turnovers': 'TOV',
        'points_scored': 'PTS',
        'game_score': 'game score',
        'plus_minus': '+/-',
        'total_rebounds': 'REB',
    })

    cache.setdefault(name, {})[season_key] = data.to_dict('records')
    save_cache(cache)

    return data[column_order]

def update_player_cache(name):
    """Re-fetch ``name`` through ``player`` with ``force_update=True`` and return the result."""
    refreshed = player(name, force_update=True)
    return refreshed

In [None]:
# Build name_dict: lower-cased player name -> slug, from the 2024 season totals.
df = pd.DataFrame(client.players_season_totals(season_end_year=2024))

name_dict = {}
# Slugs already stored, so each row is checked in O(1) instead of rescanning
# name_dict.values() for every row.
seen_slugs = set()

for row in df.itertuples(index=False):
    slug = row.slug
    name = row.name.lower()
    # Keep only the first row seen for a given name and for a given slug
    # (presumably a player can appear on several rows — TODO confirm).
    if name in name_dict or slug in seen_slugs:
        continue
    name_dict[name] = slug
    seen_slugs.add(slug)

# My Functions

In [242]:
# Prints a player's per-game averages (points, rebounds, assists) from the cached game log.
def average_stat_for_season(name, year):
    """Print the player's PPG / RPG / APG computed from the cached game log.

    Parameters
    ----------
    name : str
        Player key understood by ``player``.
    year : int
        Accepted for interface compatibility but not forwarded. It used to be
        passed positionally to ``player``, where it landed in the
        ``force_update`` slot and triggered a network refresh on every call.

    Returns
    -------
    None
        Output is printed. A "No data found" line is printed when the cache has
        no usable game log for the player.
    """
    games = player(name, force_update=False)

    # player() answers a cache miss with a plain string, not a DataFrame.
    if not isinstance(games, pd.DataFrame) or games.empty:
        print(f"No data found for player: {name}")
        return None

    ppg = round(games['PTS'].mean(), 1)
    rpg = round(games['REB'].mean(), 1)
    apg = round(games['AST'].mean(), 1)
    print(f"PPG: {ppg} RPG: {rpg} APG: {apg}")

# Hit-rate reports: how often a player went over/under a projected line in
# the most recent games and across the whole cached season.

# Sizes of the "most recent games" windows, reported in this order.
_RECENT_GAME_WINDOWS = (5, 10, 15)


def _validate_bet(bet):
    """Raise ValueError unless ``bet`` is 'over' or 'under'."""
    if bet not in ('over', 'under'):
        raise ValueError(f"bet must be 'over' or 'under', got {bet!r}")


def _cached_games(player_name, missing_message):
    """Return the player's cached game log, or None after printing ``missing_message``.

    ``player`` answers a cache miss with a plain string rather than a
    DataFrame, so anything that is not a non-empty DataFrame counts as missing.
    """
    games = player(player_name, force_update=False)
    if not isinstance(games, pd.DataFrame) or games.empty:
        print(missing_message)
        return None
    return games


def _print_hit_rates(values, label, projected_value, bet):
    """Print how often ``values`` hit the line in each recent window and overall.

    'over' counts games at or above ceil(projected_value); 'under' counts games
    at or below floor(projected_value). Each window is cut from the full series
    independently, and the denominator is the number of games actually in the
    window, so short game logs are not under-reported.
    """
    if bet == 'over':
        hits = values >= np.ceil(projected_value)
    else:
        hits = values <= np.floor(projected_value)

    for window in _RECENT_GAME_WINDOWS:
        recent_hits = hits.tail(window)
        games_in_window = len(recent_hits)
        count = recent_hits.sum()
        percentage = (count / games_in_window * 100).round(1)
        print(f"The {bet} for {label} hit {count}/{games_in_window} for {percentage}%")

    total_games_played = len(hits)
    tot_count = hits.sum()
    per = (tot_count / total_games_played * 100).round(1)
    print(f"The {bet} for {label} hit {tot_count}/{total_games_played} for {per}%")


def _combined_hit_rates(player_name, columns, projected_value, bet):
    """Report hit rates for the per-game sum of ``columns`` (e.g. PTS + REB)."""
    _validate_bet(bet)
    games = _cached_games(player_name, f"No data found for player {player_name}")
    if games is None:
        return None

    total = games[columns[0]]
    for column in columns[1:]:
        total = total + games[column]
    _print_hit_rates(total, '+'.join(columns), projected_value, bet)


# Hit rate of a single stat column against a projected line.
def single_categories_stats(player_name, stat, projected_value, bet='over'):
    """Print how often ``stat`` went over/under ``projected_value``.

    Parameters
    ----------
    player_name : str
        Player key understood by ``player``; only cached data is used.
    stat : str
        Column of the game log to evaluate (e.g. 'AST').
    projected_value : float
        The line to compare against.
    bet : {'over', 'under'}, default 'over'

    Returns
    -------
    None
        Results are printed: one line per recent window (last 5, 10, 15 games)
        followed by the whole-season line.

    Raises
    ------
    ValueError
        If ``bet`` is neither 'over' nor 'under'.
    """
    _validate_bet(bet)
    games = _cached_games(player_name, f"No data found for player: {player_name}")
    if games is None:
        return None

    if stat not in games.columns:
        print(f"Stat '{stat}' not found in the data.")
        return None

    _print_hit_rates(games[stat], stat, projected_value, bet)


# Hit rate for points + assists + rebounds.
def pts_reb_asts(player_name,projected_value, bet='over'):
    """Print over/under hit rates for PTS + AST + REB; see ``single_categories_stats``."""
    return _combined_hit_rates(player_name, ('PTS', 'AST', 'REB'), projected_value, bet)


# Hit rate for points + rebounds.
def pts_reb(player_name,projected_value,num_games=None, bet='over'):
    """Print over/under hit rates for PTS + REB.

    ``num_games`` is ignored (it always was); it is kept so existing calls
    still work. Windows are reported as 5, 10, 15 like the sibling helpers.
    """
    return _combined_hit_rates(player_name, ('PTS', 'REB'), projected_value, bet)


# Hit rate for points + assists.
def pts_ast(player_name,projected_value,num_games=None, bet='over'):
    """Print over/under hit rates for PTS + AST. ``num_games`` is ignored."""
    return _combined_hit_rates(player_name, ('PTS', 'AST'), projected_value, bet)


# Hit rate for assists + rebounds.
def reb_ast(player_name,projected_value,num_games=None, bet='over'):
    """Print over/under hit rates for AST + REB. ``num_games`` is ignored."""
    return _combined_hit_rates(player_name, ('AST', 'REB'), projected_value, bet)


# Hit rate for blocks + steals.
def blks_stls(player_name,projected_value,num_games=None, bet='over'):
    """Print over/under hit rates for BLK + STL. ``num_games`` is ignored."""
    return _combined_hit_rates(player_name, ('BLK', 'STL'), projected_value, bet)

# Get players' box scores for the 2024 season
### Using the `player` function

In [None]:
# Example: force-refresh the cache for every player in name_dict, pausing
# between requests (longer after a failure).
for name in name_dict:
    try:
        player(name, force_update=True)
    except Exception as e:
        print(f"An error occurred while processing {name}: {e}")
        pause_seconds = 15
    else:
        pause_seconds = 10  # Sleep for 10 seconds between requests
    time.sleep(pause_seconds)

In [101]:
# NOTE(review): `name` must already be bound to a player DataFrame for this to work;
# the loops in this notebook leave `name` as a str — confirm which cell assigned it.
name.columns

Index(['team', 'location', 'opponent', 'outcome', 'active', 'seconds_played',
       'FGM', 'FGA', '3PTM', '3PTA', 'FTM', 'FTA', 'REB', 'OREB', 'DREB',
       'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'game score', '+/-'],
      dtype='object')

In [102]:
# NOTE(review): relies on `name` being a player DataFrame from earlier kernel state;
# the loops in this notebook leave `name` as a str — confirm which cell assigned it.
name.head()

Unnamed: 0,team,location,opponent,outcome,active,seconds_played,FGM,FGA,3PTM,3PTA,...,OREB,DREB,AST,STL,BLK,TOV,PF,PTS,game score,+/-
0,Team.GOLDEN_STATE_WARRIORS,Location.HOME,Team.PHOENIX_SUNS,Outcome.LOSS,True,1845,8,20,4,14,...,1,4,1,1,0,2,5,27,15.8,-9
1,Team.GOLDEN_STATE_WARRIORS,Location.AWAY,Team.SACRAMENTO_KINGS,Outcome.WIN,True,2090,14,19,7,10,...,1,3,4,2,0,4,0,41,35.7,10
2,Team.GOLDEN_STATE_WARRIORS,Location.AWAY,Team.HOUSTON_ROCKETS,Outcome.WIN,True,1885,6,16,6,14,...,1,6,6,0,1,1,1,24,21.2,-2
3,Team.GOLDEN_STATE_WARRIORS,Location.AWAY,Team.NEW_ORLEANS_PELICANS,Outcome.WIN,True,1802,15,22,7,13,...,0,5,5,2,0,3,1,42,35.4,13
4,Team.GOLDEN_STATE_WARRIORS,Location.HOME,Team.SACRAMENTO_KINGS,Outcome.WIN,True,1940,7,15,4,10,...,1,4,3,0,0,7,3,21,9.1,-6


In [155]:
# Position of 'rj barrett' within name_dict's key order.
keys_list = [*name_dict]
index = keys_list.index('rj barrett')

In [156]:
# Display the list position computed for 'rj barrett'.
index

29

In [233]:
# Example: how often LeBron James went over 9.5 assists.
single_categories_stats(
    player_name='lebron james',
    stat='AST',
    projected_value=9.5,
    bet='over',
)

The over for AST hit 1/5 for 20.0%
The over for AST hit 1/10 for 10.0%
The over for AST hit 1/15 for 6.7%
The over for AST hit 13/61 for 21.3%


In [241]:
# Example: how often LeBron James went over 1.5 combined blocks + steals.
blks_stls(
    player_name='lebron james',
    projected_value=1.5,
    bet='over',
)


The over for BLK+STL hit 2/5 for 40.0%
The over for BLK+STL hit 5/10 for 50.0%
The over for BLK+STL hit 8/15 for 53.3%
The over for BLK+STL hit 34/61 for 55.7%
