In [2]:
import pandas as pd
import numpy as np
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import Location 
from basketball_reference_web_scraper.data import Team
from basketball_reference_web_scraper.data import Outcome 
from basketball_reference_web_scraper.data import OutputType  
from basketball_reference_web_scraper.data import OutputWriteOption 
from basketball_reference_web_scraper.data import Position
from datetime import datetime, timedelta, date
import time
import json

## Fetch a player's box scores and store them in a JSON file

In [3]:
# Path of the JSON file used as the on-disk cache (relative to the working directory).
cache_file_path = 'player_data_cache.json'

def load_cache():
    """Read the cache file at ``cache_file_path`` and return its parsed JSON.

    Returns:
        The decoded JSON content, or an empty dict when the file does not
        exist or does not contain valid JSON.
    """
    cached = {}
    try:
        with open(cache_file_path, 'r') as handle:
            cached = json.load(handle)
    except (json.JSONDecodeError, FileNotFoundError):
        # Best effort: a missing or unparseable cache is treated as empty.
        pass
    return cached

def date_converter(o):
    """Fallback serializer meant to be passed as ``json.dump(..., default=...)``.

    Args:
        o: An object the JSON encoder could not serialize on its own.

    Returns:
        The ISO-8601 string for ``date``/``datetime`` values, or the native
        Python scalar for NumPy scalar values.

    Raises:
        TypeError: For any other type. Returning ``None`` implicitly (as the
            previous version did) made the encoder write ``null`` for every
            unsupported object, silently dropping the value.
    """
    if isinstance(o, (date, datetime)):
        return o.isoformat()
    if isinstance(o, np.generic):
        # NumPy scalars such as np.int64 are not JSON serializable as-is.
        return o.item()
    raise TypeError(f"Object of type {type(o).__name__} is not JSON serializable")

def save_cache(cache):
    """Write ``cache`` to ``cache_file_path`` as indented JSON, atomically.

    The data is first dumped to a sibling temporary file and then moved over
    the real cache file with ``os.replace``. If serialization fails or the
    process is interrupted mid-write, the existing cache file is left
    untouched instead of being truncated.

    Args:
        cache: JSON-serializable object; values the encoder cannot handle are
            passed through ``date_converter``.

    Raises:
        Whatever ``open``/``json.dump``/``os.replace`` raise; the temporary
        file is removed before the exception propagates.
    """
    import os  # local import: only needed for the atomic replace/cleanup

    tmp_path = f"{cache_file_path}.tmp"
    try:
        with open(tmp_path, 'w') as file:
            json.dump(cache, file, default=date_converter, indent=4)
        os.replace(tmp_path, cache_file_path)
    finally:
        # After a successful replace the temp file no longer exists, so this
        # only cleans up after a failed or interrupted write.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        
# Load one player's regular-season box scores, from the cache or from the API.
def player(name, force_update=False, year=2024):
    """Return a player's regular-season box scores for one season as a DataFrame.

    Args:
        name: Player name used as the cache key. When ``force_update`` is
            True it must also be a key of the global ``name_dict``, which
            supplies the ``player_identifier`` sent to the API.
        force_update: If False (default), only the cache is consulted and no
            request is made. If True, the data is fetched from the API, the
            cache entry is overwritten and the cache file is saved.
        year: Season end year; used both as the cache key (as a string) and
            as ``season_end_year`` for the API call. Defaults to 2024.

    Returns:
        A DataFrame with one row per box score and the columns listed in
        ``column_order`` below. An empty DataFrame is returned when
        ``force_update`` is False and nothing is cached for this player and
        season, or when the API returns no box scores.

    Raises:
        KeyError: If ``force_update`` is True and ``name`` is not in
            ``name_dict``.
    """
    cache = load_cache()
    season_key = str(year)

    if not force_update:
        if name in cache and season_key in cache[name]:
            return pd.DataFrame(cache[name][season_key])
        # Not cached and no refresh requested: signal "no data" with an empty frame.
        return pd.DataFrame()

    data = pd.DataFrame(client.regular_season_player_box_scores(
        player_identifier=name_dict[name],
        season_end_year=year
    ))

    if data.empty:
        # Nothing came back, so the stat columns used below do not exist.
        # Return the same "no data" value as a cache miss and leave the cache untouched.
        return pd.DataFrame()

    # Dates and enum members are not JSON serializable; store them as plain strings.
    if 'date' in data.columns:
        data['date'] = data['date'].apply(lambda x: x.isoformat() if isinstance(x, (date, datetime)) else x)

    enum_columns = {'team': Team, 'location': Location, 'opponent': Team, 'outcome': Outcome}
    for column, enum_type in enum_columns.items():
        if column in data.columns:
            # Bind enum_type as a default argument so each lambda checks its own type.
            data[column] = data[column].apply(
                lambda x, enum_type=enum_type: x.name if isinstance(x, enum_type) else x
            )

    data['total_rebounds'] = data['offensive_rebounds'] + data['defensive_rebounds']

    data = data.rename(columns={
        'games_played': 'GP',
        'made_field_goals': 'FGM',
        'attempted_field_goals': 'FGA',
        'made_three_point_field_goals': '3PTM',
        'attempted_three_point_field_goals': '3PTA',
        'made_free_throws': 'FTM',
        'attempted_free_throws': 'FTA',
        'offensive_rebounds': 'OREB',
        'defensive_rebounds': 'DREB',
        'personal_fouls': 'PF',
        'assists': 'AST',
        'steals': 'STL',
        'blocks': 'BLK',
        'turnovers': 'TOV',
        'points_scored': 'PTS',
        'game_score': 'game score',
        'plus_minus': '+/-',
        'total_rebounds': 'REB'
    })

    column_order = ['date', 'team', 'location', 'opponent', 'outcome', 'active','seconds_played', 'FGM', 'FGA', '3PTM', '3PTA', 'FTM', 'FTA','REB','OREB','DREB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'game score', '+/-']
    data = data[column_order]

    cache.setdefault(name, {})[season_key] = data.to_dict('records')
    save_cache(cache)

    return data

def update_player_cache(name):
    """Re-fetch the box scores for ``name`` via ``player(..., force_update=True)``.

    Returns:
        Whatever ``player`` returns for the forced update.
    """
    refreshed = player(name, force_update=True)
    return refreshed

In [4]:
df = pd.DataFrame(client.players_season_totals(season_end_year=2024))

# Map each player's lower-cased name to the slug that player() passes to the
# API as `player_identifier`.
name_dict = {}
seen_slugs = set()  # slugs already assigned; set lookup instead of scanning dict values

for row in df.itertuples(index=False):
    slug = row.slug
    name = row.name.lower()
    # The first slug seen for a name wins, and a slug is never mapped to a
    # second name; any later row repeating either one is skipped.
    if name in name_dict or slug in seen_slugs:
        continue
    name_dict[name] = slug
    seen_slugs.add(slug)

# My Functions

In [5]:
# Print a player's season averages for points, rebounds and assists.
def average_stat_for_season(name):
    """Print the per-game averages of PTS, REB and AST from the cached data.

    Args:
        name: Player name as used for the cache lookup in ``player``.

    Returns:
        None. When no data is available a "No data found" message naming the
        player is printed instead of the averages.
    """
    # Keep the frame in its own variable: rebinding `name` to the DataFrame
    # made the "no data" message print an empty frame instead of the player.
    stats = player(name, force_update=False)

    if stats.empty:
        print(f"No data found for player: {name}")
        return None

    ppg = stats['PTS'].mean().round(1)
    rpg = stats['REB'].mean().round(1)
    apg = stats['AST'].mean().round(1)
    print(f"PPG: {ppg} RPG: {rpg} APG: {apg}")

# Report how often a player hit the over (or under) on a line for a single stat.
def single_categories_stats(player_name, stat, projected_value, bet='over'):
    """Print hit rates for one stat over the last 5/10/15 games and the season.

    For ``bet='over'`` a game counts as a hit when the stat is at least
    ``ceil(projected_value)``; for ``bet='under'`` when it is at most
    ``floor(projected_value)``.

    Args:
        player_name: Player name as used for the cache lookup in ``player``.
        stat: Column name of the stat to evaluate (e.g. ``'PTS'``).
        projected_value: The betting line.
        bet: ``'over'`` (default) or ``'under'``.

    Returns:
        None; results are printed. A message is printed instead when the
        player has no cached data or the stat column does not exist.

    Raises:
        ValueError: If ``bet`` is neither ``'over'`` nor ``'under'``.
    """
    if bet not in ('over', 'under'):
        raise ValueError(f"bet must be 'over' or 'under', got {bet!r}")

    df = player(player_name, force_update=False)

    # Check for missing data first: an empty frame has no columns either, so
    # testing the column first would blame the stat instead of the player.
    if df.empty:
        print(f"No data found for player: {player_name}")
        return None

    if stat not in df.columns:
        print(f"Stat '{stat}' not found in the data.")
        return None

    if bet == 'over':
        hits = df[stat] >= np.ceil(projected_value)
    else:
        hits = df[stat] <= np.floor(projected_value)

    for num_games in [5, 10, 15]:
        window = hits.tail(num_games)
        # tail() returns fewer rows when fewer games exist, so divide by the
        # number of games actually in the window, not the requested size.
        games_in_window = len(window)
        count = window.sum()
        percentage = (count / games_in_window * 100).round(1)
        print(f"The {bet} for {stat} hit {count}/{games_in_window} for {percentage}%")

    total_games_played = len(hits)
    tot_count = hits.sum()
    per = (tot_count / total_games_played * 100).round(1)
    print(f"The {bet} for {stat} hit {tot_count}/{total_games_played} for {per}%")
    
# Shared implementation for the combined-stat over/under reports defined below.
def _print_combo_hit_rates(player_name, columns, projected_value, bet, intervals=(5, 10, 15)):
    """Print hit rates for the per-game sum of ``columns``.

    For ``bet='over'`` a game counts as a hit when the sum is at least
    ``ceil(projected_value)``; for ``bet='under'`` when it is at most
    ``floor(projected_value)``. One line is printed per entry of
    ``intervals`` (last N games), followed by one line for all games.

    Args:
        player_name: Player name as used for the cache lookup in ``player``.
        columns: Stat column names to add together; also joined with ``+``
            to form the label used in the printed lines.
        projected_value: The betting line.
        bet: ``'over'`` or ``'under'``.
        intervals: Sizes of the "last N games" windows, in print order.

    Returns:
        None; results are printed.

    Raises:
        ValueError: If ``bet`` is neither ``'over'`` nor ``'under'``.
    """
    if bet not in ('over', 'under'):
        raise ValueError(f"bet must be 'over' or 'under', got {bet!r}")

    df = player(player_name, force_update=False)

    if df.empty:
        print(f"No data found for player {player_name}")
        return None

    # Add the columns with `+` (not DataFrame.sum) so a missing value in any
    # stat still makes that game's total NaN, which never counts as a hit.
    total = df[columns[0]]
    for column in columns[1:]:
        total = total + df[column]

    if bet == 'over':
        hits = total >= np.ceil(projected_value)
    else:
        hits = total <= np.floor(projected_value)

    label = '+'.join(columns)
    for num_games in intervals:
        window = hits.tail(num_games)
        # tail() returns fewer rows when fewer games exist, so divide by the
        # number of games actually in the window, not the requested size.
        games_in_window = len(window)
        count = window.sum()
        percentage = (count / games_in_window * 100).round(1)
        print(f"The {bet} for {label} hit {count}/{games_in_window} for {percentage}%")

    total_games_played = len(hits)
    tot_count = hits.sum()
    per = (tot_count / total_games_played * 100).round(1)
    print(f"The {bet} for {label} hit {tot_count}/{total_games_played} for {per}%")


# Hit rates for points + assists + rebounds.
def pts_reb_asts(player_name,projected_value, bet='over'):
    """Print over/under hit rates for PTS+AST+REB (last 5/10/15 games and season)."""
    _print_combo_hit_rates(player_name, ('PTS', 'AST', 'REB'), projected_value, bet)


# Hit rates for points + rebounds.
def pts_reb(player_name,projected_value, bet='over'):
    """Print over/under hit rates for PTS+REB (last 15/10/5 games and season).

    Note: this report lists the windows largest-first, unlike its siblings.
    """
    _print_combo_hit_rates(player_name, ('PTS', 'REB'), projected_value, bet, intervals=(15, 10, 5))


# Hit rates for points + assists.
def pts_ast(player_name,projected_value, bet='over'):
    """Print over/under hit rates for PTS+AST (last 5/10/15 games and season)."""
    _print_combo_hit_rates(player_name, ('PTS', 'AST'), projected_value, bet)


# Hit rates for assists + rebounds.
def reb_ast(player_name,projected_value, bet='over'):
    """Print over/under hit rates for AST+REB (last 5/10/15 games and season)."""
    _print_combo_hit_rates(player_name, ('AST', 'REB'), projected_value, bet)


# Hit rates for blocks + steals.
def blks_stls(player_name,projected_value, bet='over'):
    """Print over/under hit rates for BLK+STL (last 5/10/15 games and season)."""
    _print_combo_hit_rates(player_name, ('BLK', 'STL'), projected_value, bet)

# Get a player's box scores for the 2024 season
### Using the player function

In [11]:
# Example: read LeBron James' box scores from the cache only (force_update=False makes no API call).
# Note that `name` ends up holding the returned DataFrame, not the player's name.
name = player('lebron james',force_update=False)

### Every column in the data

In [12]:
# Show the last rows of the box-score frame stored in `name`.
name.tail()

Unnamed: 0,date,team,location,opponent,outcome,active,seconds_played,FGM,FGA,3PTM,...,OREB,DREB,AST,STL,BLK,TOV,PF,PTS,game score,+/-
56,2024-03-10,LOS_ANGELES_LAKERS,HOME,MINNESOTA_TIMBERWOLVES,WIN,True,2271,10,16,2,...,0,8,9,0,0,4,1,29,25.3,14
57,2024-03-13,LOS_ANGELES_LAKERS,AWAY,SACRAMENTO_KINGS,LOSS,True,2407,6,16,2,...,1,12,9,2,0,5,3,18,14.8,-10
58,2024-03-16,LOS_ANGELES_LAKERS,HOME,GOLDEN_STATE_WARRIORS,LOSS,True,2262,15,23,3,...,3,5,9,1,1,4,1,40,36.7,-6
59,2024-03-18,LOS_ANGELES_LAKERS,HOME,ATLANTA_HAWKS,WIN,True,1719,10,14,2,...,0,7,10,0,0,2,0,25,26.3,20
60,2024-03-22,LOS_ANGELES_LAKERS,HOME,PHILADELPHIA_76ERS,WIN,True,2272,7,15,1,...,0,8,6,0,1,8,1,20,10.4,7


In [13]:
# Over on a 9.5 assists line: a game counts as a hit with 10 or more assists.
single_categories_stats('lebron james', 'AST', 9.5, bet='over')

The over for AST hit 1/5 for 20.0%
The over for AST hit 2/10 for 20.0%
The over for AST hit 4/15 for 26.7%
The over for AST hit 13/61 for 21.3%


In [14]:
# Over on a 7.5 points line: a game counts as a hit with 8 or more points.
single_categories_stats('nikola jović','PTS',7.5, bet='over')


The over for PTS hit 3/5 for 60.0%
The over for PTS hit 5/10 for 50.0%
The over for PTS hit 7/15 for 46.7%
The over for PTS hit 12/34 for 35.3%


In [19]:
# Make sure the cached data is up to date for the players playing the next day.
# End goal: check all recently played games and refresh the players who appeared in them.
# For now this only pulls the box scores of every player for a single day (2024-03-23).

recent = pd.DataFrame(client.player_box_scores(day=23, month=3, year=2024))

In [20]:
# Display the box scores fetched into `recent`.
recent

Unnamed: 0,slug,name,team,location,opponent,outcome,seconds_played,made_field_goals,attempted_field_goals,made_three_point_field_goals,...,made_free_throws,attempted_free_throws,offensive_rebounds,defensive_rebounds,assists,steals,blocks,turnovers,personal_fouls,game_score
0,murrade01,Dejounte Murray,Team.ATLANTA_HAWKS,Location.HOME,Team.CHARLOTTE_HORNETS,Outcome.WIN,1852,10,14,7,...,1,1,0,7,12,5,0,4,1,33.3
1,greenja05,Jalen Green,Team.HOUSTON_ROCKETS,Location.HOME,Team.UTAH_JAZZ,Outcome.WIN,1975,15,22,7,...,4,4,1,3,4,1,0,3,3,32.8
2,vanvlfr01,Fred VanVleet,Team.HOUSTON_ROCKETS,Location.HOME,Team.UTAH_JAZZ,Outcome.WIN,1677,11,16,10,...,2,2,0,3,7,2,0,2,3,31.8
3,bookede01,Devin Booker,Team.PHOENIX_SUNS,Location.AWAY,Team.SAN_ANTONIO_SPURS,Outcome.WIN,1830,14,24,3,...,1,1,2,5,9,0,0,2,3,26.8
4,derozde01,DeMar DeRozan,Team.CHICAGO_BULLS,Location.HOME,Team.BOSTON_CELTICS,Outcome.LOSS,2544,12,19,0,...,4,4,0,6,9,0,1,2,1,25.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
163,simsje01,Jericho Sims,Team.NEW_YORK_KNICKS,Location.HOME,Team.BROOKLYN_NETS,Outcome.WIN,1094,0,2,0,...,0,0,2,5,0,1,0,2,3,-0.7
164,vassede01,Devin Vassell,Team.SAN_ANTONIO_SPURS,Location.HOME,Team.PHOENIX_SUNS,Outcome.LOSS,1410,1,8,0,...,0,0,0,3,3,0,0,1,0,-1.2
165,okogijo01,Josh Okogie,Team.PHOENIX_SUNS,Location.AWAY,Team.SAN_ANTONIO_SPURS,Outcome.WIN,574,0,2,0,...,0,0,1,0,1,0,0,1,1,-1.4
166,smithni01,Nick Smith,Team.CHARLOTTE_HORNETS,Location.AWAY,Team.ATLANTA_HAWKS,Outcome.LOSS,185,0,3,0,...,0,0,0,0,0,0,0,1,0,-3.1


### Update the cached data in the JSON file for every player from the resume point onward

In [9]:
# Refresh the cache player by player, resuming at 'terry taylor' rather than
# at the first entry of name_dict.
key_list = list(name_dict)
start_index = key_list.index('terry taylor')

for name in key_list[start_index:]:
    try:
        player(name, force_update=True)
    except Exception as e:
        # Report the failure, wait longer than usual, then move on to the next player.
        print(f"An error occurred while processing {name}: {e}")
        time.sleep(10)
    else:
        # Pause 5 seconds between successful requests
        # (presumably to stay under the site's rate limit).
        time.sleep(5)
    
    