In [182]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import typing
from scipy.stats import poisson

In [22]:
# Reference tables; the first CSV column of each is read back as the index.
schedule = pd.read_csv("schedule.csv", index_col=0)
opp_stats = pd.read_csv("opponent_stats.csv", index_col=0)

# Player game logs (the variable names suggest pdata.csv holds playoff
# games -- confirm against the files).
game_logs_df = pd.read_csv("data_test.csv")
game_logs_playoffs_df = pd.read_csv("pdata.csv")
# KM_vals = pd.read_csv('KM_vals.csv', index_col=0)

In [14]:
# Date stamp embedded in the lines file names, e.g. "2023_10_30"
# (month and day are not zero-padded).
today = dt.datetime.today()
year, month, day = today.year, today.month, today.day
today_str = "_".join(str(part) for part in (year, month, day))

In [18]:
# Today's lines from each source; both files live under Lines/<source>/ and
# are named <source>_<today_str>.csv, with the saved index in column 0.
pp_lines, unabated = (
    pd.read_csv(f"Lines/{source}/{source}_{today_str}.csv", index_col=0)
    for source in ("pp", "unabated")
)

In [21]:
# Preview the first two rows to confirm the lines file loaded with the expected columns.
pp_lines.head(2)

Unnamed: 0,player,team,line,stat,league,date,event_time,time,prop_id
0,Nicolo Cambiaghi,Atalanta,1.5,Shots,82,2023-10-30 12:30:00-05:00,2023-10-30 12:30:00-05:00,2023-10-30 09:39:00,e943bc50-3
1,Aleksey Miranchuk,Empoli,1.5,Shots,82,2023-10-30 12:30:00-05:00,2023-10-30 12:30:00-05:00,2023-10-30 09:39:00,f5493391-6


In [186]:
class Game_Log:
    """Per-player game-log queries and prop-line hit-rate summaries.

    Wraps a DataFrame with one row per player per game. The methods read the
    ``player`` and ``date`` columns plus whichever stat columns the caller
    names. A composite category is written with ``+`` (e.g. ``"PTS+TRB"``)
    and is evaluated as the row-wise sum of the named columns.

    Every method works on ``self``; none of them depends on a module-level
    instance being bound to a particular name.
    """

    def __init__(self, df):
        """Parse dates and derive the days-of-rest column.

        Args:
            df: game-log frame with ``player`` and ``date`` (``YYYY-MM-DD``)
                columns. The ``date`` conversion and the new ``rest`` column
                are written onto the caller's frame in place; the NaN fill
                below produces a new frame stored on ``self.game_log``.
        """
        self.game_log = df
        self.game_log["date"] = pd.to_datetime(self.game_log["date"], format="%Y-%m-%d")
        # Days since the player's previous row; presumably rows are in
        # chronological order per player -- verify against the data source.
        self.game_log["rest"] = self.game_log.groupby("player")["date"].diff().dt.days
        # NOTE(review): this fills every NaN in the frame with 5, not only the
        # first-game ``rest`` gaps -- confirm that is intended for stat columns.
        self.game_log = self.game_log.fillna(5)

    def _frame(self, stats=None) -> pd.DataFrame:
        """Return ``stats`` when the caller supplied one, else the instance log."""
        return self.game_log if stats is None else stats

    @staticmethod
    def _calendar_day(value):
        """Return the wall-clock calendar date of ``value``, or None if it cannot be parsed."""
        try:
            stamp = pd.Timestamp(value)
        except (ValueError, TypeError):
            return None
        return None if pd.isna(stamp) else stamp.date()

    def get_avg(self, player, games=26, cat=None) -> typing.Union[float, pd.Series]:
        """Average the player's last ``games`` rows.

        Args:
            player: value matched against the ``player`` column.
            games: number of trailing rows to average.
            cat: column name to average. When falsy, every column from
                position 10 onward is averaged and a Series is returned.

        Returns:
            The mean of ``cat`` (scalar), or a Series of per-column means.
        """
        data = self.game_log
        # Columns from position 10 onward are the ones averaged here
        # (presumably the stat columns -- confirm against the CSV layout).
        data = data.loc[data["player"] == player].iloc[-games:, 10:]
        if cat:
            data = data[cat]

        avg = data.mean()
        # Label the output with the requested player (was a hard-coded name).
        print(f"{player} average {cat} over the last {games} games: ")
        return avg

    def get_stat(self, player, category, stats=None) -> pd.Series:
        """Return the player's per-game values for a (possibly composite) category.

        Args:
            player: value matched against the ``player`` column.
            category: a column name, or several joined by ``+`` to be summed.
            stats: optional frame to query instead of ``self.game_log``.

        Returns:
            Series of per-game values carrying the original row index.
        """
        frame = self._frame(stats)
        rows = frame.loc[frame["player"] == player]
        columns = category.split("+")
        total = rows[columns[0]]
        for column in columns[1:]:
            total = total + rows[column]
        return total

    def moving_avg(self, player, cat, window_size=10) -> pd.Series:
        """Rolling mean of a category over the player's games.

        Early games use however many rows are available (``min_periods=1``).
        The result is re-indexed 0..n-1.
        """
        total = self.get_stat(player, cat)
        moving = total.rolling(window=window_size, min_periods=1).mean()
        return moving.reset_index(drop=True)

    def print_prob(self, player, line, cat, stats=None, games=None) -> None:
        """Print how often the player's category value was strictly above ``line``.

        Args:
            player: value matched against the ``player`` column.
            line: threshold; a game counts as a hit when value > line.
            cat: column name or ``+``-joined composite.
            stats: optional frame to query instead of ``self.game_log``.
            games: when given, only the last ``games`` rows are considered.
        """
        series = self.get_stat(player, cat, stats).reset_index(drop=True)
        if games is not None:
            series = series.iloc[-games:]
        n = len(series)
        if n == 0:
            # Avoid a 0/0 "nan %" line for players with no rows.
            print(f"{player} has no recorded games for {cat}")
            return
        p = (series > line).sum()
        print(
            f"{player} hits the {cat} line {p}/{n} times or", round(100 * p / n, 2), "%"
        )

    def graph_stat(self, player, cat, window, lines) -> None:
        """Plot per-game values, their moving average and the overall mean.

        When the module-level ``get_line`` helper finds a matching row in
        ``lines``, the value in its third column is drawn as a horizontal line
        and the hit rates over all games and over the last 10 are printed.

        Args:
            player: value matched against the ``player`` column.
            cat: column name or ``+``-joined composite.
            window: rolling window size for the moving average.
            lines: lines frame passed through to ``get_line``.
        """
        print(cat)
        mov = self.moving_avg(player, cat, window)
        points = self.get_stat(player, cat)
        matches = get_line(player, cat, lines)
        if not matches.empty:
            # Column position 2 holds the line value in the lines frame.
            line = matches.iloc[0, 2]
            print("line:", line)
            self.print_prob(player, line, cat)
            self.print_prob(player, line, cat, games=10)

            plt.axhline(line, color="r", linestyle="--", label="line")

        season_avg = np.mean(points)
        x = range(len(points))
        plt.scatter(x, points, color="g", label=f"{cat}")  # games
        plt.plot(x, points, color="g", linestyle=(0, (3, 6)))  # game line
        plt.plot(x, mov, label="moving average")  # moving average

        plt.axhline(season_avg, color="y", linestyle="-", label="season average")
        plt.ylabel(cat)
        plt.xlabel("Game #")
        plt.title(player)
        plt.legend()
        plt.show()
        print("avg:", round(season_avg, 1))

    def dynamic(self, guy, cat, span=8):
        """Exponentially weighted mean of the player's category values.

        ``span`` is the smoothing hyperparameter passed to ``Series.ewm``.
        """
        series = self.get_stat(guy, cat, self.game_log)
        return series.ewm(span=span).mean()  # hyperparam

    def get_game(self, player, date, stats=None) -> pd.DataFrame:
        """Return the player's row(s) whose ``date`` equals ``date``.

        Args:
            player: value matched against the ``player`` column.
            date: anything ``pd.Timestamp`` accepts.
            stats: optional frame to query instead of ``self.game_log``.
        """
        frame = self._frame(stats)
        on_date = frame["date"] == pd.Timestamp(date)
        return frame.loc[(frame["player"] == player) & on_date]

    def get_pos(self, guy) -> str:
        """Return the ``pos`` value from the player's first row.

        Raises:
            IndexError: if the player has no rows.
        """
        stats = self.game_log
        return stats.loc[stats["player"] == guy]["pos"].values[0]

    def get_rolling_hit_rate(self, player, line, cat, last_n=None) -> pd.Series:
        """Running percentage of games with the category strictly above ``line``.

        Args:
            player: value matched against the ``player`` column.
            line: threshold; a game counts as a hit when value > line.
            cat: column name or ``+``-joined composite.
            last_n: rolling window in games. When None the window spans the
                whole log, so each value is the hit rate over all games so far.

        Returns:
            Series of percentages (0-100), one per game.
        """
        # need to fix eventually to account for szn
        stats = self.game_log
        window = max(len(stats), 1) if last_n is None else last_n
        series = self.get_stat(player, cat, stats)
        return (series > line).rolling(window=window, min_periods=1).mean() * 100

    def best_odds(self, lines) -> pd.DataFrame:
        """Summarise each line against the player's history and a Poisson estimate.

        Args:
            lines: lines frame read positionally -- column 0 player, 1 opposing
                team, 2 line value, 3 stat category -- plus a ``date`` column.
                The first row's date is used for every row. Rows whose player
                is not in the game log are skipped.

        Returns:
            One row per usable line with the overall mean, the exponentially
            weighted mean (also used as ``expected``), the last-10 and
            all-games hit rates, and ``Prob`` = P(X > line) for a Poisson
            variable with mean ``expected``. ``home`` is 0 when the opposing
            team is a home team on that date in the module-level ``schedule``
            table, otherwise 1.
        """
        columns = ["player", "opp", "home", "stat", "Season_avg", "mov_avg",
                   "expected", "line", "Last_10", "Season"]
        if lines.empty:
            return pd.DataFrame(columns=columns + ["Prob"])

        # Match on the calendar day only: the lines carry a time of day and a
        # UTC offset, so an exact timestamp comparison against the schedule
        # never matched and every row was flagged home=1.
        game_day = self._calendar_day(lines["date"].iloc[0])
        on_game_day = np.array(
            [game_day is not None and self._calendar_day(value) == game_day
             for value in schedule["date"]],
            dtype=bool,
        )
        home_teams = set(schedule.loc[on_game_day, "home"])

        known_players = set(self.game_log["player"].unique())
        rows = []
        for i in range(len(lines)):
            player = lines.iloc[i, 0]
            if player not in known_players:
                continue
            opp = lines.iloc[i, 1]
            line = lines.iloc[i, 2]
            cat = lines.iloc[i, 3]
            home = 0 if opp in home_teams else 1

            series = self.get_stat(player, cat)
            season = self.get_rolling_hit_rate(player, line, cat)
            l_10 = self.get_rolling_hit_rate(player, line, cat, last_n=10)

            avg = np.mean(series)
            mov_avg = self.dynamic(player, cat).iloc[-1]
            expected = mov_avg  # adjust(player,cat,opp,avail)
            rows.append([player, opp, home, cat, round(avg, 1), round(mov_avg, 1),
                         round(expected, 1), line, l_10.iloc[-1], season.iloc[-1]])

        odd = pd.DataFrame(rows, columns=columns)
        # odd['blend'] = odd[['Last_10', 'Season']].mean(axis=1)
        if rows:
            odd["Prob"] = 1 - poisson.cdf(mu=odd["expected"], k=odd["line"])
        else:
            odd["Prob"] = pd.Series(dtype=float)
        return odd

gl = Game_Log(game_logs_df)
p = "Jordan Poole"
s = "PTS+TRB"

# Build the player's prop lines here rather than relying on a cell further
# down the notebook: on a fresh kernel that cell has not run yet, so `pool`
# was undefined and this call raised NameError. League id 7 is the filter the
# notebook uses for its `nba` subset.
pool = pp_lines.loc[(pp_lines.league == 7) & (pp_lines.player == p)]
gl.best_odds(pool)

Unnamed: 0,player,opp,home,stat,Season_avg,mov_avg,expected,line,Last_10,Season,Prob
0,Jordan Poole,BOS,1,TRB,2.7,2.5,2.5,3.5,20.0,28.75,0.242424
1,Jordan Poole,BOS,1,PTS+TRB+AST,27.8,29.5,29.5,30.5,40.0,37.5,0.415382
2,Jordan Poole,BOS,1,PTS+TRB,23.3,25.5,25.5,25.5,50.0,41.25,0.486775
3,Jordan Poole,BOS,1,TRB+AST,7.2,6.4,6.4,8.5,10.0,31.25,0.196685
4,Jordan Poole,BOS,1,PTS,20.6,23.1,23.1,22.5,40.0,40.0,0.536004
5,Jordan Poole,BOS,1,PTS+AST,25.1,27.0,27.0,26.5,40.0,45.0,0.525597
6,Jordan Poole,BOS,1,AST,4.5,3.9,3.9,4.5,30.0,45.0,0.351635


In [179]:
# Keep only the rows tagged league 7 (the notebook's `nba` subset), then the
# lines for a single player; the cell displays that player's lines.
nba = pp_lines[pp_lines["league"] == 7]
pool = nba[nba["player"] == "Jordan Poole"]
pool

Unnamed: 0,player,team,line,stat,league,date,event_time,time,prop_id
276,Jordan Poole,BOS,3.5,TRB,7,2023-10-30 18:10:00-05:00,2023-10-30 18:10:00-05:00,2023-10-30 09:39:00,91901f5e-c
277,Jordan Poole,BOS,30.5,PTS+TRB+AST,7,2023-10-30 18:10:00-05:00,2023-10-30 18:10:00-05:00,2023-10-30 09:39:00,520a8cb6-6
279,Jordan Poole,BOS,25.5,PTS+TRB,7,2023-10-30 18:10:00-05:00,2023-10-30 18:10:00-05:00,2023-10-30 09:39:00,14530b86-8
282,Jordan Poole,BOS,8.5,TRB+AST,7,2023-10-30 18:10:00-05:00,2023-10-30 18:10:00-05:00,2023-10-30 09:39:00,0a0e1322-7
283,Jordan Poole,BOS,22.5,PTS,7,2023-10-30 18:10:00-05:00,2023-10-30 18:10:00-05:00,2023-10-30 09:39:00,27b5aeeb-4
284,Jordan Poole,BOS,26.5,PTS+AST,7,2023-10-30 18:10:00-05:00,2023-10-30 18:10:00-05:00,2023-10-30 09:39:00,d7468225-d
285,Jordan Poole,BOS,4.5,AST,7,2023-10-30 18:10:00-05:00,2023-10-30 18:10:00-05:00,2023-10-30 09:39:00,a30a66c5-b


In [177]:
# Value of the combined category `s` in the last game-log row for player `p`.
gl.get_stat(p,s).iloc[-1]

32

In [50]:
#lines-based funcs

def get_line(player, pp_stat, lines):
    """Return the rows of ``lines`` for one player and one stat category.

    Args:
        player: value matched against the ``player`` column.
        pp_stat: value matched against the ``stat`` column.
        lines: lines DataFrame with ``player`` and ``stat`` columns.

    Returns:
        The matching rows (possibly empty), in their original order.
    """
    is_player = lines["player"] == player
    is_stat = lines["stat"] == pp_stat
    return lines.loc[is_player & is_stat]