In [None]:
import pandas as pd 
import random as rnd
import numpy as np 
from numpy import average
import matplotlib.pyplot as plt

In [None]:
# Scrape the most recent game log for each team; adjust the dates as needed.

In [None]:
# Download the MEM 2023 game log page; the second header row supplies the
# column names and the first table on the page is kept.
a_gamelog = pd.read_html(
    'https://www.basketball-reference.com/teams/MEM/2023/gamelog/',
    header=1,
)[0]

In [None]:
# Replace the ambiguous scraped headers with descriptive column names.
a_gamelog = a_gamelog.rename(
    {
        'Unnamed: 3': 'A/H',
        'Tm': 'Tm_PTS',
        'Opp.1': 'Opp_PTS',
        '3PA': 'Three_FGA',
        '3PA.1': 'OThree_FGA',
    },
    axis='columns',
)

In [None]:
# Missing venue markers become 'H'; afterwards every '@' value anywhere in the
# frame is rewritten to 'A'.
a_gamelog = (
    a_gamelog
    .assign(**{'A/H': a_gamelog['A/H'].replace(np.nan, 'H')})
    .replace({'@': 'A'})
)

In [None]:
# Keep only rows whose Date parses as YYYY-MM-DD. This removes non-game rows
# (presumably the header rows the site repeats inside the table body) wherever
# they occur, instead of relying on hard-coded index labels that raise KeyError
# on a short log and miss any later repeats. Index labels are left untouched.
a_gamelog = a_gamelog.loc[
    pd.to_datetime(a_gamelog['Date'], format='%Y-%m-%d', errors='coerce').notna()
].copy()

In [None]:
# Download the ORL 2023 game log page; the second header row supplies the
# column names and the first table on the page is kept.
h_gamelog = pd.read_html(
    'https://www.basketball-reference.com/teams/ORL/2023/gamelog/',
    header=1,
)[0]

In [None]:
# Replace the ambiguous scraped headers with descriptive column names.
h_gamelog = h_gamelog.rename(
    {
        'Unnamed: 3': 'A/H',
        'Tm': 'Tm_PTS',
        'Opp.1': 'Opp_PTS',
        '3PA': 'Three_FGA',
        '3PA.1': 'OThree_FGA',
    },
    axis='columns',
)

In [None]:
# Missing venue markers become 'H'; afterwards every '@' value anywhere in the
# frame is rewritten to 'A'.
h_gamelog = (
    h_gamelog
    .assign(**{'A/H': h_gamelog['A/H'].replace(np.nan, 'H')})
    .replace({'@': 'A'})
)

In [None]:
# Keep only rows whose Date parses as YYYY-MM-DD. This removes non-game rows
# (presumably the header rows the site repeats inside the table body) wherever
# they occur, instead of relying on hard-coded index labels that raise KeyError
# on a short log and miss any later repeats. Index labels are left untouched.
h_gamelog = h_gamelog.loc[
    pd.to_datetime(h_gamelog['Date'], format='%Y-%m-%d', errors='coerce').notna()
].copy()

In [None]:
# Keep only the games played inside the recent-form window (both bounds inclusive).
#
# The dates are parsed into separate Series and compared as real timestamps.
# Assigning the parsed values to a `.date` attribute would not touch the `Date`
# column at all, and rows whose Date cannot be parsed become NaT and therefore
# fall outside the window.
# NOTE(review): the end bound lies after the 2022-23 season, so the window is
# effectively open-ended - confirm that is intended.
window_start = pd.Timestamp('2022-12-24')
window_end = pd.Timestamp('2023-12-04')

a_game_dates = pd.to_datetime(a_gamelog['Date'], format='%Y-%m-%d', errors='coerce')
a_gamelog_last = a_gamelog.loc[a_game_dates.between(window_start, window_end)]

h_game_dates = pd.to_datetime(h_gamelog['Date'], format='%Y-%m-%d', errors='coerce')
h_gamelog_last = h_gamelog.loc[h_game_dates.between(window_start, window_end)]


In [None]:
# Histogram (5 bins, black bar edges) of the Tm_PTS column of a_gamelog_last,
# i.e. the away team's games inside the recent window.

a_gamelog_last['Tm_PTS'].hist(bins = 5, edgecolor = 'black')

In [None]:
# AWAY OFFENSE
# Tm_PTS as integers: `dist` comes from the whole game log, `weights` from the
# recent-window subset only.

ao_gamelog_dist, ao_gamelog_weights = (
    frame['Tm_PTS'].astype(str).astype(int)
    for frame in (a_gamelog, a_gamelog_last)
)


def a_weight_average_o(ao_gamelog_dist, ao_gamelog_weights):
    """Return the rounded weighted average used for the away offense.

    Values and weights are paired positionally with ``zip``, so pairing stops
    at the shorter input, while the divisor is the sum of *all* weights.

    Args:
        ao_gamelog_dist: iterable of point values.
        ao_gamelog_weights: iterable of weights; iterated twice.

    Returns:
        ``round(sum(value * weight) / sum(weights))``.

    Raises:
        ZeroDivisionError: if the weights sum to zero (e.g. empty weights).
    """
    numerator = sum(
        points * weight
        for points, weight in zip(ao_gamelog_dist, ao_gamelog_weights)
    )
    return round(numerator / sum(ao_gamelog_weights))

# AWAY DEFENSE
# Opp_PTS as integers: `dist` comes from the whole game log, `weights` from the
# recent-window subset only.

ad_gamelog_dist, ad_gamelog_weights = (
    frame['Opp_PTS'].astype(str).astype(int)
    for frame in (a_gamelog, a_gamelog_last)
)

def a_weight_average_d(ad_gamelog_dist, ad_gamelog_weights):
    """Return the rounded weighted average used for the away defense.

    Values and weights are paired positionally with ``zip``, so pairing stops
    at the shorter input, while the divisor is the sum of *all* weights.

    Args:
        ad_gamelog_dist: iterable of point values.
        ad_gamelog_weights: iterable of weights; iterated twice.

    Returns:
        ``round(sum(value * weight) / sum(weights))``.

    Raises:
        ZeroDivisionError: if the weights sum to zero (e.g. empty weights).
    """
    running_total = 0
    for points_allowed, weight in zip(ad_gamelog_dist, ad_gamelog_weights):
        running_total = running_total + points_allowed * weight

    weight_total = sum(ad_gamelog_weights)
    return round(running_total / weight_total)

# Standard deviation of ao_gamelog_weights (the recent-window Tm_PTS values).
a_gamelog_last_std = ao_gamelog_weights.std()

In [None]:
# HOME OFFENSE
# Tm_PTS as integers: `dist` comes from the whole game log, `weights` from the
# recent-window subset only.

ho_gamelog_dist, ho_gamelog_weights = (
    frame['Tm_PTS'].astype(str).astype(int)
    for frame in (h_gamelog, h_gamelog_last)
)

def h_weight_average_o(ho_gamelog_dist, ho_gamelog_weights):
    """Return the rounded weighted average used for the home offense.

    Values and weights are paired positionally with ``zip``, so pairing stops
    at the shorter input, while the divisor is the sum of *all* weights.

    Args:
        ho_gamelog_dist: iterable of point values.
        ho_gamelog_weights: iterable of weights; iterated twice.

    Returns:
        ``round(sum(value * weight) / sum(weights))``.

    Raises:
        ZeroDivisionError: if the weights sum to zero (e.g. empty weights).
    """
    products = [
        points * weight
        for points, weight in zip(ho_gamelog_dist, ho_gamelog_weights)
    ]
    weight_total = sum(ho_gamelog_weights)
    return round(sum(products) / weight_total)
                 

# HOME DEFENSE
# Opp_PTS as integers: `dist` comes from the whole game log, `weights` from the
# recent-window subset only.

hd_gamelog_dist, hd_gamelog_weights = (
    frame['Opp_PTS'].astype(str).astype(int)
    for frame in (h_gamelog, h_gamelog_last)
)

def h_weight_average_d(hd_gamelog_dist, hd_gamelog_weights):
    """Return the rounded weighted average used for the home defense.

    Values and weights are paired positionally with ``zip``, so pairing stops
    at the shorter input, while the divisor is the sum of *all* weights.

    Args:
        hd_gamelog_dist: iterable of point values.
        hd_gamelog_weights: iterable of weights; iterated twice.

    Returns:
        ``round(sum(value * weight) / sum(weights))``.

    Raises:
        ZeroDivisionError: if the weights sum to zero (e.g. empty weights).
    """
    weighted_sum = sum(
        Opp_PTS * weight
        for Opp_PTS, weight in zip(hd_gamelog_dist, hd_gamelog_weights)
    )
    return round(weighted_sum / sum(hd_gamelog_weights))



In [None]:
# Weighted point averages, followed by plain (unweighted) standard deviations.
# Each standard deviation is labelled with the series it comes from: the whole
# game log ("all games") or the recent-window subset ("recent").
print('Away wOff Points', a_weight_average_o(ao_gamelog_dist, ao_gamelog_weights))
print('Away wDef Points', a_weight_average_d(ad_gamelog_dist, ad_gamelog_weights))
print('Away Off StDv (all games)', ao_gamelog_dist.std())
print('Away Def StDv (all games)', ad_gamelog_dist.std())
print('Away Off StDv (recent)', ao_gamelog_weights.std())
print('Away Def StDv (recent)', ad_gamelog_weights.std())
print()
print('Home wOff Points', h_weight_average_o(ho_gamelog_dist, ho_gamelog_weights))
print('Home wDef Points', h_weight_average_d(hd_gamelog_dist, hd_gamelog_weights))
print('Home Off StDv (all games)', ho_gamelog_dist.std())
print('Home Def StDv (all games)', hd_gamelog_dist.std())
print('Home Off StDv (recent)', ho_gamelog_weights.std())
print('Home Def StDv (recent)', hd_gamelog_weights.std())

In [None]:
# Simulation inputs read as module-level globals by the sim functions.
# Naming: a/h = away/home, o/d = offense/defense, pts = mean points,
# std = standard deviation passed to rnd.gauss.
# NOTE(review): these look hand-copied from the printed summary - confirm they
# match the current output before re-running the simulations.
aopts = 120
adpts = 121
aostd = 5.1
adstd = 10.7



# Home-team counterparts of the values above.
hopts = 118
hdpts = 116
hostd = 6.6
hdstd = 6.3


def gameSim():
    """Simulate one game and report the winner.

    Each side's score is the mean of two Gaussian draws: its own offense and
    the opponent's defense. Scores are rounded to whole points before being
    compared.

    Returns:
        'away' or 'home' for a decided game, or the integer 0 when the rounded
        scores are equal.
    """
    # Draw order is kept as away offense, home defense, home offense, away
    # defense so a seeded run consumes the random stream the same way.
    away_offense = rnd.gauss(aopts, aostd)
    home_defense = rnd.gauss(hdpts, hdstd)
    home_offense = rnd.gauss(hopts, hostd)
    away_defense = rnd.gauss(adpts, adstd)

    away_points = int(round((away_offense + home_defense) / 2))
    home_points = int(round((home_offense + away_defense) / 2))

    if away_points == home_points:
        return 0
    return 'away' if away_points > home_points else 'home'




In [None]:
# Here we can run the sim a defined number of times. '(ns)' = number of simulations

def gamesSim(ns):
    """Run ``gameSim`` ``ns`` times, print outcome percentages, return all outcomes.

    Printed figures are real percentages (0-100, two decimals):
      * AW / HW - away / home wins as a share of decided (non-tied) games,
        so the two add up to 100.
      * T - ties as a share of all simulated games.
    A share whose denominator is zero is printed as 0.0 instead of raising.

    Args:
        ns: number of simulations to run.

    Returns:
        List with one entry per simulation, exactly as returned by ``gameSim``
        ('away', 'home', or anything else, which is counted as a tie).
    """
    games = [gameSim() for _ in range(ns)]

    a_win = games.count('away')
    h_win = games.count('home')
    tie = len(games) - a_win - h_win
    decided = a_win + h_win

    def pct(count, total):
        # Guard the empty / all-ties cases, which would otherwise divide by zero.
        return round(100 * count / total, 2) if total else 0.0

    print('AW', pct(a_win, decided), '%')
    print('HW', pct(h_win, decided), '%')
    print('T', pct(tie, len(games)), '%')

    return games

In [None]:
# Run 100,000 simulated games. The trailing semicolon keeps the notebook from
# echoing the returned 100,000-element list; only the printed summary is shown.
gamesSim(100000);

In [None]:
# Totals sim

In [None]:
def gameTotal(line=236):
    """Simulate one game and grade its combined score against a totals line.

    Each side's score is the mean of two Gaussian draws (own offense and the
    opponent's defense), rounded to whole points before the two are added.

    Args:
        line: the totals line to grade against (defaults to 236).

    Returns:
        'over', 'under', or 'push' (combined score equal to ``line``).
    """
    a_scoret = (rnd.gauss(aopts, aostd) + rnd.gauss(hdpts, hdstd)) / 2
    h_scoret = (rnd.gauss(hopts, hostd) + rnd.gauss(adpts, adstd)) / 2

    # Round once and reuse the combined total for every comparison.
    total = int(round(h_scoret)) + int(round(a_scoret))

    if total > line:
        return 'over'
    if total < line:
        return 'under'
    return 'push'

In [None]:
def gameSimTotal(ns):
    """Run ``gameTotal`` ``ns`` times, print outcome percentages, return all outcomes.

    Printed figures are real percentages (0-100, two decimals) of the number
    of simulations actually run, so they stay correct for any ``ns``. With no
    simulations every figure is printed as 0.0.

    Args:
        ns: number of simulations to run.

    Returns:
        List with one entry per simulation, exactly as returned by
        ``gameTotal``. Entries other than 'over', 'under' and 'push' are kept
        in the list but not counted in any printed figure.
    """
    gametotal = [gameTotal() for _ in range(ns)]
    runs = len(gametotal)

    def pct(count):
        return round(100 * count / runs, 2) if runs else 0.0

    print('o', pct(gametotal.count('over')), '%')
    print('u', pct(gametotal.count('under')), '%')
    print('p', pct(gametotal.count('push')), '%')

    return gametotal

In [None]:
# Run 100,000 simulated totals. The trailing semicolon keeps the notebook from
# echoing the returned 100,000-element list; only the printed summary is shown.
gameSimTotal(100000);

In [None]:
# Spread Sim

In [None]:
# The margin is measured as away minus home, i.e. the away team is treated as
# the favourite. Swap the two scores in the margin when the home team is favoured.

def gameSimSpread(spread=8):
    """Simulate one game and grade the favourite against the point spread.

    Scores use the same model as ``gameSim``: each side's score is the mean of
    a draw from its own offense and a draw from the opponent's defense. Drawing
    both sides from the two offenses alone would give them identical
    distributions and an expected margin of zero.

    Args:
        spread: points the favourite must win by (defaults to 8).

    Returns:
        1 if the favourite wins by more than ``spread``, -1 if it wins by less
        (or loses), 0 if the margin equals ``spread`` exactly.
    """
    a_spread = (rnd.gauss(aopts, aostd) + rnd.gauss(hdpts, hdstd)) / 2
    h_spread = (rnd.gauss(hopts, hostd) + rnd.gauss(adpts, adstd)) / 2

    margin = int(round(a_spread)) - int(round(h_spread))

    if margin > spread:
        return 1
    if margin < spread:
        return -1
    return 0

In [None]:
def gamesSimSpread(ns):
    """Run ``gameSimSpread`` ``ns`` times, print cover percentages, return all results.

    Printed figures are real percentages (0-100, two decimals) of the number
    of simulations actually run, so they stay correct for any ``ns``. With no
    simulations every figure is printed as 0.0.

    Args:
        ns: number of simulations to run.

    Returns:
        List with one entry per simulation, exactly as returned by
        ``gameSimSpread``: 1 counts as a favourite cover, -1 as an underdog
        cover, anything else as a push.
    """
    gamesspread = [gameSimSpread() for _ in range(ns)]
    runs = len(gamesspread)

    f_cover = gamesspread.count(1)
    d_cover = gamesspread.count(-1)
    # Every result that is neither cover is a push and must be counted.
    push = runs - f_cover - d_cover

    def pct(count):
        return round(100 * count / runs, 2) if runs else 0.0

    print('Fav', pct(f_cover), '%')
    print('Dog', pct(d_cover), '%')
    print('Push', pct(push), '%')

    return gamesspread

# Run 100,000 simulated spreads. The trailing semicolon keeps the notebook from
# echoing the returned 100,000-element list; only the printed summary is shown.
gamesSimSpread(100000);

In [None]:
# Refs

In [None]:
# Download the 2023 referee register; the second header row supplies the
# column names and the first table on the page is kept.
refs = pd.read_html(
    'https://www.basketball-reference.com/referees/2023_register.html',
    header = 1,
)[0]

# 'Referee' stays a regular column because the crew filter below selects on it.
refs = refs.rename(columns = {'PTS.3': 'Away_PTS', 'PTS.2': 'Home_PTS',
                              'PF.3': 'Away_PF', 'PF.2':'Home_PF',
                              'PF':'Total_PF'})

# Rows for the officiating crew of interest.
crew = ['Bill Kennedy', 'JB DeRosa', 'Johnathan Sterling']
referees = refs.loc[refs['Referee'].isin(crew)]

# Coerce to numeric before aggregating so that any text values in the scraped
# columns become NaN instead of breaking mean() / median().
stat_cols = ['PTS', 'Away_PTS', 'Home_PTS', 'Away_PF', 'Home_PF', 'Total_PF']
crew_stats = referees[stat_cols].apply(pd.to_numeric, errors = 'coerce')

print(crew_stats.mean())
print(crew_stats.median())

In [None]:
# Histograms over the recent-window frames:
#   light blue = Tm_PTS of the away frame (a_gamelog_last)
#   blue       = Opp_PTS of the home frame (h_gamelog_last)
a_gamelog_last.Tm_PTS.hist(color = 'lightblue')
# Disabled alternative: Tm_PTS of the home frame.
#h_gamelog_last.Tm_PTS.hist(color = 'wheat')
h_gamelog_last.Opp_PTS.hist(color = 'blue')