In [2]:
import pandas as pd, numpy as np, datetime as dt, re
import matplotlib.pyplot as plt, scipy.stats as stats
import sqlalchemy as sql

from db_info import connection_str

In [216]:
# reference: https://fansided.com/2015/12/21/nylon-calculus-101-possessions/#:~:text=How%20does%20stats.NBA.com,correctly%20factor%20in%20team%20rebounds.
def possessionCalculator(df):
    # equation used to properly account for rebounds as a factor of overall possessions.
    # original formula:
        # 0.5 * (FGA + (0.4*FTA) - 1.07*(ORB / (ORB  + Opp DRB)) * (FGA - FG) + TOV) + (Opp FGA + (0.4*Opp FTA) - 1.07*(Opp ORB / (Opp ORB + DRB)) * (Opp FGA - Opp FG) + Opp TOV)
    home_teams = df[lambda x: x.home_team_bin == 1].loc[:, ["fg", "fga", "fta", "orb", "drb", "tov", "game_code"]]
    away_teams = df[lambda x: x.home_team_bin == 0].loc[:, ["fg", "fga", "fta", "orb", "drb", "tov", "game_code"]]
    df_game_by_game = home_teams.merge(away_teams, on="game_code", suffixes=("_home", "_away")).sort_index()
    
    df_game_by_game["game_possessions"] = 0.5 * ((df_game_by_game["fga_home"] + 0.4*df_game_by_game["fta_home"] - (1.07* (df_game_by_game["orb_home"] / (df_game_by_game["orb_home"] + df_game_by_game["drb_away"])))\
                                                  * (df_game_by_game["fga_home"] - df_game_by_game["fg_home"]) + df_game_by_game["tov_home"] +\
                                                  (df_game_by_game["fga_away"] + 0.4*df_game_by_game["fta_away"] - (1.07* (df_game_by_game["orb_away"] / (df_game_by_game["orb_away"] + df_game_by_game["drb_home"])))\
                                                   * (df_game_by_game["fga_away"] - df_game_by_game["fg_away"]) + df_game_by_game["tov_away"]
                                                  )))
    
    return df.merge(df_game_by_game.loc[:, ["game_code", "game_possessions"]], on="game_code")

In [3]:
engine = sql.create_engine(connection_str)
conn = engine.connect()

In [194]:
df_odds = pd.read_sql("select * from odds", con=conn, index_col="id")
df_time = pd.read_sql("select * from time", con=conn, index_col="id", parse_dates="timestamp")
df_gamecodes = pd.read_sql("select * from gamecodes", con=conn, index_col="id", parse_dates="date")

In [195]:
df_odds.shape

(87282, 9)

In [196]:
df_time.shape

(764039, 5)

In [197]:
df_gamecodes.shape

(5133, 4)

In [198]:
df_time.head()

Unnamed: 0_level_0,timestamp,book,spread,total,game_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2021-10-30 08:10:27,BOVADA,-7 -110,+224.5 -110,1459741
2,2021-10-30 08:35:12,BOVADA,-7 -110,+223.5 -110,1459741
3,2021-10-30 15:43:56,BOVADA,-7 -110,+224 -110,1459741
4,2021-10-30 15:50:00,BOVADA,-7 -110,+224.5 -110,1459741
5,2021-10-30 16:06:15,BOVADA,-7 -110,+223.5 -110,1459741


In [199]:
df_gamecodes.head()

Unnamed: 0_level_0,home_abbv,away_abbv,date,game_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,UTA,SAC,2021-11-02 21:00:00,1459841
2,DAL,MIA,2021-11-02 19:30:00,1459836
3,CHR,CLE,2021-11-01 19:00:00,1459786
4,SAN,DAL,2021-11-03 20:30:00,1459896
5,ATL,WAS,2021-11-01 19:30:00,1459806


In [200]:
df_gamecodes["game_id"] = df_gamecodes.game_id.astype("str")
df_odds["game_id"] = df_odds.game_id.astype("str")
df_time["game_id"] = df_time.game_id.astype("str")

In [190]:
df_odds

Unnamed: 0_level_0,team_abbv,book,moneyline,spread,spread_odds,total,over_odds,under_odds,game_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,SAC,Opening,305,10.0,-110,220.0,-110,-110,1459841
2,UTA,Opening,-455,-10.0,-110,220.0,-110,-110,1459841
3,SAC,BOVADA,310,9.0,-110,223.0,-110,-110,1459841
4,UTA,BOVADA,-415,-9.0,-110,223.0,-110,-110,1459841
5,SAC,BetOnline,340,9.0,-106,223.0,-110,-110,1459841
...,...,...,...,...,...,...,...,...,...
87278,PHO,MyBookie,475,10.0,-110,209.0,-110,-110,894160
87279,BOS,GTBets,-691,-10.0,-111,209.0,-107,-108,894160
87280,PHO,GTBets,492,10.0,-108,209.0,-107,-108,894160
87281,BOS,SkyBook,-662,-10.0,-110,208.5,-110,-110,894160


In [201]:
def seasoner(date):
    if date <= dt.datetime(2018, 6, 8):
        return "17-18"
    elif date <= dt.datetime(2019, 6, 13):
        return "18-19"
    elif date <= dt.datetime(2020, 10, 11):
        return "19-20"
    elif date <= dt.datetime(2021, 7, 21):
        return "20-21"
    else:
        return "21-22"

In [202]:
df_gamecodes["season"] = df_gamecodes.date.apply(seasoner)

In [203]:
df_gamecodes.groupby(["season"]).count()

Unnamed: 0_level_0,home_abbv,away_abbv,date,game_id
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
17-18,1370,1370,1370,1370
18-19,1319,1319,1319,1319
19-20,1144,1144,1144,1144
20-21,1182,1182,1182,1182
21-22,118,118,118,118


In [15]:
engine_2 = sql.create_engine("sqlite:///../data/interim/NBA.db")
conn_2 = engine_2.connect()
df_player_basic= pd.read_sql("SELECT * FROM basic_stats WHERE name = 'Team Totals'", con=conn_2)

In [None]:
not_in_NBA = merged_df[merged_df.home_team.isnull()].sort_values(['game_code'])[lambda x: x.date < dt.datetime(2021, 10, 17)].game_code.values
not_in_SQL = merged_df[merged_df.home_abbv.isnull()].sort_values("game_code").game_code.unique()

In [None]:
# feed game_codes into game.py
not_in_NBA

array(['202008150POR', '202012220BRK', '202012220LAL', '202012230BOS',
       '202012230CHI', '202012230CLE', '202012230DEN', '202012230IND',
       '202012230MEM', '202012230MIN', '202012230ORL', '202012230PHI',
       '202012230PHO', '202012230POR', '202105180BOS', '202105180IND',
       '202105190LAL', '202105190MEM', '202105200WAS', '202105210GSW'],
      dtype=object)

In [None]:
# look up in odds shark and feed into shark.py
not_in_SQL

array(['201801290ATL', '201803180LAC', '201803180MIN', '201803180NOP',
       '201803180TOR', '201803220CHO', '201803220DAL', '201803220HOU',
       '201803220NOP', '201803220ORL', '201803220SAC', '201803250BRK',
       '201803250GSW', '201803250HOU', '201803250IND', '201803250MIL',
       '201803250OKC', '201803250SAC', '201803250TOR', '201803250WAS',
       '201803260CHO', '201803260DET', '201803260MIN', '201803260PHI',
       '201803260PHO', '201803310BOS', '201803310MIA', '201803310NYK',
       '201803310SAC', '201803310WAS', '201804050CLE', '201804050DEN',
       '201804050HOU', '201804050IND', '201804050MIL', '201804050UTA',
       '201804060BOS', '201804060DET', '201804060LAL', '201804060MEM',
       '201804060NYK', '201804060ORL', '201804060PHI', '201804060PHO',
       '201804060TOR', '201804060WAS', '201811190ATL', '201903260CHO',
       '201903260CLE', '201903260DAL', '201903260DEN', '201903260LAL',
       '201903260MIA', '201903260MIL', '201903260MIN', '201903260NOP',
      

In [None]:
with open("../data/interim/bball_ref_additional.txt", "w") as file:
    for x in not_in_NBA:
        file.write(x + "\n")
    file.close()

In [16]:
df_player_basic.head(20)

Unnamed: 0,id,name,minutes_played,fg,fga,fg_pct,fg3,fg3a,fg3_pct,ft,...,stl,blk,tov,pf,pts,bpm,dnp,timetype,team,game_code
0,14,Team Totals,240.0,38,92,0.413,12,30,0.4,19,...,6,6,14,17,107,0.0,0,TotalBasics,DAL,201810260TOR
1,28,Team Totals,60.0,9,23,0.391,2,9,0.222,6,...,1,0,2,7,26,0.0,0,1Q,DAL,201810260TOR
2,42,Team Totals,60.0,13,23,0.565,4,7,0.571,4,...,2,3,3,3,34,0.0,0,2Q,DAL,201810260TOR
3,56,Team Totals,120.0,22,46,0.478,6,16,0.375,10,...,3,3,5,10,60,0.0,0,1H,DAL,201810260TOR
4,70,Team Totals,60.0,9,22,0.409,4,8,0.5,7,...,1,2,2,3,29,0.0,0,3Q,DAL,201810260TOR
5,84,Team Totals,60.0,7,24,0.292,2,6,0.333,2,...,2,1,7,4,18,0.0,0,4Q,DAL,201810260TOR
6,98,Team Totals,120.0,16,46,0.348,6,14,0.429,9,...,3,3,9,7,47,0.0,0,2H,DAL,201810260TOR
7,110,Team Totals,240.0,44,91,0.484,11,27,0.407,17,...,10,5,12,22,116,0.0,0,TotalBasics,TOR,201810260TOR
8,122,Team Totals,60.0,13,24,0.542,4,11,0.364,9,...,2,2,2,7,39,0.0,0,1Q,TOR,201810260TOR
9,134,Team Totals,60.0,12,23,0.522,4,6,0.667,2,...,2,1,3,3,30,0.0,0,2Q,TOR,201810260TOR


In [17]:
check_reference = df_player_basic.query("timetype == 'TotalBasics'").loc[:, ["team", "game_code"]]

In [18]:
abbv_regex = re.compile("[A-Z]{3}")

In [19]:
check_reference["home_team"] = check_reference.game_code.apply(lambda x: abbv_regex.search(x)[0])

In [20]:
check_reference["home_team_bin"] = (check_reference["team"] == check_reference["home_team"]).astype("int32")

In [21]:
check_reference

Unnamed: 0,team,game_code,home_team,home_team_bin
0,DAL,201810260TOR,TOR,0
7,TOR,201810260TOR,TOR,1
14,BOS,201810190TOR,TOR,0
21,TOR,201810190TOR,TOR,1
28,CLE,201810170TOR,TOR,0
...,...,...,...,...
69997,MEM,202105190MEM,MEM,1
70004,MEM,202105210GSW,GSW,0
70012,GSW,202105210GSW,GSW,1
70020,MEM,202008150POR,POR,0


In [94]:
df_gamecodes.shape

(5133, 5)

In [95]:
df_gamecodes.home_abbv.unique()

array(['UTA', 'DAL', 'CHR', 'SAN', 'ATL', 'LAL', 'GS', 'IND', 'BKN',
       'MIN', 'SAC', 'CHI', 'WAS', 'CLE', 'LAC', 'BOS', 'MIL', 'PHI',
       'DET', 'ORL', 'PHO', 'MEM', 'MIA', 'NY', 'TOR', 'DEN', 'NOP',
       'POR', 'HOU', 'OKC'], dtype=object)

In [96]:
check_reference.team.unique()

array(['DAL', 'TOR', 'BOS', 'CLE', 'CHO', 'MIN', 'DET', 'NOP', 'NYK',
       'PHI', 'WAS', 'LAC', 'SAS', 'POR', 'BRK', 'IND', 'GSW', 'PHO',
       'OKC', 'ORL', 'CHI', 'UTA', 'HOU', 'ATL', 'MIL', 'DEN', 'SAC',
       'MIA', 'LAL', 'MEM'], dtype=object)

In [204]:
shark_teams_flipper = {'CHR': 'CHO', 'SAN': 'SAS', 'GS': 'GSW', 'BKN': 'BRK', 'NY': 'NYK'}
df_gamecodes.home_abbv = df_gamecodes.home_abbv.apply(lambda x: shark_teams_flipper[x] if x in shark_teams_flipper.keys() else x)
df_gamecodes.away_abbv = df_gamecodes.away_abbv.apply(lambda x: shark_teams_flipper[x] if x in shark_teams_flipper.keys() else x) 

In [205]:
# Transform game_id into bball ref code 
df_gamecodes["game_code"] = (df_gamecodes.date.apply(dt.datetime.strftime, format="%Y%m%d0")) + df_gamecodes.home_abbv

In [24]:
merged_df = df_gamecodes.merge(check_reference, "left", on="game_code")

In [25]:
merged_df[merged_df.season != "21-22"].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10070 entries, 117 to 10187
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   home_abbv      10070 non-null  object        
 1   away_abbv      10070 non-null  object        
 2   date           10070 non-null  datetime64[ns]
 3   game_id        10070 non-null  object        
 4   season         10070 non-null  object        
 5   game_code      10070 non-null  object        
 6   team           10070 non-null  object        
 7   home_team      10070 non-null  object        
 8   home_team_bin  10070 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(7)
memory usage: 786.7+ KB


In [26]:
completed_seasons = merged_df[merged_df.season != "21-22"]
completed_seasons[completed_seasons.team.isnull()]

Unnamed: 0,home_abbv,away_abbv,date,game_id,season,game_code,team,home_team,home_team_bin


In [27]:
check_reference[check_reference.home_team == "GSW"].sort_values("game_code")

Unnamed: 0,team,game_code,home_team,home_team_bin
56741,GSW,201710170GSW,GSW,1
56734,HOU,201710170GSW,GSW,0
56552,TOR,201710250GSW,GSW,0
56559,GSW,201710250GSW,GSW,1
56867,GSW,201710270GSW,GSW,1
...,...,...,...,...
68226,NOP,202105140GSW,GSW,0
68282,MEM,202105160GSW,GSW,0
68289,GSW,202105160GSW,GSW,1
70004,MEM,202105210GSW,GSW,0


In [28]:
bball_merged = check_reference.merge(df_gamecodes, "left", on="game_code")

In [29]:
bball_merged[bball_merged.home_abbv.isnull()]

Unnamed: 0,team,game_code,home_team,home_team_bin,home_abbv,away_abbv,date,game_id,season


In [30]:
# creating sample data

In [74]:
sample_cumulative = df_gamecodes.date.sort_values(ascending=True).apply(dt.datetime.strftime, format="%Y-%m-%d").drop_duplicates().reset_index(drop=True).sort_values()

In [75]:
winnings = pd.Series([np.random.randint(-25, 25) for x in range(len(sample_cumulative))], name="winnings")

In [96]:
sample_bar_data = pd.concat([sample_cumulative, winnings], axis=1)

In [110]:
sample_bar_data.date = pd.to_datetime(sample_bar_data.date)

In [111]:
full_range_index = pd.date_range(start_date, end_date, freq="D")

In [112]:
sample_bar_data.index = sample_bar_data.date
sample_bar_data = sample_bar_data.drop("date", axis=1)

In [138]:
# sample_bar_data.resample("1D").ffill().iloc[115:130, :]
# sample_bar_data.iloc[115:130, :]

In [141]:
line_chart_sample_df = sample_bar_data.reset_index()

In [147]:
book_summary_sample_df = pd.DataFrame({"Book": ['BetNow', 'BetOnline', 'Bovada', "GTBets", 'Intertops', 'MyBookie', 'Opening', 'Skybook', 'SportsBetting', 'Average', 'Best', 'Worst'], 
"Wins": [1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 666, 237, 1800], "Losses": [99, 100, 102, 103, 104, 105, 106, 107, 108, 161, 223, 954], "Pushes": [2, 3, 4, 5, 6, 8, 9, 11, 21, 105, 523, 301],
"Win_Pct": [.542, .476, .505, .610, .714, .712, .505, .301, .494, .511, .35, .409],"Metric":["Spread", "Spread", "Spread", "Spread", "Spread", "Spread", "Spread", "Spread", "Spread", "Spread", "Spread", "Spread"],
"Season":["17-18", "17-18", "17-18", "17-18", "17-18", "17-18", "17-18", "17-18", "17-18", "17-18", "17-18", "17-18"]})

In [148]:
book_summary_sample_df

Unnamed: 0,Book,Wins,Losses,Pushes,Win_Pct,Metric,Season
0,BetNow,1000,99,2,0.542,Spread,17-18
1,BetOnline,1001,100,3,0.476,Spread,17-18
2,Bovada,1002,102,4,0.505,Spread,17-18
3,GTBets,1003,103,5,0.61,Spread,17-18
4,Intertops,1004,104,6,0.714,Spread,17-18
5,MyBookie,1005,105,8,0.712,Spread,17-18
6,Opening,1006,106,9,0.505,Spread,17-18
7,Skybook,1007,107,11,0.301,Spread,17-18
8,SportsBetting,1008,108,21,0.494,Spread,17-18
9,Average,666,161,105,0.511,Spread,17-18


In [160]:
df_team_toals_basic = df_player_basic.query("timetype == 'TotalBasics'")

In [163]:
df_advanced_totals = pd.read_sql("SELECT * FROM advanced_stats WHERE name = 'Team Totals'", con=conn_2)

In [168]:
team_rankings_sample = pd.DataFrame()

In [174]:
for i, team in enumerate(df_team_toals_basic.team.unique()):
    team_rankings_sample.loc[i, "Team"] = team
    team_rankings_sample.loc[i, "Avg_Spread"] = np.random.randint(1, 12) 
    team_rankings_sample.loc[i, "Scoring"] = np.random.randint(99, 114)
    team_rankings_sample.loc[i, "eFG"] = np.random.randint(38, 59) * .01
    team_rankings_sample.loc[i, "TOV"] = np.random.randint(10, 40) * .01
    team_rankings_sample.loc[i, "ORB"] = np.random.randint(12, 35) * .01
    team_rankings_sample.loc[i, "FT_per_FGA"] = np.random.randint(90, 272) * .001
    team_rankings_sample.loc[i, "Pace"] = np.random.randint(95, 120)
    team_rankings_sample.loc[i, "Ws"] = np.random.randint(33, 82)
    team_rankings_sample.loc[i, "Ls"] = np.random.randint(33, 82)
    team_rankings_sample.loc[i, "Ps"] = np.random.randint(1, 5)
    team_rankings_sample.loc[i, "Season"] = "17-18" 

In [175]:
team_rankings_sample

Unnamed: 0,Team,Avg_Spread,Scoring,eFG,TOV,ORB,FT_per_FGA,Pace,Ws,Ls,Ps,Season
0,DAL,6.0,110.0,0.5,0.22,0.25,0.123,110.0,49.0,39.0,3.0,17-18
1,TOR,8.0,103.0,0.51,0.12,0.21,0.147,117.0,67.0,57.0,1.0,17-18
2,BOS,1.0,99.0,0.5,0.13,0.23,0.187,104.0,62.0,66.0,2.0,17-18
3,CLE,9.0,107.0,0.5,0.24,0.26,0.174,109.0,72.0,66.0,3.0,17-18
4,CHO,8.0,102.0,0.45,0.16,0.14,0.115,116.0,56.0,58.0,3.0,17-18
5,MIN,7.0,103.0,0.47,0.21,0.22,0.212,110.0,58.0,34.0,1.0,17-18
6,DET,2.0,105.0,0.39,0.39,0.23,0.103,116.0,74.0,37.0,2.0,17-18
7,NOP,10.0,111.0,0.38,0.16,0.14,0.173,106.0,34.0,78.0,4.0,17-18
8,NYK,4.0,102.0,0.42,0.23,0.22,0.203,106.0,68.0,59.0,2.0,17-18
9,PHI,7.0,102.0,0.4,0.17,0.33,0.271,97.0,42.0,41.0,4.0,17-18


In [177]:
team_rankings_sample.to_sql()

Unnamed: 0,Team,Avg_Spread,Scoring,eFG,TOV,ORB,FT_per_FGA,Pace,Ws,Ls,Ps,Season
0,DAL,6.0,110.0,0.5,0.22,0.25,0.123,110.0,49.0,39.0,3.0,17-18
1,TOR,8.0,103.0,0.51,0.12,0.21,0.147,117.0,67.0,57.0,1.0,17-18
2,BOS,1.0,99.0,0.5,0.13,0.23,0.187,104.0,62.0,66.0,2.0,17-18
3,CLE,9.0,107.0,0.5,0.24,0.26,0.174,109.0,72.0,66.0,3.0,17-18
4,CHO,8.0,102.0,0.45,0.16,0.14,0.115,116.0,56.0,58.0,3.0,17-18
5,MIN,7.0,103.0,0.47,0.21,0.22,0.212,110.0,58.0,34.0,1.0,17-18
6,DET,2.0,105.0,0.39,0.39,0.23,0.103,116.0,74.0,37.0,2.0,17-18
7,NOP,10.0,111.0,0.38,0.16,0.14,0.173,106.0,34.0,78.0,4.0,17-18
8,NYK,4.0,102.0,0.42,0.23,0.22,0.203,106.0,68.0,59.0,2.0,17-18
9,PHI,7.0,102.0,0.4,0.17,0.33,0.271,97.0,42.0,41.0,4.0,17-18


In [212]:
four_factors = df_advanced_totals.loc[:, ["team", "efg_pct", "tov_pct", "orb_pct", "fta_per_fga_pct", "game_code"]]

In [213]:
df_player_basic

Unnamed: 0,id,name,minutes_played,fg,fga,fg_pct,fg3,fg3a,fg3_pct,ft,...,stl,blk,tov,pf,pts,bpm,dnp,timetype,team,game_code
0,14,Team Totals,240.0,38,92,0.413,12,30,0.400,19,...,6,6,14,17,107,0.0,0,TotalBasics,DAL,201810260TOR
1,28,Team Totals,60.0,9,23,0.391,2,9,0.222,6,...,1,0,2,7,26,0.0,0,1Q,DAL,201810260TOR
2,42,Team Totals,60.0,13,23,0.565,4,7,0.571,4,...,2,3,3,3,34,0.0,0,2Q,DAL,201810260TOR
3,56,Team Totals,120.0,22,46,0.478,6,16,0.375,10,...,3,3,5,10,60,0.0,0,1H,DAL,201810260TOR
4,70,Team Totals,60.0,9,22,0.409,4,8,0.500,7,...,1,2,2,3,29,0.0,0,3Q,DAL,201810260TOR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70029,965037,Team Totals,0.0,,,0.000,,,0.000,,...,,,,,,0.0,0,2Q,POR,202008150POR
70030,965050,Team Totals,0.0,,,0.000,,,0.000,,...,,,,,,0.0,0,1H,POR,202008150POR
70031,965063,Team Totals,0.0,,,0.000,,,0.000,,...,,,,,,0.0,0,3Q,POR,202008150POR
70032,965076,Team Totals,0.0,,,0.000,,,0.000,,...,,,,,,0.0,0,4Q,POR,202008150POR


In [218]:
df_player_basic = df_player_basic.query("timetype == 'TotalBasics'")

In [221]:
df_player_basic["home_team"] = df_player_basic.game_code.apply(lambda x: abbv_regex.search(x)[0])
df_player_basic["home_team_bin"] = (df_player_basic["team"] == df_player_basic["home_team"]).astype("int32")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [237]:
pts_and_possessions = possessionCalculator(df_player_basic).loc[:, ["team", "pts", "game_possessions", "game_code", "home_team_bin"]]

In [238]:
results_df = pts_and_possessions.merge(four_factors, on=["game_code", "team"])

In [239]:
results_df.orb_pct = results_df.orb_pct / 100
results_df.tov_pct = results_df.tov_pct / 100

In [240]:
results_df

Unnamed: 0,team,pts,game_possessions,game_code,home_team_bin,efg_pct,tov_pct,orb_pct,fta_per_fga_pct
0,DAL,107,100.722,201810260TOR,0,0.478,0.120,0.208,0.261
1,TOR,116,100.722,201810260TOR,1,0.544,0.107,0.261,0.220
2,BOS,101,97.7803,201810190TOR,0,0.475,0.119,0.302,0.101
3,TOR,113,97.7803,201810190TOR,1,0.538,0.107,0.267,0.207
4,CLE,104,100.326,201810170TOR,0,0.441,0.135,0.286,0.459
...,...,...,...,...,...,...,...,...,...
10471,MEM,100,97.4041,202105190MEM,1,0.483,0.082,0.216,0.267
10472,MEM,117,105.546,202105210GSW,0,0.485,0.106,0.302,0.265
10473,GSW,112,105.546,202105210GSW,1,0.511,0.170,0.262,0.213
10474,MEM,122,104.101,202008150POR,0,0.565,0.150,0.286,0.239


In [241]:
df_odds

Unnamed: 0_level_0,team_abbv,book,moneyline,spread,spread_odds,total,over_odds,under_odds,game_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,SAC,Opening,305,10.0,-110,220.0,-110,-110,1459841
2,UTA,Opening,-455,-10.0,-110,220.0,-110,-110,1459841
3,SAC,BOVADA,310,9.0,-110,223.0,-110,-110,1459841
4,UTA,BOVADA,-415,-9.0,-110,223.0,-110,-110,1459841
5,SAC,BetOnline,340,9.0,-106,223.0,-110,-110,1459841
...,...,...,...,...,...,...,...,...,...
87278,PHO,MyBookie,475,10.0,-110,209.0,-110,-110,894160
87279,BOS,GTBets,-691,-10.0,-111,209.0,-107,-108,894160
87280,PHO,GTBets,492,10.0,-108,209.0,-107,-108,894160
87281,BOS,SkyBook,-662,-10.0,-110,208.5,-110,-110,894160


In [271]:
book_info_df = df_odds.merge(df_gamecodes.loc[:, ["game_id", "game_code"]], on=["game_id"])[lambda x: x.spread != -999].reset_index(drop=True)

In [283]:
book_info_df.groupby(["game_code", "team_abbv"]).idxmin() # Would Be Highest Favored Line, Priceiest Moenyline,Lowest Total = min

Unnamed: 0_level_0,Unnamed: 1_level_0,moneyline,spread,spread_odds,total,over_odds,under_odds
game_code,team_abbv,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
201710170CLE,BOS,75680,75674,75674,75674,75672,75672
201710170CLE,CLE,75673,75673,75673,75675,75673,75673
201710170GSW,GS,75779,75773,75773,75773,75773,75771
201710170GSW,HOU,75770,75770,75770,75772,75772,75770
201710180BOS,BOS,75809,75801,75801,75807,75799,75801
...,...,...,...,...,...,...,...
202111030SAC,SAC,177,179,179,179,177,177
202111030SAS,DAL,50,50,52,48,52,48
202111030SAS,SAN,63,49,49,49,53,49
202111030WAS,TOR,208,208,212,210,208,208


In [None]:
book_info_df.groupby("game_code").idxmax() # Would be Most Points Given to Underdog, Best Underdog Moneyline and Highest Total 

In [280]:
book_info_df.iloc[75772:75785, :]

Unnamed: 0,team_abbv,book,moneyline,spread,spread_odds,total,over_odds,under_odds,game_id,game_code
75772,HOU,BetOnline,375,9.5,-103,230.5,-115,-105,888635,201710170GSW
75773,GS,BetOnline,-450,-9.5,-117,230.5,-115,-105,888635,201710170GSW
75774,HOU,Intertops,355,9.5,-110,231.0,-110,-110,888635,201710170GSW
75775,GS,Intertops,-460,-9.5,-110,231.0,-110,-110,888635,201710170GSW
75776,HOU,SportsBetting,375,9.5,-103,230.5,-115,-105,888635,201710170GSW
75777,GS,SportsBetting,-450,-9.5,-117,230.5,-115,-105,888635,201710170GSW
75778,HOU,BetNow,395,9.5,-110,231.0,-110,-110,888635,201710170GSW
75779,GS,BetNow,-495,-9.5,-110,231.0,-110,-110,888635,201710170GSW
75780,HOU,MyBookie,370,9.5,-110,231.0,-110,-110,888635,201710170GSW
75781,GS,MyBookie,-495,-9.5,-110,231.0,-110,-110,888635,201710170GSW
