# Model Scoring Distributions

We will now model the scoring distributions of games. In the cell below, all scoring play columns are added to a dataset of 2023 plays. We record separate columns for home and away field goals worth 3 points, and touchdowns worth 6 points with a separate column for the extra point and two-point conversion.

In [1]:
import pandas as pd
import numpy as np
import altair as alt
from sklearn.tree import DecisionTreeRegressor

In [2]:
all_plays_2023 = pd.read_csv('/Users/MC/Downloads/play_by_play_2023.csv')
plays_2023 = all_plays_2023[['season_type','game_id','week','qtr','total_away_score','total_home_score','quarter_seconds_remaining','sp','posteam','defteam','side_of_field','home_team','away_team','td_team','touchdown','extra_point_result','two_point_conv_result','field_goal_result','total_home_score','total_away_score','desc','play_type','safety']].copy()
all_games = plays_2023[(plays_2023['sp'] == 1) | (plays_2023['extra_point_result'] == 'failed') | (plays_2023['two_point_conv_result'] == 'failure')].copy()
all_games['home_team_score'] = (all_games['td_team'] == all_games['home_team']).map(lambda x:  6 if x == True else 0)
all_games['away_team_score'] = (all_games['td_team'] == all_games['away_team']).map(lambda x:  6 if x == True else 0)
all_games['xp'] = all_games['extra_point_result'].map(lambda x:  1 if x == 'good' else 0).shift(-1).fillna(0)
all_games['2pt'] = all_games['two_point_conv_result'].map(lambda x:  2 if x == 'success' else 0).shift(-1).fillna(0)

homesafety = ((all_games['defteam'] == all_games['home_team']) & (all_games['safety'] == 1)).map(lambda x: 2 if x == True else 0)
awaysafety = ((all_games['defteam'] == all_games['away_team']) & (all_games['safety'] == 1)).map(lambda x: 2 if x == True else 0)
homekick = ((all_games['posteam'] == all_games['home_team']) & (all_games['field_goal_result'] == 'made')).map(lambda x:  3 if x == True else 0)
awaykick = ((all_games['posteam'] == all_games['away_team']) & (all_games['field_goal_result'] == 'made')).map(lambda x:  3 if x == True else 0)

two_h = ((homesafety == 2)).map(lambda x:  2 if x == True else 0)
three_h = ((homekick == 3)).map(lambda x:  3 if x == True else 0)
six_h = ((all_games['home_team_score'] == 6) & (all_games['xp'] == 0) & (all_games['2pt'] == 0)).map(lambda x:  6 if x == True else 0)
seven_h = ((all_games['home_team_score'] == 6) & (all_games['xp'] == 1)).map(lambda x:  7 if x == True else 0)
eight_h = ((all_games['home_team_score'] == 6) & (all_games['2pt'] == 2)).map(lambda x:  8 if x == True else 0)
all_games['home_score'] = two_h + three_h + six_h + seven_h + eight_h

two_a = ((awaysafety == 2)).map(lambda x:  2 if x == True else 0)
three_a = ((awaykick == 3)).map(lambda x:  3 if x == True else 0)
six_a = ((all_games['away_team_score'] == 6) & (all_games['xp'] == 0) & (all_games['2pt'] == 0)).map(lambda x:  6 if x == True else 0)
seven_a = ((all_games['away_team_score'] == 6) & (all_games['xp'] == 1)).map(lambda x:  7 if x == True else 0)
eight_a = ((all_games['away_team_score'] == 6) & (all_games['2pt'] == 2)).map(lambda x:  8 if x == True else 0)
all_games['away_score'] = two_a +three_a + six_a + seven_a + eight_a
all_games

  all_plays_2023 = pd.read_csv('/Users/MC/Downloads/play_by_play_2023.csv')


Unnamed: 0,season_type,game_id,week,qtr,total_away_score,total_home_score,quarter_seconds_remaining,sp,posteam,defteam,...,total_away_score.1,desc,play_type,safety,home_team_score,away_team_score,xp,2pt,home_score,away_score
25,REG,2023_01_ARI_WAS,1,1,0,6,262.0,1,WAS,ARI,...,0,(4:22) (Shotgun) 14-S.Howell pass short left t...,pass,0.0,6,0,1.0,0.0,7,0
26,REG,2023_01_ARI_WAS,1,1,0,7,255.0,1,WAS,ARI,...,0,"6-J.Slye extra point is GOOD, Center-54-C.Chee...",extra_point,0.0,0,0,0.0,0.0,0,0
36,REG,2023_01_ARI_WAS,1,1,3,7,56.0,1,ARI,WAS,...,3,"(:56) 5-M.Prater 28 yard field goal is GOOD, C...",field_goal,0.0,0,0,0.0,0.0,0,3
46,REG,2023_01_ARI_WAS,1,2,6,7,811.0,1,ARI,WAS,...,6,"(13:31) 5-M.Prater 54 yard field goal is GOOD,...",field_goal,0.0,0,0,0.0,0.0,0,3
79,REG,2023_01_ARI_WAS,1,2,12,7,62.0,1,WAS,ARI,...,12,(1:02) (Shotgun) 14-S.Howell sacked at WAS 12 ...,pass,0.0,0,6,1.0,0.0,0,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48370,POST,2023_19_PIT_BUF,19,3,10,24,96.0,1,BUF,PIT,...,10,"(1:36) 2-T.Bass 45 yard field goal is GOOD, Ce...",field_goal,0.0,0,0,0.0,0.0,3,0
48384,POST,2023_19_PIT_BUF,19,4,16,24,637.0,1,PIT,BUF,...,16,(10:37) (Shotgun) 2-M.Rudolph pass short left ...,pass,0.0,0,6,1.0,0.0,0,7
48385,POST,2023_19_PIT_BUF,19,4,17,24,632.0,1,PIT,BUF,...,17,"9-C.Boswell extra point is GOOD, Center-46-C.K...",extra_point,0.0,0,0,0.0,0.0,0,0
48394,POST,2023_19_PIT_BUF,19,4,17,30,397.0,1,BUF,PIT,...,17,(6:37) (Shotgun) 17-J.Allen pass short middle ...,pass,0.0,6,0,1.0,0.0,7,0


We will create a dictionary called scores. This dictionary tells us how many scoring plays occured in quarter 1 for the home and away team.

In [3]:
len_h = (all_games['home_score'] > 0).map(lambda x: 1 if x == True else 0).sum()
len_a = (all_games['away_score'] > 0).map(lambda x: 1 if x == True else 0).sum()
scores = {'home_score': [len_h], 'away_score': [len_a]}

Next, we will create a dictionary of the probability that a single scoring play results in 2,3,6,7, or 8 points.

In [4]:
plays = {place:{i:(all_games[[place]] == i).sum()[place]/scores[place][0] for score in [[len_h,len_a]] for i in [2,3,6,7,8]} for place in ['home_score','away_score']}
plays

{'home_score': {2: 0.010752688172043012,
  3: 0.39619520264681557,
  6: 0.04218362282878412,
  7: 0.5153019023986766,
  8: 0.03556658395368073},
 'away_score': {2: 0.004651162790697674,
  3: 0.41674418604651164,
  6: 0.0586046511627907,
  7: 0.4930232558139535,
  8: 0.026976744186046512}}

We are also interested in how many scoring plays will occur for the home and away team. We will create a dictionary with the probability of how many scoring plays a team has.

In [5]:
num_scores = {place:plays_2023[plays_2023['posteam'] == plays_2023[place]].groupby('game_id',as_index = False).sum()['sp'].value_counts() for place in ['home_team','away_team']}
num_scores['home_score'] = num_scores['home_team'] / sum(num_scores['home_team'] )
num_scores['away_score'] = num_scores['away_team'] / sum(num_scores['away_team'] )
pd.DataFrame(num_scores)

Unnamed: 0_level_0,home_team,away_team,home_score,away_score
sp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,3,3.0,0.010791,0.010791
1,4,10.0,0.014388,0.035971
2,9,13.0,0.032374,0.046763
3,21,34.0,0.07554,0.122302
4,15,27.0,0.053957,0.097122
5,41,43.0,0.147482,0.154676
6,43,43.0,0.154676,0.154676
7,36,31.0,0.129496,0.111511
8,35,21.0,0.125899,0.07554
9,31,24.0,0.111511,0.086331


We will create a function num_sp() which uses the num_scores dictionaries above to generate how many scoring plays a team has per game for a given number of games.

In [6]:
rng = np.random.default_rng()
def num_sp(n,team):
    sp = rng.choice(a=list(num_scores[f"{team}_score"].index), size=n, p=list(num_scores[f"{team}_score"].values))
    return sp

The function games() generate a distribution of single-scoring plays.

In [7]:
rng = np.random.default_rng()
def games(n,team):
    i = 1
    if (team == 'home'): i = 0
    scores_in_game = rng.choice(a=list(plays[f"{team}_score"].keys()), size=n, p=pd.DataFrame(plays.values()).iloc[i])
    return scores_in_game

The function scores() combines the previous functions and dictionaries to print simulated scores for a certain number of games.

In [8]:
def scores(n):
    home_score_plays = num_sp(n,'home')
    away_score_plays = num_sp(n,'away')
    scores_list = []
    for i in range(len(home_score_plays)):
        h = int(sum(games(home_score_plays[i],'home')))
        a = int(sum(games(away_score_plays[i],'away')))
        if h < a:
            scores_list.append((h,a))
        else:
            scores_list.append((a,h))
    return scores_list

For instance, for a 1000 games, these are simulated scores

In [9]:
scores(1000)

[(29, 30),
 (37, 66),
 (22, 37),
 (23, 47),
 (30, 31),
 (40, 44),
 (25, 36),
 (35, 35),
 (30, 36),
 (30, 58),
 (9, 49),
 (23, 27),
 (13, 45),
 (32, 34),
 (45, 58),
 (30, 40),
 (34, 56),
 (26, 29),
 (9, 23),
 (14, 52),
 (30, 51),
 (42, 64),
 (34, 57),
 (12, 27),
 (26, 61),
 (15, 65),
 (44, 60),
 (26, 34),
 (24, 57),
 (30, 37),
 (19, 33),
 (8, 51),
 (17, 41),
 (36, 55),
 (33, 43),
 (13, 24),
 (20, 31),
 (30, 46),
 (23, 53),
 (24, 49),
 (27, 70),
 (41, 43),
 (31, 39),
 (26, 36),
 (30, 35),
 (31, 51),
 (34, 40),
 (44, 55),
 (30, 41),
 (20, 23),
 (24, 40),
 (7, 55),
 (27, 52),
 (30, 31),
 (27, 47),
 (40, 41),
 (17, 31),
 (20, 39),
 (13, 27),
 (14, 47),
 (28, 29),
 (37, 51),
 (15, 19),
 (34, 53),
 (31, 35),
 (32, 45),
 (37, 40),
 (33, 72),
 (43, 44),
 (24, 47),
 (0, 31),
 (21, 41),
 (37, 48),
 (9, 37),
 (24, 50),
 (14, 61),
 (23, 36),
 (16, 17),
 (27, 39),
 (13, 35),
 (34, 41),
 (6, 23),
 (17, 29),
 (15, 37),
 (7, 52),
 (9, 26),
 (27, 44),
 (42, 44),
 (36, 43),
 (48, 53),
 (13, 15),
 (24, 36