In [2]:
import pandas as pd
pd.set_option("display.max_columns", None)
import matplotlib.pyplot as plt

In [3]:
file = "data/kicking/player_stats_kicking_2021.csv.gz"
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines="skip"` is its replacement and likewise drops malformed rows.
df = pd.read_csv(file, compression='gzip', on_bad_lines="skip")
df.head(6)
# Note: data is broken down by week for each kicker

Unnamed: 0,season,week,season_type,team,player_name,player_id,fg_made,fg_missed,fg_blocked,fg_long,fg_att,fg_pct,pat_made,pat_missed,pat_blocked,pat_att,pat_pct,fg_made_distance,fg_missed_distance,fg_blocked_distance,gwfg_att,gwfg_distance,gwfg_made,gwfg_missed,gwfg_blocked,fg_made_0_19,fg_made_20_29,fg_made_30_39,fg_made_40_49,fg_made_50_59,fg_made_60_,fg_missed_0_19,fg_missed_20_29,fg_missed_30_39,fg_missed_40_49,fg_missed_50_59,fg_missed_60_,fg_made_list,fg_missed_list,fg_blocked_list
0,2021,1,REG,ARI,M.Prater,00-0023853,1,1,0,34,2,0.5,5,0,0,5,1.0,34,43,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,34,43.0,
1,2021,1,REG,ATL,Y.Koo,00-0033702,2,0,0,27,2,1.0,0,0,0,0,,48,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,21;27,,
2,2021,1,REG,BAL,J.Tucker,00-0029597,2,0,0,47,2,1.0,3,0,0,3,1.0,87,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,40;47,,
3,2021,1,REG,BUF,T.Bass,00-0036162,3,0,0,42,3,1.0,1,0,0,1,1.0,104,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,37;25;42,,
4,2021,1,REG,CAR,R.Santoso,00-0034648,2,0,0,29,2,1.0,1,1,0,2,0.5,51,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,22;29,,
5,2021,1,REG,CHI,C.Santos,00-0031203,0,0,0,0,0,,2,0,0,2,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,


## Functions to correct fg_made_list and fg_missed_list:
Each cell currently holds a single string such as "21;27" - convert it to a list of ints, [21, 27]

In [4]:
def correct_made_list(made_list):
    """Convert one raw ``fg_made_list`` cell into a list of integer distances.

    Parameters
    ----------
    made_list : str, float, int, list, tuple or None
        A ';'-separated string such as ``"21;27"``, a missing value
        (``None``, NaN or a blank string), a single numeric distance, or an
        already-converted list.

    Returns
    -------
    list of int
        The distances in their original order; ``[]`` for a missing value.

    Raises
    ------
    ValueError
        If a token cannot be parsed as a number.
    """
    # Already-converted cells pass through, so applying this function twice
    # to the same column does not fail on a missing ``list.split``.
    if isinstance(made_list, (list, tuple)):
        return [int(x) for x in made_list]

    # NaN stringifies to "nan"; None and blank cells are treated the same way.
    text = "" if made_list is None else str(made_list).strip()
    if text in ("", "nan"):
        return []

    # int(float(...)) also accepts tokens such as "43.0", which can show up
    # when a value was parsed as a float rather than kept as text.
    return [int(float(token)) for token in text.split(";") if token.strip()]

In [5]:
def correct_missed_list(missed_list):
    """Convert one raw ``fg_missed_list`` cell into a list of integer distances.

    Parameters
    ----------
    missed_list : str, float, int, list, tuple or None
        A ';'-separated string such as ``"41;50"``, a missing value
        (``None``, NaN or a blank string), a single numeric distance, or an
        already-converted list.

    Returns
    -------
    list of int
        The distances in their original order; ``[]`` for a missing value.

    Raises
    ------
    ValueError
        If a token cannot be parsed as a number.
    """
    # Already-converted cells pass through, so applying this function twice
    # to the same column does not fail on a missing ``list.split``.
    if isinstance(missed_list, (list, tuple)):
        return [int(x) for x in missed_list]

    # NaN stringifies to "nan"; None and blank cells are treated the same way.
    text = "" if missed_list is None else str(missed_list).strip()
    if text in ("", "nan"):
        return []

    # int(float(...)) also accepts tokens such as "43.0", which can show up
    # when a value was parsed as a float rather than kept as text.
    return [int(float(token)) for token in text.split(";") if token.strip()]

In [6]:
# Replace the raw values in both list columns with the output of their
# respective correction function.
for column, fixer in (
    ("fg_made_list", correct_made_list),
    ("fg_missed_list", correct_missed_list),
):
    df[column] = df[column].apply(fixer)

## Function to calculate season average for field goals from all distances

In [7]:
def calculate_basic_season_average(df):
    """Print and return the season-wide field-goal success percentage.

    The value is ``100 * made / (made + missed)`` computed from the column
    totals of ``fg_made`` and ``fg_missed`` and rounded to one decimal place.
    """
    total_made = df["fg_made"].sum()
    total_kicks = total_made + df["fg_missed"].sum()
    basic_ave = round(100 * total_made / total_kicks, 1)
    print(f"The overall average for field goals was: {basic_ave}%")
    return basic_ave

In [8]:
# Keep the season-wide FG percentage for later cells; the call also prints it.
basic_average = calculate_basic_season_average(df)

The overall average for field goals was: 86.4%


## Look at extra point conversion (calculate expected value)

In [9]:
# Season totals: attempts are counted as made plus missed extra points.
made_xp = df["pat_made"].sum()
attempted_xp = made_xp + df["pat_missed"].sum()
xp_perc = round(made_xp / attempted_xp, 4)  # stored as a fraction, not a percentage
print(
    f"There were {attempted_xp} attempted extra points and {made_xp} were made "
    f"for a percent of {round(100*xp_perc,2)}%"
)

There were 1310 attempted extra points and 1231 were made for a percent of 93.97%


## Function to collect data for each fg distance

In [10]:
def make_fg_by_yard_dict(df):
    """Tally field-goal attempts and makes for every kick distance.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``fg_made_list`` and ``fg_missed_list`` columns whose cells are
        iterables of distances (one entry per kick). Any index is accepted.

    Returns
    -------
    dict
        Maps each distance (int) to ``[attempts, makes]``. Keys appear in the
        order the distances are first encountered, row by row, made kicks
        before missed kicks within a row.
    """
    # dictionary to hold [attempts, makes] for each distance tried
    fg_dict = {}

    # Iterate over the column values directly rather than df.loc[0..n-1], so a
    # filtered or re-indexed frame is handled the same as a freshly loaded one.
    for made_list, missed_list in zip(df["fg_made_list"], df["fg_missed_list"]):
        for distance in made_list:
            # a made kick counts as both an attempt and a make
            counts = fg_dict.setdefault(int(distance), [0, 0])
            counts[0] += 1
            counts[1] += 1

        for distance in missed_list:
            # a missed kick counts as an attempt only
            fg_dict.setdefault(int(distance), [0, 0])[0] += 1

    return fg_dict

### Create dictionary and convert to percent, then sort by yards

In [11]:
data_dict = make_fg_by_yard_dict(df)

# Success rate (in percent, 2 decimals) for every distance in data_dict.
fg_perc_dict = {
    distance: round(100 * makes / tries, 2)
    for distance, (tries, makes) in data_dict.items()
}

# Rebuild the dictionary with its keys in ascending distance order.
sorted_fg_perc_dict = {
    distance: fg_perc_dict[distance] for distance in sorted(fg_perc_dict)
}
print(f"Number of unique yard kicks: {len(sorted_fg_perc_dict)}") #47
sorted_fg_perc_dict

Number of unique yard kicks: 47


{19: 100.0,
 20: 100.0,
 21: 100.0,
 22: 100.0,
 23: 100.0,
 24: 100.0,
 25: 95.65,
 26: 100.0,
 27: 100.0,
 28: 100.0,
 29: 100.0,
 30: 100.0,
 31: 97.3,
 32: 91.3,
 33: 96.3,
 34: 96.88,
 35: 90.62,
 36: 87.18,
 37: 97.06,
 38: 100.0,
 39: 90.32,
 40: 87.1,
 41: 82.14,
 42: 72.41,
 43: 84.62,
 44: 80.77,
 45: 82.14,
 46: 80.49,
 47: 68.0,
 48: 82.93,
 49: 69.57,
 50: 77.78,
 51: 63.33,
 52: 81.08,
 53: 67.65,
 54: 72.22,
 55: 68.42,
 56: 46.15,
 57: 50.0,
 58: 50.0,
 59: 0.0,
 60: 0.0,
 61: 66.67,
 62: 100.0,
 65: 0.0,
 66: 100.0,
 68: 0.0}

## Plot fg accuracy by distance with horizontal line for basic average

In [None]:
# Per-distance success rate as bars, with the season-wide average drawn as a
# horizontal reference line. Labels and a legend let the figure stand alone.
fig, ax = plt.subplots()
ax.bar(x=list(sorted_fg_perc_dict.keys()), height=list(sorted_fg_perc_dict.values()))
ax.axhline(y=basic_average, linewidth=1, color='r', label="Overall average")
ax.set_xlabel("Kick distance (yards)")
ax.set_ylabel("Field goals made (%)")
ax.set_title("Field goal accuracy by distance")
ax.legend()
plt.show()

## Make a bet for each kick and see which teams would have made money
Need to figure out what to do if/when a distance has a success rate of 100% - currently has a payout of $0.00

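Need to figure out what to do if/when a distance has a success rate of 0% - currently no payout is defined for that distance, so it is skipped when payouts are built (missed kicks there still lose the wager)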

**Idea - linear regression for line of best fit and use that line to create betting lines**

In [None]:
def team_ave_vs_league_ave(fg_dict, df, wager=10):
    """Simulate a fixed bet on every field goal and total the result per team.

    Every kick is treated as a ``wager``-sized bet that it will be made. A made
    kick wins ``(misses / makes) * wager`` for its distance, taken from
    ``fg_dict``; a missed kick loses ``wager``.

    Parameters
    ----------
    fg_dict : dict
        Maps kick distance to ``[attempts, makes]``.
    df : pandas.DataFrame
        Needs ``team``, ``fg_made_list`` and ``fg_missed_list`` columns. The
        list columns hold iterables of distances; NaN cells are skipped. Any
        index is accepted.
    wager : int or float, default 10
        Stake placed on every kick.

    Returns
    -------
    dict
        Team -> net winnings rounded to 2 decimals, keyed in order of first
        appearance in ``df``.

    Raises
    ------
    KeyError
        If a made kick's distance has no payout, i.e. ``fg_dict`` records no
        make at that distance.
    """
    # Payout per distance; distances that were never made get no entry because
    # the odds (misses / makes) would divide by zero.
    payout_dict = {}
    for distance, counts in fg_dict.items():
        made = counts[1]
        missed = counts[0] - made
        if made > 0:
            payout_dict[distance] = (missed / made) * wager

    # every team starts with a balance of 0
    team_dict = dict.fromkeys(df["team"].unique(), 0)

    # Walk the column values directly instead of df.loc[0..n-1], so the frame
    # does not need a default RangeIndex.
    rows = zip(df["team"], df["fg_made_list"], df["fg_missed_list"])
    for team, made_kicks, missed_kicks in rows:
        # a NaN cell stringifies to "nan" and carries no kicks
        if str(made_kicks) != "nan":
            for distance in made_kicks:
                team_dict[team] += payout_dict[int(distance)]

        if str(missed_kicks) != "nan":
            for _ in missed_kicks:
                team_dict[team] -= wager

    # round winnings
    return {team: round(total, 2) for team, total in team_dict.items()}

In [None]:
# Net betting result per team, with payouts derived from the per-distance
# [attempts, makes] counts in data_dict.
betting_dict = team_ave_vs_league_ave(data_dict, df)
betting_dict

In [None]:
# Order teams from the largest net winnings down to the largest net loss.
sorted_betting_dict = dict(sorted(betting_dict.items(), key=lambda item: item[1], reverse=True))
sorted_betting_dict

In [None]:
sum(sorted_betting_dict.values())
# Total across all teams. Bets balance out at every distance with at least one
# make; misses at distances that were never made are pure losses (no payout
# exists there), and per-team rounding adds a small drift.

## Make a bet for each kick and see which kickers would have made money

In [None]:
def player_ave_vs_league_ave(fg_dict, df, wager=10):
    """Simulate a fixed bet on every field goal and total the result per kicker.

    Every kick is treated as a ``wager``-sized bet that it will be made. A made
    kick wins ``(misses / makes) * wager`` for its distance, taken from
    ``fg_dict``; a missed kick loses ``wager``.

    Parameters
    ----------
    fg_dict : dict
        Maps kick distance to ``[attempts, makes]``.
    df : pandas.DataFrame
        Needs ``player_name``, ``fg_made_list`` and ``fg_missed_list`` columns.
        The list columns hold iterables of distances; NaN cells are skipped.
        Any index is accepted.
    wager : int or float, default 10
        Stake placed on every kick.

    Returns
    -------
    dict
        Kicker name -> net winnings rounded to 2 decimals, keyed in order of
        first appearance in ``df``. Results are keyed by ``player_name``, so
        kickers sharing the same name string are pooled into one entry.

    Raises
    ------
    KeyError
        If a made kick's distance has no payout, i.e. ``fg_dict`` records no
        make at that distance.
    """
    # Payout per distance; distances that were never made get no entry because
    # the odds (misses / makes) would divide by zero.
    payout_dict = {}
    for distance, counts in fg_dict.items():
        made = counts[1]
        missed = counts[0] - made
        if made > 0:
            payout_dict[distance] = (missed / made) * wager

    # every kicker starts with a balance of 0
    kickers_dict = dict.fromkeys(df["player_name"].unique(), 0)

    # Walk the column values directly instead of df.loc[0..n-1], so the frame
    # does not need a default RangeIndex.
    rows = zip(df["player_name"], df["fg_made_list"], df["fg_missed_list"])
    for player, made_kicks, missed_kicks in rows:
        # a NaN cell stringifies to "nan" and carries no kicks
        if str(made_kicks) != "nan":
            for distance in made_kicks:
                kickers_dict[player] += payout_dict[int(distance)]

        if str(missed_kicks) != "nan":
            for _ in missed_kicks:
                kickers_dict[player] -= wager

    # round winnings
    return {player: round(total, 2) for player, total in kickers_dict.items()}

In [None]:
# Net betting result per kicker, with payouts derived from the per-distance
# [attempts, makes] counts in data_dict.
player_betting_dict = player_ave_vs_league_ave(data_dict, df)
player_betting_dict

In [None]:
# Order kickers from the largest net winnings down to the largest net loss.
sorted_player_betting_dict = dict(sorted(player_betting_dict.items(), key=lambda item: item[1], reverse=True))
sorted_player_betting_dict