In [1]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', 50)

In [2]:
# Per-game player stat lines and the player lookup table.
games = pd.read_csv('nfl-football-player-stats/games_1995.csv')
players = pd.read_csv('nfl-football-player-stats/players_1995.csv')
# Team game logs; the Rk, Time and LTime columns are discarded right after loading.
gamesDef = pd.read_csv('defData12-17.csv').drop(columns=["Rk", "Time", "LTime"])

In [3]:
# Fantasy points awarded per unit of each stat column (0.5 per reception).
half_ppr = {
    "rushing_yards": 0.1,          # 1 point per 10 rushing yards
    "rushing_touchdowns": 6,       # 6 points per rushing touchdown
    "receiving_receptions": 0.5,   # half a point per reception
    "receiving_yards": 0.1,        # 1 point per 10 receiving yards
    "receiving_touchdowns": 6,     # 6 points per receiving touchdown
}

In [4]:
def get_ros_stats(games, players, player_ids, year, nfl_week, points_dict, last_game=16):
    """Compute rest-of-season (ROS) fantasy scoring for each requested player.

    For every id in ``player_ids`` the rows of ``games`` with a matching
    ``player_id`` and ``year`` and with ``nfl_week <= game_number <= last_game``
    are selected. Each stat column named in ``points_dict`` is summed over
    those rows and multiplied by its point value.

    Note that ``nfl_week`` is compared directly against the ``game_number``
    column of ``games``.

    Parameters
    ----------
    games : pandas.DataFrame
        Per-game rows; needs ``player_id``, ``year``, ``game_number`` and
        every column named in ``points_dict``.
    players : pandas.DataFrame
        Needs ``player_id`` and ``name``, with exactly one row per requested id.
    player_ids : iterable
        Player ids to report on.
    year : int
        Season to evaluate.
    nfl_week : int
        First ``game_number`` included in the rest-of-season window.
    points_dict : dict
        Maps stat column name -> fantasy points per unit of that stat.
    last_game : int, optional
        Last ``game_number`` included in the window (default 16).

    Returns
    -------
    pandas.DataFrame
        One row per player with columns ``player_id``, ``name``, ``year``,
        ``nfl_week``, ``ROS_ppg`` (points per game, rounded to 2 decimals)
        and ``ROS_games``. A player with no games in the window gets
        ``ROS_games == 0`` and ``ROS_ppg == 0.0``.

    Raises
    ------
    ValueError
        If an id does not match exactly one row of ``players``.
    """
    records = []
    for p_id in player_ids:
        in_window = (
            (games['player_id'] == p_id)
            & (games['year'] == year)
            & (games['game_number'] >= nfl_week)
            & (games['game_number'] <= last_game)
        )
        ROS = games[in_window]
        ROS_games = len(ROS)
        ROS_pts = sum(ROS[stat].sum() * value for stat, value in points_dict.items())
        # A player with no remaining games would otherwise trigger a division by zero.
        ROS_ppg = np.round(ROS_pts / ROS_games, 2) if ROS_games else 0.0
        name = players.loc[players['player_id'] == p_id, 'name'].item()
        records.append([p_id, name, year, nfl_week, ROS_ppg, ROS_games])
    return pd.DataFrame(
        records,
        columns=['player_id', 'name', 'year', 'nfl_week', 'ROS_ppg', 'ROS_games'],
    )
# Example: rest-of-season stats from week 5 of 2011 for Adrian Peterson (17463) and Arian Foster (7173)
df = get_ros_stats(
    games=games,
    players=players,
    player_ids=[17463, 7173],
    year=2011,
    nfl_week=5,
    points_dict=half_ppr,
)
df

Unnamed: 0,player_id,name,year,nfl_week,ROS_ppg,ROS_games
0,17463,Adrian Peterson,2011,5,16.61,8
1,7173,Arian Foster,2011,5,23.05,11


In [5]:
def get_features(games, players, player_ids, year, nfl_week, points_dict, last_game=16):
    """Build per-player features from fantasy points scored before a given week.

    For each id in ``player_ids`` the player's earlier games are collected:
    every game from seasons before ``year`` plus games of ``year`` whose
    ``game_number`` is below ``nfl_week`` (always limited to
    ``game_number <= last_game``). Games are ordered most recent first and
    fantasy points per game are computed from ``points_dict``.

    Parameters
    ----------
    games : pandas.DataFrame
        Per-game rows; needs ``player_id``, ``year``, ``game_number`` and
        every column named in ``points_dict``.
    players : pandas.DataFrame
        Needs ``player_id``, ``name`` and ``position``, with exactly one row
        per requested id.
    player_ids : iterable
        Player ids to build features for.
    year : int
        Season of the prediction point.
    nfl_week : int
        Games of ``year`` with ``game_number`` below this value count as history.
    points_dict : dict
        Maps stat column name -> fantasy points per unit of that stat. An
        empty dict gives every game zero points.
    last_game : int, optional
        Largest ``game_number`` considered in any season (default 16).

    Returns
    -------
    pandas.DataFrame
        One row per player with columns ``player_id``, ``name``, ``position``,
        ``year``, ``nfl_week``, ``num_prev`` (number of earlier games) and
        three medians of per-game fantasy points rounded to 2 decimals:
        ``last_3`` (3 most recent games), ``next_15`` (the 15 games before
        those) and ``to_debut`` (everything older). A median is left at 0
        when there are fewer than 3, 18 or 19 earlier games respectively.

    Raises
    ------
    ValueError
        If an id does not match exactly one row of ``players``.
    """
    records = []
    for p_id in player_ids:
        is_player = (games['player_id'] == p_id) & (games['game_number'] <= last_game)
        p_prev_years = games[is_player & (games['year'] < year)]
        p_current_year = games[
            is_player & (games['year'] == year) & (games['game_number'] < nfl_week)
        ]
        # Most recent game first, so positional windows count backwards in time.
        history = pd.concat((p_prev_years, p_current_year)).sort_values(
            by=['year', 'game_number'], ascending=False
        )

        # Start from a zero Series so an empty points_dict still yields one value per game.
        fpts = pd.Series(0.0, index=history.index)
        for stat, value in points_dict.items():
            fpts = fpts + history[stat] * value

        num_prev = len(history)
        last_3 = 0
        next_15 = 0
        to_debut = 0
        # .iloc makes the windows explicitly positional, independent of the row labels.
        if num_prev >= 3:
            last_3 = np.round(np.median(fpts.iloc[0:3].to_numpy()), 2)
        if num_prev >= 18:
            next_15 = np.round(np.median(fpts.iloc[3:18].to_numpy()), 2)
        if num_prev >= 19:
            to_debut = np.round(np.median(fpts.iloc[18:].to_numpy()), 2)

        player = players[players['player_id'] == p_id]
        # Bracket access: attribute access (player.name) only works because
        # DataFrame has no attribute of its own called `name`.
        records.append([
            p_id,
            player['name'].item(),
            player['position'].item(),
            year,
            nfl_week,
            num_prev,
            last_3,
            next_15,
            to_debut,
        ])
    return pd.DataFrame(
        records,
        columns=['player_id', 'name', 'position', 'year', 'nfl_week',
                 'num_prev', 'last_3', 'next_15', 'to_debut'],
    )
# Example: features as of week 5 of 2011 for player ids 17463 and 7173
a = get_features(
    games=games,
    players=players,
    player_ids=[17463, 7173],
    year=2011,
    nfl_week=5,
    points_dict=half_ppr,
)
a

Unnamed: 0,player_id,name,position,year,nfl_week,num_prev,last_3,next_15,to_debut
0,17463,Adrian Peterson,RB,2011,5,65,17.7,13.6,17.3
1,7173,Arian Foster,RB,2011,5,24,24.1,23.9,6.65


In [19]:
# Show the players row for player_id 17463
players.loc[players['player_id'] == 17463]

Unnamed: 0,birth_date,birth_place,college,current_salary,current_team,death_date,draft_position,draft_round,draft_team,draft_year,height,high_school,hof_induction_year,name,player_id,position,weight
2341,1985-03-21,"Palestine, TX",Oklahoma,1000000,Arizona Cardinals,,7.0,1.0,Minnesota Vikings,2007.0,6-2,"Palestine, TX",,Adrian Peterson,17463,RB,217.0


In [18]:
# Show every game row recorded for player_id 17463
games.loc[games['player_id'] == 17463]

Unnamed: 0,age,date,defense_interception_touchdowns,defense_interception_yards,defense_interceptions,defense_sacks,defense_safeties,defense_tackle_assists,defense_tackles,field_goal_attempts,field_goal_makes,game_location,game_number,game_won,kick_return_attempts,kick_return_touchdowns,kick_return_yards,opponent,opponent_score,passing_attempts,passing_completions,passing_interceptions,passing_rating,passing_sacks,passing_sacks_yards_lost,passing_touchdowns,passing_yards,player_id,player_team_score,point_after_attemps,point_after_makes,punt_return_attempts,punt_return_touchdowns,punt_return_yards,punting_attempts,punting_blocked,punting_yards,receiving_receptions,receiving_targets,receiving_touchdowns,receiving_yards,rushing_attempts,rushing_touchdowns,rushing_yards,team,year
146159,22-172,2007-09-09,0,0,0,0.0,0,0,0,0,0,H,1,True,1,0,22,ATL,3,0,0,0,0.0,0,0,0,0,17463,24,0,0,0,0,0,0,0,0,1,1,1,60,19,0,103,MIN,2007
146160,22-179,2007-09-16,0,0,0,0.0,0,0,0,0,0,A,2,False,0,0,0,DET,20,0,0,0,0.0,0,0,0,0,17463,17,0,0,0,0,0,0,0,0,4,5,0,52,20,0,66,MIN,2007
146161,22-186,2007-09-23,0,0,0,0.0,0,0,0,0,0,A,3,False,0,0,0,KAN,13,0,0,0,0.0,0,0,0,0,17463,10,0,0,0,0,0,0,0,0,3,3,0,48,25,1,102,MIN,2007
146162,22-193,2007-09-30,0,0,0,0.0,0,0,0,0,0,H,4,False,3,0,98,GNB,23,0,0,0,0.0,0,0,0,0,17463,16,0,0,0,0,0,0,0,0,1,3,0,6,12,0,112,MIN,2007
146163,22-207,2007-10-14,0,0,0,0.0,0,0,0,0,0,A,5,True,4,0,128,CHI,31,0,0,0,0.0,0,0,0,0,17463,34,0,0,0,0,0,0,0,0,1,2,0,9,20,3,224,MIN,2007
146164,22-214,2007-10-21,0,0,0,0.0,0,0,0,0,0,A,6,False,3,0,65,DAL,24,0,0,0,0.0,0,0,0,0,17463,14,0,0,0,0,0,0,0,0,1,2,0,12,12,1,63,MIN,2007
146165,22-221,2007-10-28,0,0,0,0.0,0,0,0,0,0,H,7,False,2,0,11,PHI,23,0,0,0,0.0,0,0,0,0,17463,16,0,0,0,0,0,0,0,0,0,0,0,0,20,0,70,MIN,2007
146166,22-228,2007-11-04,0,0,0,0.0,0,0,0,0,0,H,8,True,0,0,0,SDG,17,0,0,0,0.0,0,0,0,0,17463,35,0,0,0,0,0,0,0,0,1,2,0,19,30,3,296,MIN,2007
146167,22-235,2007-11-11,0,0,0,0.0,0,0,0,0,0,A,9,False,0,0,0,GNB,34,0,0,0,0.0,0,0,0,0,17463,0,0,0,0,0,0,0,0,0,3,4,0,14,11,0,45,MIN,2007
146168,22-256,2007-12-02,0,0,0,0.0,0,0,0,0,0,H,12,True,0,0,0,DET,10,0,0,0,0.0,0,0,0,0,17463,42,0,0,0,0,0,0,0,0,1,2,0,10,15,2,116,MIN,2007


In [75]:
def get_def_data(gamesDef, team, current_week, year):
    """Select a team's games before a given week and print yards-per-play averages.

    Keeps the rows of ``gamesDef`` where ``Tm == team``, ``Year == year`` and
    ``Week < current_week``. Prints two summaries of the ``DY/P`` column over
    those rows, separated by a space: the average weighted by ``D Plays``
    (defensive plays), then the plain mean.

    Parameters
    ----------
    gamesDef : pandas.DataFrame
        Needs the columns ``Tm``, ``Year``, ``Week``, ``D Plays`` and ``DY/P``.
    team : str
        Value matched against ``Tm``.
    current_week : int
        Only rows with ``Week`` strictly below this value are kept.
    year : int
        Value matched against ``Year``.

    Returns
    -------
    pandas.DataFrame
        The filtered rows. The averages are only printed, not returned.
    """
    selected = (
        (gamesDef["Tm"] == team)
        & (gamesDef["Year"] == year)
        & (gamesDef["Week"] < current_week)
    )
    df = gamesDef[selected]

    plays = df["D Plays"]
    yards_per_play = df["DY/P"]
    avg_dyp = yards_per_play.mean()  # average yards allowed per defensive play
    tot_dplys = plays.sum()
    if tot_dplys:
        # Each game counts in proportion to its share of the total defensive plays;
        # skipna=False keeps a missing DY/P value visible as NaN in the result.
        w_avg_dyp = (plays * yards_per_play).sum(skipna=False) / tot_dplys
    else:
        # No games (or no plays) selected: nothing to weight, avoid dividing by zero.
        w_avg_dyp = 0
    # TODO: decide whether the weighted or the plain average should be used downstream.
    print(w_avg_dyp, avg_dyp)
    return df

In [76]:
# Seattle's 2015 games before week 16
get_def_data(gamesDef=gamesDef, team='SEA', current_week=16, year=2015)

5.060993660287081 4.906635714285715


Unnamed: 0,Tm,Year,Date,Opp,Week,Game,Result,Total Yardage,O Plays,Y/P,D Plays,DY/P,TO
173,SEA,2015,11/29/2015,PIT,12,11,W 39-30,436,59,7.3898,75,7.1733,
558,SEA,2015,9/13/2015,STL,1,1,L 31-34,343,79,4.3418,55,6.4,1.0
1013,SEA,2015,11/22/2015,SFO,11,10,W 29-13,508,75,6.7733,52,5.8846,
1234,SEA,2015,9/20/2015,GNB,2,2,L 17-27,324,57,5.6842,64,5.6406,2.0
1384,SEA,2015,12/13/2015,BAL,14,13,W 35-6,424,69,6.1449,55,5.4909,1.0
1448,SEA,2015,11/15/2015,ARI,10,9,L 32-39,343,52,6.5962,83,5.4337,1.0
1568,SEA,2015,10/18/2015,CAR,6,6,L 23-27,334,60,5.5667,72,5.3194,
1590,SEA,2015,10/11/2015,CIN,5,5,L 24-27,397,57,6.9649,79,5.3038,1.0
2119,SEA,2015,10/5/2015,DET,4,4,W 13-10,345,63,5.4762,53,4.8302,3.0
2495,SEA,2015,12/20/2015,CLE,15,14,W 30-13,423,68,6.2206,52,4.4231,


In [83]:
# New Orleans' 2015 games before week 16
get_def_data(gamesDef=gamesDef, team='NOR', current_week=16, year=2015)

6.570944381384789 6.58827857142857


Unnamed: 0,Tm,Year,Date,Opp,Week,Game,Result,Total Yardage,O Plays,Y/P,D Plays,DY/P,TO
12,NOR,2015,11/15/2015,WAS,10,10,L 14-47,350,55,6.3636,59,8.6441,2.0
63,NOR,2015,12/21/2015,DET,15,14,L 27-35,399,71,5.6197,51,7.7647,
105,NOR,2015,9/13/2015,ARI,1,1,L 19-31,408,70,5.8286,57,7.4912,1.0
165,NOR,2015,11/8/2015,TEN,9,9,L 28-34,416,70,5.9429,67,7.209,2.0
261,NOR,2015,9/27/2015,CAR,3,3,L 22-27,380,63,6.0317,62,6.9516,2.0
448,NOR,2015,10/11/2015,PHI,5,5,L 17-39,388,69,5.6232,79,6.5696,4.0
467,NOR,2015,12/6/2015,CAR,13,12,L 38-41,334,58,5.7586,76,6.5395,1.0
557,NOR,2015,11/1/2015,NYG,8,8,W 52-49,608,76,8.0,65,6.4,2.0
739,NOR,2015,10/25/2015,IND,7,7,W 27-21,449,83,5.4096,61,6.1639,1.0
991,NOR,2015,10/15/2015,ATL,6,6,W 31-21,385,72,5.3472,70,5.9,
