In [98]:
import pandas as pd
import numpy as np

In [51]:
# Load the per-game stat lines and the player lookup table from the local data directory.
# NOTE(review): the "_1995" suffix presumably marks the first season covered — confirm;
# later cells query the 2011 season from these frames.
games = pd.read_csv('nfl-football-player-stats/games_1995.csv')
players = pd.read_csv('nfl-football-player-stats/players_1995.csv')

In [311]:
# Fantasy points awarded per unit of each stat: 0.1 per rushing/receiving yard,
# 6 per rushing/receiving touchdown and 0.5 per reception.
# Each key is looked up as a column of `games` by the functions that accept a points_dict.
half_ppr = {
    'rushing_yards': 0.1,
    'rushing_touchdowns': 6,
    'receiving_receptions': 0.5,
    'receiving_yards': 0.1,
    'receiving_touchdowns': 6
}

In [385]:
def get_ros_stats(games, players, player_ids, year, nfl_week, points_dict):
    """Compute rest-of-season (ROS) fantasy scoring for each requested player.

    For every id in ``player_ids`` the games of season ``year`` with
    ``nfl_week <= game_number <= 16`` are selected, scored with
    ``points_dict`` and averaged per game.

    Parameters
    ----------
    games : pandas.DataFrame
        Per-game stat lines. Must contain ``player_id``, ``year``,
        ``game_number`` and one column per key of ``points_dict``.
    players : pandas.DataFrame
        Player lookup table with ``player_id`` and ``name`` columns.
    player_ids : iterable
        Player ids to evaluate.
    year : int
        Season to evaluate.
    nfl_week : int
        First game number (inclusive) of the rest-of-season window.
    points_dict : dict
        Maps a stat column name to the fantasy points awarded per unit.

    Returns
    -------
    pandas.DataFrame
        One row per player with columns ``player_id``, ``name``, ``year``,
        ``nfl_week``, ``ROS_ppg`` and ``ROS_games``. ``ROS_ppg`` is NaN for a
        player with no games in the window.

    Raises
    ------
    ValueError
        If an id does not match exactly one row of ``players``.
    KeyError
        If a key of ``points_dict`` is not a column of ``games``.
    """
    rows = []
    for p_id in player_ids:
        # Games after number 16 are deliberately left out of the window.
        ros = games[(games.player_id == p_id) & (games.year == year) &
                    (games.game_number >= nfl_week) & (games.game_number <= 16)]
        ros_games = len(ros)
        if ros_games:
            # skipna=False keeps a missing stat visible as NaN instead of
            # silently scoring it as zero.
            ros_pts = sum(ros[stat].sum(skipna=False) * value
                          for stat, value in points_dict.items())
            ros_ppg = np.round(ros_pts / ros_games, 2)
        else:
            # No games left in the window: a per-game average is undefined,
            # so report NaN rather than dividing by zero.
            ros_ppg = np.nan
        name = players.loc[players['player_id'] == p_id, 'name'].item()
        rows.append([p_id, name, year, nfl_week, ros_ppg, ros_games])
    return pd.DataFrame(
        rows,
        columns=['player_id', 'name', 'year', 'nfl_week', 'ROS_ppg', 'ROS_games'],
    )
# Usage example: rest-of-season stats from week 5 of 2011 for
# Adrian Peterson (17463) and Arian Foster (7173).
df = get_ros_stats(games, players, [17463, 7173], 2011, 5, half_ppr)
df

Unnamed: 0,player_id,name,year,nfl_week,ROS_ppg,ROS_games
0,17463,Adrian Peterson,2011,5,16.61,8
1,7173,Arian Foster,2011,5,23.05,11


In [386]:
def get_features(games, players, player_ids, year, nfl_week, points_dict):
    """Build per-player history features from games played before ``nfl_week``.

    A player's history is every game with ``game_number <= 16`` from seasons
    before ``year`` plus the games of ``year`` with ``game_number < nfl_week``,
    ordered most recent first. Each game is scored with ``points_dict`` and
    summarised by three medians (rounded to 2 decimals):

    * ``last_3``   - the 3 most recent games (needs at least 3 games),
    * ``next_15``  - the 15 games before those (needs at least 18 games),
    * ``to_debut`` - every older game (needs at least 19 games).

    A feature whose game-count requirement is not met is reported as 0.

    Returns a DataFrame with columns ``player_id``, ``name``, ``position``,
    ``year``, ``nfl_week``, ``num_prev``, ``last_3``, ``next_15``, ``to_debut``.
    """
    def median_2dp(values):
        # Median of the given scores, rounded to two decimals.
        return np.round(np.median(values), 2)

    records = []
    for p_id in player_ids:
        eligible = games[(games.player_id == p_id) & (games.game_number <= 16)]
        earlier_seasons = eligible[eligible.year < year]
        this_season = eligible[(eligible.year == year) & (eligible.game_number < nfl_week)]

        # Most recent game first, so positional slices walk backwards in time.
        history = pd.concat((earlier_seasons, this_season)).sort_values(
            by=['year', 'game_number'], ascending=False
        )

        # Fantasy points per game: weighted sum of the scored stat columns.
        fpts = sum(history[stat] * weight for stat, weight in points_dict.items())
        num_prev = len(fpts)

        last_3 = median_2dp(fpts.iloc[0:3]) if num_prev >= 3 else 0
        next_15 = median_2dp(fpts.iloc[3:18]) if num_prev >= 18 else 0
        to_debut = median_2dp(fpts.iloc[18:]) if num_prev >= 19 else 0

        profile = players[players['player_id'] == p_id]
        records.append([
            p_id,
            profile['name'].item(),
            profile['position'].item(),
            year,
            nfl_week,
            num_prev,
            last_3,
            next_15,
            to_debut,
        ])

    feature_columns = ['player_id', 'name', 'position', 'year', 'nfl_week',
                       'num_prev', 'last_3', 'next_15', 'to_debut']
    return pd.DataFrame(records, columns=feature_columns)
# Usage example: history features for player ids 17463 and 7173,
# built only from games played before week 5 of 2011.
a = get_features(games, players, [17463, 7173], 2011, 5, half_ppr)
a

Unnamed: 0,player_id,name,position,year,nfl_week,num_prev,last_3,next_15,to_debut
0,17463,Adrian Peterson,RB,2011,5,65,17.7,13.6,17.3
1,7173,Arian Foster,RB,2011,5,24,24.1,23.9,6.65


In [387]:
# Per-position filter used by get_playerids_thatweek:
# position -> [stat columns to add up, minimum combined total required in that week].
subset_position = {
    'RB': [['rushing_attempts', 'receiving_targets'], 5]
}

In [388]:
def get_playerids_thatweek(games, players, position, year, nfl_week, subset_position):
    """Return ids of players at ``position`` who were involved enough in one game.

    ``subset_position[position]`` supplies a list of stat columns and a
    threshold. A player qualifies when their row in ``games`` for ``year`` and
    game number ``nfl_week`` has a combined total of those stats of at least
    the threshold, and ``players`` lists them at ``position``.

    Returns the ``player_id`` column of the matching ``players`` rows.
    """
    rule = subset_position[position]
    stat_columns = rule[0]
    min_total = rule[1]

    in_requested_game = (games.year == year) & (games.game_number == nfl_week)
    involved_enough = games[stat_columns].sum(axis=1) >= min_total
    active_ids = games.loc[in_requested_game & involved_enough, 'player_id']

    was_active = players['player_id'].isin(active_ids)
    at_position = players['position'] == position
    return players.loc[was_active & at_position, 'player_id']

In [389]:
# Pick the RBs who met the subset_position threshold in week 5 of 2011, then build
# their pre-week-5 features and their rest-of-season targets with half-PPR scoring.
ids = get_playerids_thatweek(games,players,'RB',2011,5,subset_position)
feats = get_features(games, players, ids, 2011, 5, half_ppr)
ros = get_ros_stats(games, players, ids, 2011, 5, half_ppr)
ros

Unnamed: 0,player_id,name,year,nfl_week,ROS_ppg,ROS_games
0,22024,Daniel Thomas,2011,5,4.13,11
1,10839,Steven Jackson,2011,5,14.71,12
2,22426,Ryan Torain,2011,5,1.69,7
3,10524,Kendall Hunter,2011,5,5.38,12
4,11580,Felix Jones,2011,5,7.18,8
5,18312,Isaac Redman,2011,5,5.27,12
6,22656,Michael Turner,2011,5,13.74,12
7,8152,Frank Gore,2011,5,11.73,12
8,13751,Marshawn Lynch,2011,5,18.64,11
9,16086,DeMarco Murray,2011,5,14.0,9
