To Do:
* rush percentage as feature
* ewma feature instead of multiple fpts features
* L1 regularization
* Decision Tree regresssion
* sklearn time series split validation on training data, or implement this ourselves (Brian G.). Holdout data on a few future weeks. Report both the cross validation and holdout performance scores.
* look at masters theses that do this
* tune number of games in moving averages and tune number of trees in random forest

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

In [63]:
all_games.columns

Index(['age', 'date', 'defense_interception_touchdowns',
       'defense_interception_yards', 'defense_interceptions', 'defense_sacks',
       'defense_safeties', 'defense_tackle_assists', 'defense_tackles',
       'field_goal_attempts', 'field_goal_makes', 'game_location',
       'game_number', 'game_won', 'kick_return_attempts',
       'kick_return_touchdowns', 'kick_return_yards', 'opponent',
       'opponent_score', 'passing_attempts', 'passing_completions',
       'passing_interceptions', 'passing_rating', 'passing_sacks',
       'passing_sacks_yards_lost', 'passing_touchdowns', 'passing_yards',
       'player_team_score', 'point_after_attemps', 'point_after_makes',
       'punt_return_attempts', 'punt_return_touchdowns', 'punt_return_yards',
       'punting_attempts', 'punting_blocked', 'punting_yards',
       'receiving_receptions', 'receiving_targets', 'receiving_touchdowns',
       'receiving_yards', 'rushing_attempts', 'rushing_touchdowns',
       'rushing_yards', 'team', 'ye

In [76]:
all_games = pd.read_csv('nfl-football-player-stats/games_1995.csv')
all_games.drop([440917], inplace=True) # delete weird Lagerrete Blount double game
all_games.loc[all_games['team'] == 'SDG', 'team'] = 'LAC'
all_games.loc[all_games['team'] == 'STL', 'team'] = 'LAR'
all_games.loc[all_games['opponent'] == 'SDG', 'opponent'] = 'LAC'
all_games.loc[all_games['opponent'] == 'STL', 'opponent'] = 'LAR'
all_games = all_games.set_index('player_id')
all_players = pd.read_csv('nfl-football-player-stats/players_1995.csv',index_col='player_id')
gamesDef = pd.read_csv('defData12-17.csv')
gamesDef.loc[gamesDef['Tm']=='SDG', 'Tm'] = 'LAC'
gamesDef.loc[gamesDef['Tm']=='STL', 'Tm'] = 'LAR'
gamesDef.loc[gamesDef['Opp']=='SDG', 'Opp'] = 'LAC'
gamesDef.loc[gamesDef['Opp']=='STL', 'Opp'] = 'LAR'
gamesDef.set_index('Tm')
gamesDef = gamesDef.drop(columns=["Rk","Time","LTime"])

In [65]:
# dictionary that has fantasy value for each nfl stat
half_ppr = {
    'rushing_yards': 0.1,
    'rushing_touchdowns': 6,
    'receiving_receptions': 0.5,
    'receiving_yards': 0.1,
    'receiving_touchdowns': 6
}

In [66]:
# subset RB position by choosing only RBs with at least 3 fantasy points in previous game
subset_position = {
    'RB': [['rushing_attempts', 'receiving_targets'], 5.0],
    'WR': [['rushing_attempts', 'receiving_targets'], 5.0]
}

In [67]:
def get_players_thatweek(all_games, all_players, position, year, game_number, subset_position, specific_players=None):
    if specific_players != None:
        ids = specific_players
    else:
        ids = all_players[all_players.position == position].index
        
    # return pandas df with player_id as index and player name and log draft position columns
    games = all_games.loc[ids]
    stats = subset_position[position][0]
    sum_threshold = subset_position[position][1]
    # only take RBs with 5 rush attempts or receiving targets (>= sum_threshold)
    worth_predicting = games[(games.year == year) & 
                             (games.game_number == game_number) &
                             (np.sum(games[stats], axis=1) >= sum_threshold)]
    ids = worth_predicting.index
    for_df = all_players.loc[ids, ['name', 'draft_position']]
    # players that weren't drafted give them position of last pick of draft
    for_df.fillna(255, inplace=True)
    for_df['log_draft_position'] = np.log(for_df.draft_position)
    return pd.DataFrame(data=for_df)

In [68]:
def get_team_stats(games, year, game_number):
    # returns: dataframe with team passing and rushing stats
    
    # start by getting team passing attempts
    prev_years = games[(games.year < year) &
                       (games.game_number <= 16)]
    current_year = games[(games.year == year) &
                         (games.game_number <= game_number)]
    passing_stats = pd.concat((prev_years, current_year))
    passing_stats = pd.concat((prev_years, current_year))
    passing_stats = passing_stats[['team', 'date', 'year', 'game_number', 'passing_attempts', 'passing_rating']] 
    passing_stats.sort_values(by=['year', 'game_number'], axis=0,ascending=True, inplace=True)
    passing_stats = passing_stats.groupby(['team', 'year', 'game_number'])
    # sum of pass attempts by anyone by team in year during game
    team_pass_attempts = passing_stats.passing_attempts.sum()
    # take the passing rating from the player with most pass_attempts
    pass_rating = passing_stats.apply(
        lambda x: x.nlargest(1,'passing_attempts')).droplevel(3)['passing_rating']
    ewma_pass_rating = pass_rating.groupby('team').apply(
        lambda x: x.ewm(span=16).mean()).rename('ewma_team_passing_rating')
    # Note: team pass attempts and pass_rating and ewma_pass_rating are hierarchical pd series

    # get total team rushing attempts
    games = games[(games.rushing_attempts > 0)]
    prev_years = games[(games.year < year) &
                       (games.game_number <= 16)]
    current_year = games[(games.year == year) &
                         (games.game_number <= game_number)]
    rushing_stats = pd.concat((prev_years, current_year))
    rushing_stats.sort_values(by=['year', 'game_number'], axis=0, ascending=True, inplace=True)
    rushing_stats = rushing_stats.groupby(['team', 'year', 'game_number'])
    rush_attempts = rushing_stats.rushing_attempts.sum()

    # combine all stats into one dataframe
    team_stats = pd.DataFrame(team_pass_attempts)
    team_stats = team_stats.join(pass_rating)
    team_stats = team_stats.join(ewma_pass_rating)
    team_stats = team_stats.join(rush_attempts)
    ewma_rush_attempts = team_stats.groupby('team').apply(
        lambda x: x['rushing_attempts'].ewm(span=16).mean()).droplevel(0).rename('ewma_team_rush_attempts')
    team_stats = team_stats.join(ewma_rush_attempts)
    team_stats['rush_percentage'] = team_stats.rushing_attempts/(
        team_stats.rushing_attempts+team_stats.passing_attempts)
    ewma_rush_perc = team_stats.groupby('team').apply(
        lambda x: x['rush_percentage'].ewm(span=16).mean()).droplevel(0).rename('ewma_team_rush_percentage')
    team_stats = team_stats.join(ewma_rush_perc)
    team_stats = team_stats.sort_values(by=['year', 'game_number'],axis=0,ascending=False).groupby('team').nth([0])
    return team_stats

In [69]:
def get_def_data(gamesDef, year, gameNumber, gamesBack = 5):
    # return def stats for all teams using last 5 weeks as Pandas Dataframe
    prevSeasonDef = gamesDef[gamesDef['Year']==(year-1)]
    currentSeasonDef = gamesDef[(gamesDef['Year']==year) & (gamesDef['Game']<=gameNumber)]
    df = pd.concat((prevSeasonDef, currentSeasonDef))
    df.sort_values(by=['Year', 'Game'], axis=0, ascending=False, inplace=True)
    grouped = df.groupby('Tm', sort=False)
    def_stats = grouped.nth(list(range(0,gamesBack))).groupby('Tm', sort=False).mean()[['DY/P', 'TO']]
    def_stats.index.rename('opp', inplace=True)
    return def_stats

In [70]:
#df2 = get_team_stats(all_games, 2016, 1)
df2

Unnamed: 0_level_0,passing_attempts,passing_rating,ewma_team_passing_rating,rushing_attempts,ewma_team_rush_attempts,rush_percentage,ewma_team_rush_percentage
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ARI,24,104.7,98.388859,19,26.432704,0.44186,0.53514
ATL,27,112.6,93.186236,22,24.726216,0.44898,0.486039
BAL,22,100.3,81.982241,28,24.401527,0.56,0.483403
BUF,15,79.9,92.942911,24,31.606905,0.615385,0.638627
CAR,18,69.5,99.807725,32,31.866786,0.64,0.621057
CHI,16,76.2,90.729656,20,27.198219,0.555556,0.566272
CIN,23,114.0,104.75653,19,28.112698,0.452381,0.57031
CLE,12,55.0,71.885403,21,24.292479,0.636364,0.540958
DAL,25,69.4,78.16737,30,25.301004,0.545455,0.548089
DEN,18,69.1,83.527484,29,27.154968,0.617021,0.542309


In [71]:
# example usage
df1 = get_def_data(gamesDef, 2016, 1, gamesBack=15)
df1

Unnamed: 0_level_0,DY/P,TO
opp,Unnamed: 1_level_1,Unnamed: 2_level_1
OAK,5.57332,2.090909
NOR,6.745927,1.7
IND,5.722467,2.444444
PIT,5.650487,2.25
NYJ,5.12812,1.916667
DET,5.6191,2.333333
WAS,6.248087,1.909091
ATL,5.58424,2.545455
LAC,5.362713,1.692308
DAL,5.727227,2.333333


In [72]:
def get_features_response(players, all_games, year, game_number, points_dict, defWeeksBack=5):
    games = all_games.loc[players.index]
    
    # compute fpts for each row
    games['fpts'] = games['game_number']*0
    for stat, value in zip(points_dict.keys(), points_dict.values()):
        games['fpts'] = games['fpts'] + games[stat]*value
    
    prev_years = games[(games.year < year) &
                       (games.game_number <= 16)]
    current_year = games[(games.year == year) &
                         (games.game_number <= game_number)]
    #current_game = games[(games.year == year) &
    #                 (games.game_number==game_number)]
    next_game = games[(games.year == year) &
                     (games.game_number == game_number+1)]
    df = pd.concat((prev_years, current_year))
    # only keep columns needed for indiviual statistics
    df = df[['team', 'date', 'year', 'game_number', 'fpts', 'rushing_attempts', 'receiving_targets']]
    df.sort_values(by=['player_id', 'year', 'game_number'], axis=0,
                   ascending=True, inplace=True)
    
    ## Individual Statistics
    # group dataframe by index
    df['num_games'] = df.groupby(df.index).cumcount().rename('num_games') + 1
    df['log_num_games'] = np.log(df.num_games)
    df['ewma_rushing_attempts'] = df.groupby(df.index).apply(
        lambda x: x['rushing_attempts'].ewm(span=16).mean()).droplevel(0).rename('ewma_rushing_attempts')
    df['ewma_receiving_targets'] = df.groupby(df.index).apply(
        lambda x: x['receiving_targets'].ewm(span=16).mean()).droplevel(0).rename('ewma_receiving_targets')
    df['ewma_fpts'] = df.groupby(df.index).apply(
        lambda x: x['fpts'].ewm(span=16).mean()).droplevel(0).rename('ewma_fpts')
    df = df[['team', 'date', 'year', 'log_num_games', 'game_number', 'fpts', 'ewma_fpts', 'ewma_rushing_attempts', 'ewma_receiving_targets']]
    df = df.sort_values(by=['year','game_number'],axis=0,ascending=False).groupby('player_id').nth([0])

    ## Team Statistics
    # get rush_percentage, rushing_attempts, and passer rating of teams
    team_stats = get_team_stats(games = all_games, year=year, game_number=game_number)
    # join team stats to df by team
    df = df.reset_index().join(team_stats, how='left', on='team').set_index('player_id')#.drop_duplicates()
    
    # get defenseive stats
    def_stats = get_def_data(gamesDef, year, game_number, defWeeksBack)
    #def_stats.reset_index(inplace=True)
    df['opp'] = next_game['opponent']
    df['next_fpts'] = next_game['fpts']
    # Response variable is the fantasy points of the next game, NA should be zero because they
    # didn't play or didn't score
    df['next_fpts'] = df['next_fpts'].fillna(0)
    print(df)
    print(def_stats)
    df = df.join(def_stats, how='left', on='opp')
    df.sort_values(by=['player_id', 'year', 'game_number'], axis=0,
                   ascending=True, inplace=True)
    # join df to players with name and draft info
    df = players.join(df)
    
    # drop players who don't play in the next week
    df.dropna(subset=['opp'], inplace=True)
    
    #df['ros_ppg'] = rest_year.groupby('player_id').fpts.mean().rename('ros_ppg')
    #ros_games = rest_year.groupby('player_id').fpts.count().rename('ros_games', inplace=True)
    return df

In [77]:
# example usage
# train to game number 
a = get_players_thatweek(all_games, all_players,'WR',2015,15,subset_position)
get_features_response(a, all_games, 2015, 15, half_ppr)

          team        date  year  log_num_games  game_number  fpts  ewma_fpts  \
player_id                                                                       
60         GNB  2015-12-27  2015       3.332205           15   5.7   5.625360   
137        PHI  2015-12-26  2015       2.484907           15   4.5   3.810585   
152        BAL  2015-12-27  2015       3.526361           15  10.6  10.607797   
756        LAR  2015-12-27  2015       3.761200           15   4.3   9.735816   
941        SEA  2015-12-27  2015       4.343805           15  21.8  16.643173   
1524       CLE  2015-12-27  2015       3.970292           15   3.0   7.327445   
1961       SFO  2015-12-27  2015       5.220356           15   5.2   9.734565   
2544       PIT  2015-12-27  2015       4.442651           15   9.6  19.636988   
2579       CAR  2015-12-27  2015       3.258097           15   5.4   6.393132   
3142       DAL  2015-12-27  2015       3.433987           15   9.4   3.615296   
3158       BAL  2015-12-27  

Unnamed: 0_level_0,name,draft_position,log_draft_position,team,date,year,log_num_games,game_number,fpts,ewma_fpts,...,passing_rating,ewma_team_passing_rating,rushing_attempts,ewma_team_rush_attempts,rush_percentage,ewma_team_rush_percentage,opp,next_fpts,DY/P,TO
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
19449,Emmanuel Sanders,82.0,4.406719,DEN,2015-12-28,2015,4.454347,15,14.7,13.091903,...,100.3,86.671302,21,26.230159,0.437500,0.519667,LAC,12.4,5.09350,1.750000
18196,Rueben Randle,63.0,4.143135,NYG,2015-12-27,2015,4.143135,15,15.0,10.155954,...,50.7,91.120837,20,24.684831,0.500000,0.503145,PHI,15.9,5.87796,1.800000
7061,Malcom Floyd,255.0,5.541264,LAC,2015-12-24,2015,4.787492,15,5.7,6.348813,...,85.2,89.498415,25,24.589338,0.446429,0.480341,DEN,0.0,4.16782,1.400000
18802,Seth Roberts,255.0,5.541264,OAK,2015-12-24,2015,2.708050,15,4.7,6.430139,...,72.7,85.789363,24,23.840255,0.510638,0.519071,KAN,3.5,5.21218,1.666667
11611,James Jones,78.0,4.356709,GNB,2015-12-27,2015,4.905275,15,7.1,8.730194,...,66.2,89.777986,26,28.001149,0.619048,0.565070,MIN,12.2,5.84376,1.666667
11975,Jermaine Kearse,255.0,5.541264,SEA,2015-12-27,2015,3.951244,15,11.3,7.858518,...,88.4,110.612540,22,31.597021,0.468085,0.601360,ARI,10.9,5.32340,2.000000
1524,Travis Benjamin,100.0,4.605170,CLE,2015-12-27,2015,3.970292,15,3.0,7.327445,...,40.6,77.461695,36,24.962340,0.734694,0.533421,PIT,7.8,5.83710,2.400000
21460,Jaelen Strong,70.0,4.248495,HOU,2015-12-27,2015,2.197225,15,2.8,3.307872,...,116.7,91.462842,42,31.626766,0.724138,0.607628,JAX,8.6,5.78868,1.400000
10191,DeAndre Hopkins,27.0,3.295837,HOU,2015-12-27,2015,3.850148,15,21.2,15.531246,...,116.7,91.462842,42,31.626766,0.724138,0.607628,JAX,12.4,5.78868,1.400000
4614,Jerricho Cotchery,108.0,4.682131,CAR,2015-12-27,2015,5.159055,15,6.0,5.997125,...,69.0,99.121923,20,31.828894,0.540541,0.620499,TAM,12.2,5.11106,2.000000


In [22]:
## Pretend its 2016, nfl week 5

# Get all features and responses for Running Backs from START_YEAR to YEAR before GAME_NUMBER
POSITION = 'WR'
START_YEAR = 2014
YEAR = 2017
GAME_NUMBER = 11
# features 
FEATURES = ['name', 'team', 'date', 'year', 'game_number',
            'log_draft_position','log_num_games', 'ewma_fpts', 
            'ewma_team_rush_attempts', 'ewma_team_rush_percentage', 
            'ewma_team_passing_rating', 'ewma_rushing_attempts', 'ewma_receiving_targets',
            'DY/P', 'TO', 'next_fpts']
# response
RESPONSE = ['next_fpts']

# append features and response each week to these lists
feature_list = []
response_list = []

for train_year in range(START_YEAR, YEAR+1):

    # if current year don't go past nfl week
    if train_year == YEAR:
        game_limit = GAME_NUMBER
    else: # if previous year don't go past regular season (predict game number 16)
        game_limit = 15

    for train_week in range(1,game_limit+1):
        players = get_players_thatweek(all_games, all_players, POSITION, train_year, train_week, subset_position)
        train = get_features_response(players, all_games, train_year, train_week, points_dict=half_ppr, defWeeksBack=5)
        print('Got train data for year ' +str(train_year) + ' game number ' + str(train_week))
        print('Count of NAN: ' + str(train.isnull().sum()))
        feature = train[FEATURES]
        #response = train[RESPONSE]
        feature_list.append(feature)
        #response_list.append(response)

## After all feature and response training lists have been created
#  Concat lists into train dataframes
train_x = pd.concat(feature_list)
#train_y = pd.concat(response_list)

# fill NaN in train_x with zeros
train_x = train_x.fillna(0)

Got train data for year 2012 game number 1
Count of NAN: name                          0
draft_position                0
log_draft_position            0
team                          0
date                          0
year                          0
log_num_games                 0
game_number                   0
fpts                          0
ewma_fpts                     0
ewma_rushing_attempts         0
ewma_receiving_targets        0
passing_attempts              0
passing_rating                0
ewma_team_passing_rating      0
rushing_attempts              0
ewma_team_rush_attempts       0
rush_percentage               0
ewma_team_rush_percentage     0
DY/P                          0
TO                           13
next_fpts                     0
dtype: int64
Got train data for year 2012 game number 2
Count of NAN: name                         0
draft_position               0
log_draft_position           0
team                         0
date                         0
year          

Got train data for year 2012 game number 12
Count of NAN: name                         0
draft_position               0
log_draft_position           0
team                         0
date                         0
year                         0
log_num_games                0
game_number                  0
fpts                         0
ewma_fpts                    0
ewma_rushing_attempts        0
ewma_receiving_targets       0
passing_attempts             0
passing_rating               0
ewma_team_passing_rating     0
rushing_attempts             0
ewma_team_rush_attempts      0
rush_percentage              0
ewma_team_rush_percentage    0
DY/P                         0
TO                           0
next_fpts                    0
dtype: int64
Got train data for year 2012 game number 13
Count of NAN: name                         0
draft_position               0
log_draft_position           0
team                         0
date                         0
year                         0
log

Got train data for year 2013 game number 7
Count of NAN: name                         0
draft_position               0
log_draft_position           0
team                         0
date                         0
year                         0
log_num_games                0
game_number                  0
fpts                         0
ewma_fpts                    0
ewma_rushing_attempts        0
ewma_receiving_targets       0
passing_attempts             0
passing_rating               0
ewma_team_passing_rating     0
rushing_attempts             0
ewma_team_rush_attempts      0
rush_percentage              0
ewma_team_rush_percentage    0
DY/P                         0
TO                           0
next_fpts                    0
dtype: int64
Got train data for year 2013 game number 8
Count of NAN: name                         0
draft_position               0
log_draft_position           0
team                         0
date                         0
year                         0
log_n

Got train data for year 2014 game number 2
Count of NAN: name                         0
draft_position               0
log_draft_position           0
team                         0
date                         0
year                         0
log_num_games                0
game_number                  0
fpts                         0
ewma_fpts                    0
ewma_rushing_attempts        0
ewma_receiving_targets       0
passing_attempts             0
passing_rating               0
ewma_team_passing_rating     0
rushing_attempts             0
ewma_team_rush_attempts      0
rush_percentage              0
ewma_team_rush_percentage    0
DY/P                         0
TO                           0
next_fpts                    0
dtype: int64
Got train data for year 2014 game number 3
Count of NAN: name                         0
draft_position               0
log_draft_position           0
team                         0
date                         0
year                         0
log_n

Got train data for year 2014 game number 13
Count of NAN: name                         0
draft_position               0
log_draft_position           0
team                         0
date                         0
year                         0
log_num_games                0
game_number                  0
fpts                         0
ewma_fpts                    0
ewma_rushing_attempts        0
ewma_receiving_targets       0
passing_attempts             0
passing_rating               0
ewma_team_passing_rating     0
rushing_attempts             0
ewma_team_rush_attempts      0
rush_percentage              0
ewma_team_rush_percentage    0
DY/P                         0
TO                           0
next_fpts                    0
dtype: int64
Got train data for year 2014 game number 14
Count of NAN: name                         0
draft_position               0
log_draft_position           0
team                         0
date                         0
year                         0
log

KeyboardInterrupt: 

In [19]:
len(train_x)

5400

In [20]:
train_x.to_csv('/Users/nickvarberg/Desktop/School/Football-Prediction/train_data_rbs_2012_2017.csv')

In [12]:
lr = None
r_sqr_list = []
mean_abs_err_list = []
mean_test_fpts_list = []

# train mlr if not trained yet
if lr is None:

    # Train a linear regression model to predict rest of season ppg for RBs 
    lr = LinearRegression()
    lr = lr.fit(train_x, train_y)
    print('Model trained. Here are the results:')
    print('Intercept:')
    print(lr.intercept_)
    for feat, coef in zip(FEATURES, lr.coef_):
        print(feat + ' ' + str(coef))


Got train data for year 2016 game number 1
Got train data for year 2016 game number 2
Got train data for year 2016 game number 3
Got train data for year 2016 game number 4
train data: 
           log_draft_position  log_num_games  ewma_fpts  \
player_id                                                 
17858                4.836282       4.077537   8.912268   
10586                4.043051       3.091042   9.707592   
5943                 5.541264       3.737670   6.088254   
11664                4.553877       2.639057   7.504538   
23267                5.267858       2.639057  10.262829   

           ewma_team_rush_attempts  ewma_team_rush_percentage  \
player_id                                                       
17858                    27.739056                   0.559673   
10586                    26.397092                   0.547583   
5943                     26.397092                   0.547583   
11664                    25.430036                   0.500759   
23267      

In [13]:
# test linear regression
for test_week in range(GAME_NUMBER,8):
    # Get test_x and test_y
    players = get_players_thatweek(all_games, all_players, POSITION, YEAR, test_week, subset_position)
    test = get_features_response(players, all_games, YEAR, test_week, points_dict=half_ppr)
    test_x = test[FEATURES]
    test_y = test[RESPONSE]

    # fill NaN in test_x with zeros
    test_x = test_x.fillna(0)

    # Score gives R^2 of prediction of test_x wrt test_y
    # Note: R^2 is correlated to how many easy predictions
    #  (players with low fantasy points are easy to predict).
    #  R^2 decreases when predicting on fewer but better running backs.
    print('Year: ' +str(YEAR) + '  Game Number: '+ str(test_week))
    r_sqr = lr.score(test_x, test_y)
    r_sqr_list.append(r_sqr)
    mean_abs_err = round(mean_absolute_error(test_y, lr.predict(test_x)),2)
    mean_abs_err_list.append(mean_abs_err)
    mean_test_fpts = np.mean(test_y)
    mean_test_fpts_list.append(mean_test_fpts)
    
    print('R^2: ' + str(round(r_sqr,2)))
    print('Mean Abs Error: ' + str(mean_abs_err))

print('Mean R^2: ' + str(round(np.mean(r_sqr_list), 2)))
print('Overall mean abs err: ' + str(round(np.mean(mean_abs_err_list), 2)))
print('Overall mean test fpts:' + str(round(np.mean(mean_test_fpts_list),2)))

Year: 2016  Game Number: 5
R^2: 0.14
Mean Abs Error: 6.44
Year: 2016  Game Number: 6
R^2: 0.14
Mean Abs Error: 6.51
Year: 2016  Game Number: 7
R^2: 0.16
Mean Abs Error: 5.52
Mean R^2: 0.14
Overall mean abs err: 6.16
Overall mean test fpts:9.25


In [None]:
print('number of running back games: ' + str(len(train_x)))