In [2]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

from imp import load_source

aps = load_source(  'aggregate_player_stats', '../exploration/aggregate_player_stats.py' )
nps = load_source(  'normalize_player_stats', '../normalization/normalize_player_stats.py' )

%matplotlib inline

plt.rcParams['figure.figsize'] = (14,10)
plt.style.use('dark_background')

In [31]:
# Parameters handed to the feature generator below.
end_year = 2017   # passed as the first positional argument
position = 'QB'   # player position to build features for
n_weeks  = 4      # forwarded as `n_weeks`; the '_prev_4' column suffixes used
                  #  later in this notebook presumably derive from this value

# Build the raw per-player feature frame
my_df = aps.generate_player_features(
    end_year,
    position,
    n_weeks=n_weeks,
)

In [8]:
# Column-name groups used when assembling the normalized frames.

# Target columns, kept un-normalized alongside the meta columns
target_variables = [
    'rush_yds',
    'rush_tds',
    'pass_yds',
    'pass_tds',
    'turnovers',
]

# Identifying / bookkeeping columns carried through as-is
meta_info = [
    'player_id',
    'team',
    'week',
    'year',
    'team_home_frac_prev_4',
    'few_reg_weeks',
]

# Passing columns, handed to nps.qb_normalization as its "scales" list
player_scales = [
    'pass_complete_prev_4',
    'pass_incomplete_prev_4',
    'pass_air_yds_prev_4',
    'pass_air_yds_max_prev_4',
    'pass_yds_prev_4',
    'pass_att_prev_4',
]

# Player columns, handed to nps.qb_normalization as its "norms" list
player_norms = [
    'pass_int_prev_4',
    'pass_tds_prev_4',
    'rush_yds_prev_4',
    'rush_tds_prev_4',
    'rush_att_prev_4',
]

# Fumble columns, handed to nps.fumb_normalization
fumb_features = [
    'fumb_lost_prev_4',
    'fumb_rec_prev_4',
    'fumb_forced_prev_4',
    'fumb_nforced_prev_4',
]

# Own-team columns, handed to nps.team_normalization
team_features = [
    'team_tds_prev_4',
    'team_fg_made_prev_4',
    'team_kickoffs_prev_4',
    'team_punts_prev_4',
]

# Opposing-team columns, handed to nps.opp_normalization
opp_features = [
    'opp_avg_tds_prev_4',
    'opp_avg_fg_made_prev_4',
    'opp_avg_rush_yds_prev_4',
    'opp_avg_pass_yds_prev_4',
    'opp_avg_def_tkl_loss_prev_4',
    'opp_avg_def_sack_prev_4',
    'opp_avg_def_pass_def_prev_4',
]

In [36]:
# Work on a copy so the frame from the feature generator stays untouched
temp_df = my_df.copy()

# Turnovers = interceptions thrown + fumbles lost.
# `pop` removes each source column as it is read, so only the combined
#  'turnovers' column remains.
temp_df['turnovers'] = temp_df.pop('pass_int') + temp_df.pop('fumb_lost')

# Now compute the fraction of home games over the previous window:
#  home / (home + away). The two flag columns are consumed in the process.
_home_prev = temp_df.pop('team_home_flag_prev_4')
_away_prev = temp_df.pop('team_away_flag_prev_4')

# Adding 0. forces a float denominator, as in the original expression
temp_df['team_home_frac_prev_4'] = _home_prev / (_home_prev + _away_prev + 0.)

In [74]:
# Train on all data before 2016 and hold out 2016 onward for testing.
# (Original author's note: 2017 preseason data is apparently missing from the db.)
#
# The masks are taken from `temp_df` itself, the frame being filtered,
#  rather than from `my_df`, so the selection never depends on the two
#  frames keeping identical indexes.
# `reset_index()` (without drop=True) keeps the pre-split row labels in an
#  'index' column of each resulting frame.
my_train_data = temp_df.loc[ temp_df['year'] <  2016 ].reset_index()
my_test_data  = temp_df.loc[ temp_df['year'] >= 2016 ].reset_index()

In [78]:
# Dictionary shared by every normalization call below; the nps helpers
#  receive it on both passes of the loop.
norm_dict = {}

# Process the frames separately. Order matters: train first, then test.
# Per the original author's note, the first pass stores normalization values
#  in `norm_dict` and the second pass reuses them on the test data
#  (behavior lives in normalize_player_stats.py — confirm there).
df_list = [
    my_train_data,
    my_test_data,
]

out_df_list = []

for frame in df_list:

    # Normalized fumble features.
    # Uses the shared `norm_dict` like every other normalizer here
    #  (previously this call referenced an undefined `new_dict`).
    fumb_junk = nps.fumb_normalization(
        frame,
        fumb_features,
        norm_dict,
    )

    # Normalized own-team features; 'team_fg_miss_prev_4' is passed as an
    #  extra column name (its exact role is defined by nps.team_normalization).
    team_junk = nps.team_normalization(
        frame,
        team_features,
        norm_dict,
        'team_fg_miss_prev_4',
    )

    # Normalized opposing-team features
    opp_junk = nps.opp_normalization(
        frame,
        opp_features,
        norm_dict,
    )

    # Normalized QB features (separate "norms" and "scales" column lists)
    qb_junk = nps.qb_normalization(
        frame,
        player_norms,
        player_scales,
        norm_dict,
    )

    # Meta and target columns are carried over un-normalized.
    # NOTE(review): reset_index() without drop=True adds an 'index' column
    #  to the output; kept as-is so downstream columns are unchanged.
    new_frame = frame[meta_info + target_variables].reset_index()

    # Combine everything column-wise into one frame per input frame.
    # pd.concat already returns a new object, so no extra copy is needed here.
    out_df_list.append(
        pd.concat(
            [
                new_frame,
                qb_junk,
                fumb_junk,
                team_junk,
                opp_junk,
            ],
            axis=1,
        )
    )


# Unpack in the same order as df_list: [0] is train, [1] is test
train_norm_data = out_df_list[0].copy()
test_norm_data  = out_df_list[1].copy()

# Drop the reference to the intermediate list
out_df_list     = 0