# To do

Need to do:
* Replace NAN pts_next_year with 0 if not the most recent year of data
* Similar smart replacements so that the right data gets kept and not imputed

Requires new scraping:
* Add actual injury info, dummy variables for different injuries in the previous year(s)
* Need some sort of analysis for rookies
    * Does the team they were drafted by have a star at the same position?
    * Where were they picked in the draft?
    * Winning pct of team picked?
    * Typical college metrics including injury history
* Need some sort of comparison with ADP eventually
    * Look at ADP this year, ADP following year type of thing. Delta ADP for risers, sinkers
    * Look at second half performance and how it affects ADP??
* New coach flag

Do-able with current data:
* Can get a flag for backup RB
* "On a good team flag," maybe it's the winning percentage of the team, or a flag on > .500 winning pct


# Import packages and define functions

In [25]:
###################### Import Packages #############################################
import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import statsmodels.formula.api as smf
from ffb_functions import *
pd.set_option('display.max_columns', 500)

# Assemble the database and create metrics

In [2]:
############## Assemble the database #########################################
database = data_assembly(2010, 2019)   # pull based on years
database.to_csv('ffb_db.csv', index = False)          # save as csv just in case

In [3]:
# change columns to floats
cols_to_change = ['VBD', 'PosRank', 'OvRank'
                  , 'Age'
                  , 'PPR'
                  , 'GS', 'G'
                  , 'PaTD', 'Cmp', 'Int'
                  , 'RuTD', 'ReTD'
                  , 'PaYds', 'RuYds', 'ReYds', 'Rec'
                 , 'RuY/A', 'ReYds/R'
                 , 'PaAtt', 'RuAtt'
                 , 'Tgt'
                 , '2PM', '2PP'
                 , 'Fmb', 'FL'
                 , 'TD.3']
for col in cols_to_change:
    database[col] = database[col].astype(float)
    
# create new metrics
shifted = shift_col(database, 'pts_next_year', 'PPR', -1)    # create pts_next_year column for regressions
shifted['pts_next_year'] = shifted['pts_next_year'].replace(np.nan, 0)

shifted = shift_col(shifted, 'g_next_year', 'G', -1)        # create G_next_year col for regressions
shifted['g_next_year'] = shifted['g_next_year'].replace(np.nan, 0)
shifted = shift_col(shifted, 'G_prev_year', 'G', 1)          # create games prev year for injury history
shifted = shift_col(shifted, 'G_prev_year_prev_year', 'G_prev_year', 1)  # create games prev year for injury history
shifted['ppg_next_year'] = shifted['pts_next_year'] / shifted['g_next_year']    # create ppg next year for y var
shifted['ppg_this_year'] = shifted['PPR'] / shifted['G']                              # current year ppg
shifted['delta_ppg'] = shifted['ppg_next_year'] - shifted['ppg_this_year']

shifted['season_frac_1'] = shifted['G'] / 16                                          # 1 year injury history
shifted['season_frac_2'] = (shifted['G'] + shifted['G_prev_year']) / 32               # 2 year injury history
shifted['season_frac_3'] = (shifted['G'] + shifted['G_prev_year_prev_year']) / 48     # 3 year injury history
shifted['ReYds_per_R'] = shifted['ReYds/R']                                           # ReYds per reception
shifted['RuYds_per_A'] = shifted['RuY/A']                                             # RuYds per attempt
shifted['RuTD_per_Att'] = shifted['RuTD'] / shifted['RuAtt']                          # RuTds per attempt
shifted['PaYds_per_PaAtt'] = shifted['PaYds'] / shifted['PaAtt']                      # PaYds per attempt
shifted['PaTD_per_PaAtt'] = shifted['PaTD'] / shifted['PaAtt']                        # PaTds per attempt
shifted['Cmp_per_PaAtt'] = shifted['Cmp'] / shifted['PaAtt']                        # Completions per attempt
shifted['Int_per_PaAtt'] = shifted['Int'] / shifted['PaAtt']                        # Interceptions per attempt
shifted['Rec_per_tgt'] = shifted['Rec'] / shifted['Tgt']                # Receptions per target
shifted['ReTD_per_rec'] = shifted['ReTD'] / shifted['Rec']              # ReTD's per reception
shifted['start_frac'] = shifted['GS'] / shifted['G']

# Create some per game metrics
for metric in ['Tgt', 'PaAtt', 'RuAtt']:
    new_str = metric + '_per_game'
    shifted[new_str] = shifted[metric] / shifted['G']

# Create new team flag
shifted = shift_col(shifted, 'Tm_prev_year', 'Tm', 1)
shifted['Tm_change_flag'] = shifted.apply(\
                                             lambda x: new_team(x['Tm_prev_year'], x['Tm']), axis = 1)

# Create share of team's fantasy points
gb = shifted.groupby(['Tm', 'Year']).sum().reset_index()[['Tm', 'Year', 'PPR']]
gb = gb[(gb.Tm != '2TM') & (gb.Tm != '3TM') & (gb.Tm != '4TM')]

shifted['Fant_Share'] = 0
for i in range(len(shifted)):
    if 'TM' not in shifted.Tm[i]:
        temp_tm = shifted.loc[i, 'Tm']
        temp_yr = shifted.loc[i, 'Year']
        gb_val = gb.loc[(gb.Tm == temp_tm) & (gb.Year == temp_yr), 'PPR']
        shifted.loc[i, 'Fant_Share'] = shifted.loc[i, 'PPR'] / gb_val.iloc[0]
    else:
        shifted.loc[i, 'Fant_Share'] = np.nan

In [4]:
shifted.to_csv('redraft_metrics.csv', index = False)

# Merge ADP numbers (ppr)

In [172]:
adp_frame = ADP_assembly(2010, 2020)[['Name', 'Year', 'Overall', 'Std.Dev', 'Team']]

adp_frame['Year'] = adp_frame['Year'] - 1

adp_frame = adp_frame.replace('TB', 'TAM')
adp_frame = adp_frame.replace('NO', 'NOR')
adp_frame = adp_frame.replace('KC', 'KAN')
adp_frame = adp_frame.replace('GB', 'GNB')
adp_frame = adp_frame.replace('NE', 'NWE')
adp_frame = adp_frame.replace('Odell Beckham Jr', 'Odell Beckham')
adp_frame = adp_frame.rename(columns = {'Team': 'Tm'})

frame_w_adp = shifted.merge(adp_frame, on = ['Name', 'Year'], how = 'outer')
frame_w_adp['Overall'] = frame_w_adp['Overall'].replace(np.nan, 999)
frame_w_adp[['Name', 'Year', 'Overall', 'PPR', 'pts_next_year']].sort_values(['Name', 'Year'])



Unnamed: 0,Name,Year,Overall,PPR,pts_next_year
0,A.J. Derby,2016,999.0,30.0,55.4
1,A.J. Derby,2017,999.0,55.4,13.8
2,A.J. Derby,2018,999.0,13.8,0.0
3,A.J. Feeley,2011,999.0,22.3,0.0
5756,A.J. Green,2010,96.7,,
4,A.J. Green,2011,26.2,218.0,299.8
5,A.J. Green,2012,17.2,299.8,306.6
6,A.J. Green,2013,11.5,306.6,205.3
7,A.J. Green,2014,16.7,205.3,273.7
8,A.J. Green,2015,6.4,273.7,186.4


# Pull rookie DB and merge draft capital and combine numbers

In [143]:
college = college_assembly(2000, 2020)
college = college.fillna(0)

college.loc[college.Name == 'Joshua Jacobs']
for i in range(len(college)):
    if college.Name.iloc[i] == "Joshua Jacobs":
        college.Name.iloc[i] = "Josh Jacobs"
        print(college.Name.iloc[i])

Josh Jacobs


In [144]:
rk = rookie_assembly(2000, 2020)

rk = rk[['Pick', 'Tm', 'Name', 'Age', 'College', 'Year', 'FantPos']]

comb = combine_assembly(2000, 2020)

comb = comb.rename(columns = {'Pos':'FantPos'})
comb = comb[['Name', 'Year', 'School', 'FantPos', 'Ht', 'Wt', 'Dash', 'Vertical', 'Bench', 'Broad_Jump', 'Three_Cone', 'Shuttle']]
comb = comb.loc[(comb.FantPos == 'QB') 
         | (comb.FantPos == 'WR')
        | (comb.FantPos == 'RB')
        | (comb.FantPos == 'TE')].reset_index(drop = True)

cols = ['Wt', 'Dash', 'Vertical', 'Bench', 'Broad_Jump', 'Three_Cone', 'Shuttle']
for i in cols:
    comb[i] = comb[i].astype(float)
    comb[i] = comb.groupby("FantPos").transform(lambda x: x.fillna(x.mean()))[i]


In [173]:
test1 = rk.merge(comb, on = ['Name', 'FantPos', 'Year'], how = 'outer')
test1.loc[test1.Name == 'Marcus Mariota']

test2 = test1.merge(college, on = ['Name', 'School'], how = 'outer')
test2 = test2.rename(columns = {'Year_x':'Year'})


test3 = test2.merge(frame_w_adp[['Name', 'Year', 'Overall', 'Std.Dev', 'FantPos']], on = ['Name', 'Year', 'FantPos'], how = 'inner')
test3 = test3.drop_duplicates(['Name', 'FantPos', 'Year'], keep = 'first').reset_index(drop = True)

test3.to_csv('full_college_df.csv', index = False)

In [183]:
for_redraft = test2[['Name', 'Tm', 'Year', 'Pick', 'FantPos', 'College', 'Ht', 'Wt', 'Dash', 'Vertical', 'Bench', 'Broad_Jump', 'Three_Cone'
      , 'Shuttle', 'Conf']]


final_frame = frame_w_adp.merge(for_redraft, on = ['Name', 'FantPos'], how = 'outer')
final_frame = final_frame.fillna(0)
final_frame = final_frame.rename(columns = {'Year_x': 'Year'})
final_frame = final_frame.rename(columns = {'Tm': 'Other_Tm'})
final_frame = final_frame.rename(columns = {'Tm_x': 'Tm'})
final_frame = final_frame.rename(columns = {'Tm_y': 'New_Tm'})

final_frame.to_csv('big_redraft_frame.csv', index = False)



In [184]:
final_frame.loc[final_frame.Name == 'Amari Cooper']
#for_redraft.loc[for_redraft.Name == 'Amari Cooper']
#frame_w_adp.loc[frame_w_adp.Name == 'Amari Cooper']
#adp_frame.loc[adp_frame.Name == 'Antonio Brown']
#test2.loc[test2.Name == 'Antonio Brown']

Unnamed: 0,Rk,Name,Tm,FantPos,Age,G,GS,Cmp,PaAtt,PaYds,PaTD,Int,RuAtt,RuYds,RuY/A,RuTD,Tgt,Rec,ReYds,ReYds/R,ReTD,Fmb,FL,TD.3,2PM,2PP,FantPt,PPR,DKPt,FDPt,VBD,PosRank,OvRank,Year,pts_next_year,g_next_year,G_prev_year,G_prev_year_prev_year,ppg_next_year,ppg_this_year,delta_ppg,season_frac_1,season_frac_2,season_frac_3,ReYds_per_R,RuYds_per_A,RuTD_per_Att,PaYds_per_PaAtt,PaTD_per_PaAtt,Cmp_per_PaAtt,Int_per_PaAtt,Rec_per_tgt,ReTD_per_rec,start_frac,Tgt_per_game,PaAtt_per_game,RuAtt_per_game,Tm_prev_year,Tm_change_flag,Fant_Share,Overall,Std.Dev,New_Tm,Other_Tm,Year_y,Pick,College,Ht,Wt,Dash,Vertical,Bench,Broad_Jump,Three_Cone,Shuttle,Conf
196,57,Amari Cooper,OAK,WR,21.0,16.0,15.0,0.0,0.0,0.0,0.0,0.0,3.0,-3.0,-1.0,0.0,130.0,72.0,1070.0,14.86,6.0,1.0,1.0,6.0,0.0,0.0,141,212.7,216.7,176.7,16.0,25.0,57.0,2015.0,232.3,16.0,0.0,0.0,14.51875,13.29375,1.225,1.0,0.0,0.0,14.86,-1.0,0.0,0.0,0.0,0.0,0.0,0.553846,0.083333,0.9375,8.125,0.0,0.1875,0,0.0,0.147729,20.1,2.9,OAK,OAK,2015.0,4,Alabama,6-1,211.0,4.42,33.0,14.556322,120.0,6.71,3.98,SEC
197,40,Amari Cooper,OAK,WR,22.0,16.0,14.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,132.0,83.0,1153.0,13.89,5.0,2.0,0.0,5.0,2.0,0.0,151,232.3,235.3,190.8,25.0,12.0,40.0,2016.0,158.4,14.0,16.0,0.0,11.314286,14.51875,-3.204464,1.0,1.0,0.0,13.89,0.0,0.0,0.0,0.0,0.0,0.0,0.628788,0.060241,0.875,8.25,0.0,0.0625,OAK,0.0,0.152368,21.3,3.2,OAK,OAK,2015.0,4,Alabama,6-1,211.0,4.42,33.0,14.556322,120.0,6.71,3.98,SEC
198,108,Amari Cooper,OAK,WR,23.0,14.0,12.0,0.0,0.0,0.0,0.0,0.0,1.0,4.0,4.0,0.0,96.0,48.0,680.0,14.17,7.0,1.0,0.0,7.0,0.0,0.0,110,158.4,161.4,134.4,0.0,31.0,0.0,2017.0,215.5,15.0,16.0,16.0,14.366667,11.314286,3.052381,0.875,0.9375,0.625,14.17,4.0,0.0,0.0,0.0,0.0,0.0,0.5,0.145833,0.857143,6.857143,0.0,0.071429,OAK,0.0,0.122572,31.4,3.6,DAL,OAK,2015.0,4,Alabama,6-1,211.0,4.42,33.0,14.556322,120.0,6.71,3.98,SEC
199,51,Amari Cooper,2TM,WR,24.0,15.0,15.0,0.0,0.0,0.0,0.0,0.0,2.0,20.0,10.0,0.0,107.0,75.0,1005.0,13.4,7.0,2.0,2.0,7.0,0.0,0.0,141,215.5,220.5,178.0,24.0,19.0,51.0,2018.0,0.0,0.0,14.0,16.0,0.0,14.366667,0.0,0.9375,0.90625,0.645833,13.4,10.0,0.0,0.0,0.0,0.0,0.0,0.700935,0.093333,1.0,7.133333,0.0,0.133333,OAK,1.0,0.0,34.0,4.4,DAL,OAK,2015.0,4,Alabama,6-1,211.0,4.42,33.0,14.556322,120.0,6.71,3.98,SEC
6065,0,Amari Cooper,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,2014.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,40.8,4.4,OAK,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [149]:
team_temp = team_assembly(2008, 2019)

tm_dict = {'Pittsburgh': 'PIT',
 'Philadelphia': 'PHI',
 'New England': 'NWE',
 'Minnesota': 'MIN',
 'Carolina': 'CAR',
 'LA Rams': 'LAR',
 'New Orleans': 'NOR',
 'Jacksonville': 'JAX',
 'Kansas City': 'KAN',
 'Atlanta': 'ATL',
 'LA Chargers': 'LAC',
 'Seattle': 'SEA',
 'Buffalo': 'BUG',
 'Dallas': 'DAL',
 'Tennessee': 'TEN',
 'Detroit': 'DET',
 'Baltimore': 'BAL',
 'Arizona': 'ARI',
 'Washington': 'WAS',
 'Green Bay': 'GNB',
 'Cincinnati': 'CIN',
 'Oakland': 'OAK',
 'San Francisco': 'SFO',
 'Miami': 'MIA',
 'Denver': 'DEN',
 'NY Jets': 'NYJ',
 'Tampa Bay': 'TAM',
 'Chicago': 'CHI',
 'Indianapolis': 'IND',
 'Houston': 'HOU',
 'NY Giants': 'NYG',
 'Cleveland': 'CLE'}

team_temp['Tm'] = team_temp['Team'].apply(lambda x: tm_dict[x])
team_temp = team_temp[['Tm', 'Win_PCT', 'Year']]
team_temp['Win_PCT'] = team_temp['Win_PCT'].apply(lambda x: float(x[0:len(x)-1]))

In [185]:
real_final = final_frame.merge(team_temp, on = ['Year', 'Tm'], how = 'outer')
real_final = real_final.loc[pd.notna(real_final.Rk)]
real_final.Win_PCT = real_final.Win_PCT.fillna(0.500)#[pd.notna(real_final.Win_PCT)]#[['Name', 'Tm', 'Year', 'Win_PCT']]
real_final = real_final.loc[(real_final.FantPos == 'QB') 
         | (real_final.FantPos == 'WR')
        | (real_final.FantPos == 'RB')
        | (real_final.FantPos == 'TE')].reset_index(drop = True)
real_final.to_csv('final_frame_teams.csv', index = False)

