# To do
* Fix N'Keal Harry
* Fix Le'Veon Bell: in the data but not in analysis for 2018 bc missed a year due to holdout
    * Change joins so that those with ADP are kept

Can do with current data:
* Need to integrate the rookie analysis in the targets joining vs not joining
    * Does the team they were drafted by have a star at the same position?
    * Where were they picked in the draft?
    * Winning pct of team picked?
    * Typical college metrics including injury history

Requires new scraping:
* Add type of injury info, dummy variables for different injuries in the previous year(s)
* Get game level data: look at second half performance and how it affects ADP, a momentum metric
* New coach flag, maybe a coach name dummy variable or a quarter back name dummy

# Import packages and define functions

In [1]:
###################### Import Packages #############################################
import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import statsmodels.formula.api as smf
from ffb_functions import *
pd.set_option('display.max_columns', 500)
pd.options.display.max_rows = 999

# Assemble pro football reference database and create metrics

In [2]:
start_year = 2004
end_year = 2020
database = pd.read_csv('profootballreference.csv')

## some players for IR reasons dont have a fantasy position for one year
# need to fill in with their other fantasy positions from other years
database.loc[database.Name == 'Travis Kelce', 'FantPos'] = 'TE' #on the IR his first year, positionless in the stats
database.loc[database.Name == 'Sam Bradford', 'FantPos'] = 'QB' #on the IR one year
database.loc[database.Name == 'Chad Johnson', 'FantPos'] = 'WR' #on the IR one year
database.loc[database.Name == 'Marvin Jones', 'FantPos'] = 'WR' #on the IR one year
database.loc[database.Name == 'Brandon Coleman', 'FantPos'] = 'WR' #on the IR one year
database.loc[database.Name == 'Chris Givens', 'FantPos'] = 'WR' #on the IR one year
database.loc[database.Name == 'Vincent Brown', 'FantPos'] = 'WR' #on the IR one year

# this needs to happen after name specific position changes
# exclude non skill positions
# only top 300 scorers
# only relevant years
# exclude the other adrian peterson
database = (database.loc[database.FantPos.isin(['QB', 'RB', 'WR', 'TE'])
                       & (database.Rk <= 400)
                       & (database.Year >= start_year)
                       & (database.Year <= end_year)
                       & ~((database.Name == 'Adrian Peterson') & (database.Tm == 'CHI'))]
                    .reset_index(drop = True))

## change columns to floats for metric creation
cols_to_change = ['VBD', 'PosRank', 'OvRank', 'Rk', 'Age'
                  , 'PPR', 'GS', 'G'
                  , 'PaTD', 'Cmp', 'Int', 'RuTD', 'ReTD'
                  , 'PaYds', 'RuYds', 'ReYds', 'Rec'
                  , 'RuY/A', 'ReYds/R', 'PaAtt', 'RuAtt', 'Tgt'
                  , '2PM', '2PP', 'Fmb', 'FL', 'TD.3']

for col in cols_to_change:
    database[col] = database[col].astype(float)


## create new metrics #############################################
# points next year for regressions
shifted = shift_col(database, 'pts_next_year', 'PPR', -1)
shifted['pts_next_year'] = shifted['pts_next_year'].replace(np.nan, 0)                # impute 0 if they arent in top 500 next year

## impute stuff early #####
shifted['VBD'] = shifted['VBD'].replace(np.nan, 999)
shifted['OvRank'] = shifted['OvRank'].replace(np.nan, 999)

# games next year for regressions
shifted = shift_col(shifted, 'g_next_year', 'G', -1)                                  # create G_next_year col for regressions
shifted['g_next_year'] = shifted['g_next_year'].replace(np.nan, 0)                    # impute 0 if they arent in top 500 next year

# games previous years for injury history
shifted = shift_col(shifted, 'G_prev_year', 'G', 1) 
shifted = shift_col(shifted, 'G_prev_year_prev_year', 'G_prev_year', 1)               

# create ppg next year for y var
shifted['ppg_next_year'] = shifted['pts_next_year'] / shifted['g_next_year']
shifted['ppg_next_year'] = shifted['ppg_next_year'].replace(np.nan, 0)
shifted['ppg_this_year'] = shifted['PPR'] / shifted['G']                              # current year ppg
shifted['delta_ppg'] = shifted['ppg_next_year'] - shifted['ppg_this_year']            # change in ppg

# create injury history metrics
shifted['season_frac_1'] = shifted['G'] / 16                                          # 1 year injury history
shifted['season_frac_2'] = (shifted['G'] + shifted['G_prev_year']) / 32               # 2 year injury history
shifted['season_frac_3'] = (shifted['G'] + shifted['G_prev_year_prev_year']) / 48     # 3 year injury history

# create per attempt metrics to reduce multicollinearity
# ReYds per reception is already in
# RuYds per attempt is already in
shifted['RuTD_per_Att'] = shifted['RuTD'] / shifted['RuAtt']                          # RuTds per attempt
shifted['PaYds_per_PaAtt'] = shifted['PaYds'] / shifted['PaAtt']                      # PaYds per attempt
shifted['PaTD_per_PaAtt'] = shifted['PaTD'] / shifted['PaAtt']                        # PaTds per attempt
shifted['Cmp_per_PaAtt'] = shifted['Cmp'] / shifted['PaAtt']                          # Completions per attempt
shifted['Int_per_PaAtt'] = shifted['Int'] / shifted['PaAtt']                          # Interceptions per attempt
shifted['Rec_per_tgt'] = shifted['Rec'] / shifted['Tgt']                              # Receptions per target
shifted['ReTD_per_rec'] = shifted['ReTD'] / shifted['Rec']                            # ReTD's per reception

# fraction of games played that they started
shifted['start_frac'] = shifted['GS'] / shifted['G']

# Create some per game metrics
for metric in ['Tgt', 'PaAtt', 'RuAtt']:
    new_str = metric + '_per_game'
    shifted[new_str] = shifted[metric] / shifted['G']
    
## share of team's fantasy points
# get fantasy points of team
gb = shifted.groupby(['Tm', 'Year']).sum().reset_index()[['Tm', 'Year', 'PPR']]
gb = gb[(gb.Tm != '2TM') & (gb.Tm != '3TM') & (gb.Tm != '4TM')]
# compare fantasy points of player to fantasy points of team
shifted['Fant_Share'] = 0
for i in range(len(shifted)):
    if 'TM' not in shifted.Tm[i]:
        temp_tm = shifted.loc[i, 'Tm']
        temp_yr = shifted.loc[i, 'Year']
        gb_val = gb.loc[(gb.Tm == temp_tm) & (gb.Year == temp_yr), 'PPR']
        shifted.loc[i, 'Fant_Share'] = shifted.loc[i, 'PPR'] / gb_val.iloc[0]
    else:
        shifted.loc[i, 'Fant_Share'] = 0 #this isnt technically true, may need to change this
        
# impute season frac by position, impute games prev by position
cols = ['season_frac_2', 'season_frac_3', 'G_prev_year', 'G_prev_year_prev_year']
for i in cols:
    shifted[i] = shifted[i].astype(float)
    shifted[i] = shifted[i].fillna(shifted.groupby('FantPos')[i].transform('mean'))


#shifted['FantPos'] = shifted['FantPos'].fillna(shifted.groupby('FantPos')[i].transform('mean'))
# hopefully figure out a clever way to impute modes here. for now, fix this thing

# impute all other stats with 0, because remaining nans are from 0 passes etc
shifted = shifted.fillna(0)

# save output so far
shifted.to_csv('profootballfocus_withmetrics.csv', index = False)
shifted.groupby('Year').count()

Unnamed: 0_level_0,Rk,Name,Tm,FantPos,Age,G,GS,Cmp,PaAtt,PaYds,PaTD,Int,RuAtt,RuYds,RuY/A,RuTD,Rec,ReYds,ReYds/R,ReTD,Fmb,FL,TD.3,2PM,2PP,FantPt,PPR,DKPt,FDPt,VBD,PosRank,OvRank,Tgt,pts_next_year,g_next_year,G_prev_year,G_prev_year_prev_year,ppg_next_year,ppg_this_year,delta_ppg,season_frac_1,season_frac_2,season_frac_3,RuTD_per_Att,PaYds_per_PaAtt,PaTD_per_PaAtt,Cmp_per_PaAtt,Int_per_PaAtt,Rec_per_tgt,ReTD_per_rec,start_frac,Tgt_per_game,PaAtt_per_game,RuAtt_per_game,Fant_Share
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1
2004,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399
2005,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396
2006,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397
2007,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396
2008,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397,397
2009,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400
2010,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398
2011,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398
2012,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398,398
2013,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399,399


# Assemble and merge ADP database (ppr)

In [3]:
## new adp.csv gets us many more observations per year and more years, in return lose std dev variable
adp_frame_0 = pd.read_csv('adp_latest.csv')

# keep relevant years
# exclude mike williams
adp_frame_0 = (adp_frame_0
               .loc[(adp_frame_0.Year >= start_year)
                    & (adp_frame_0.Year <= end_year + 1)
                   & (adp_frame_0.Name != 'Mike Williams')]
               .reset_index(drop = True))

adp_frame_0['Year'] = adp_frame_0['Year'] - 1

adp_frame_0 = adp_frame_0.replace('NEP', 'NWE')
adp_frame_0 = adp_frame_0.replace('RAM', 'STL')

# Dont include pos in the join, too many differences of wr's at te's
# merge the databases, inner bc only want fantasy relevant players
frame_w_adp = shifted.merge(adp_frame_0, on = ['Name', 'Year'], how = 'inner')
for_graphs = shifted.merge(adp_frame_0, on = ['Name', 'Year'], how = 'outer')
frame_w_adp.to_csv('for_graphs.csv', index = False)
frame_w_adp = frame_w_adp.rename(columns = {'Tm_x': 'Old_Team', 'Tm_y':'New_Team'})


# flag if team in adp database is different from pff database, means switched teams
frame_w_adp['Tm_change_flag'] = (frame_w_adp
                                 .apply(lambda x: new_team(x['Old_Team'], x['New_Team'])
                                        , axis = 1))


########### keep this bit because STD DEV is an important variable ###################
## need to look into this one
# so when scraping, need to visit the webpage before scraping or else it wont work
adp_frame_alt = pd.read_csv('std.csv')

# maybe merge on pos too? cant merge on team because players switch from profootballfocus to adp
# merge the databases, inner bc only want fantasy relevant players
frame_w_adp_2 = frame_w_adp.merge(adp_frame_alt, on = ['Name', 'Year'], how = 'left')

frame_w_adp_2['Std.Dev'] = frame_w_adp_2['Std.Dev'].fillna(frame_w_adp_2['Std.Dev'].mean())

## see the number of targets leaving or entering a team
tgt_avail = (frame_w_adp
             .groupby(['Year', 'Old_Team'])
             .sum()
             .reset_index()[['Year', 'Old_Team', 'Tgt']]
             .rename(columns = {'Old_Team': 'New_Team', 'Tgt': 'Prev_Tgt'}))

opportunity = (frame_w_adp
               .groupby(['Year', 'New_Team'])
               .sum()
               .reset_index()[['Year','New_Team', 'Tgt']])


opp_frame = tgt_avail.merge(opportunity, on = ['Year', 'New_Team'], how = 'outer')

opp_frame['opp_difference'] = opp_frame['Prev_Tgt'] - opp_frame['Tgt']

frame_w_adp = (frame_w_adp
               .merge(opp_frame[['Year', 'New_Team', 'opp_difference']]
                      , on = ['Year', 'New_Team']
                      , how = 'outer'))

## see the number of rushing attempts leaving or entering a team
ru_avail = (frame_w_adp
            .groupby(['Year', 'Old_Team'])
            .sum()
            .reset_index()[['Year', 'Old_Team', 'RuAtt']]
            .rename(columns = {'Old_Team': 'New_Team', 'RuAtt':'Prev_Ru'}))

new_ru = (frame_w_adp
          .groupby(['Year', 'New_Team'])
          .sum()
          .reset_index()[['Year', 'New_Team', 'RuAtt']])

ru_frame = new_ru.merge(ru_avail, on = ['Year', 'New_Team'], how = 'outer')

ru_frame['ru_opp'] = ru_frame['Prev_Ru'] - ru_frame['RuAtt']

frame_w_adp = frame_w_adp.merge(ru_frame[['Year', 'New_Team', 'ru_opp']], on = ['Year', 'New_Team'], how = 'outer')

frame_w_adp = frame_w_adp[pd.notna(frame_w_adp.Rk)].reset_index(drop = True)

In [4]:
frame_w_adp.groupby('Year').count()
#for_graphs.groupby('Year').count()

Unnamed: 0_level_0,Rk,Name,Old_Team,FantPos,Age,G,GS,Cmp,PaAtt,PaYds,PaTD,Int,RuAtt,RuYds,RuY/A,RuTD,Rec,ReYds,ReYds/R,ReTD,Fmb,FL,TD.3,2PM,2PP,FantPt,PPR,DKPt,FDPt,VBD,PosRank,OvRank,Tgt,pts_next_year,g_next_year,G_prev_year,G_prev_year_prev_year,ppg_next_year,ppg_this_year,delta_ppg,season_frac_1,season_frac_2,season_frac_3,RuTD_per_Att,PaYds_per_PaAtt,PaTD_per_PaAtt,Cmp_per_PaAtt,Int_per_PaAtt,Rec_per_tgt,ReTD_per_rec,start_frac,Tgt_per_game,PaAtt_per_game,RuAtt_per_game,Fant_Share,New_Team,Overall,Tm_change_flag,opp_difference,ru_opp
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1
2012,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,195,195
2013,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,199,199
2014,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,228,213,213
2015,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,213,196,196
2016,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,206,206
2017,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,215,215
2018,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,215,215


# Assemble and merge college database and draft/combine database

In [5]:
# college data pull 1: draft pick: college, age, some stats
rk_0 = pd.read_csv('data/draft.csv')

# keep relevant years
# two adrian petersons, only want oklahoma one
rk_0 = (rk_0
        .loc[(rk_0.Year >= start_year - 4)
            & (rk_0.Year <= end_year + 1)
            & ~((rk_0.Name == 'Adrian Peterson') & (rk_0.College == 'Georgia Southern'))]
        .reset_index(drop = True))

rk_0['Name'] = rk_0['Name'].str.replace('DJ Chark', 'D.J. Chark')
rk_0.loc[rk_0.Name == 'Devin Funchess', 'FantPos'] = 'WR'

# college data pull 2: combine numbers: school, height, weight, combine stats, position, name
comb_0 = pd.read_csv('data/combine.csv')

# keep relevant years
# two adrian petersons
comb_0 = (comb_0
          .loc[(comb_0.Year >= start_year - 4)
                & (comb_0.Year <= end_year + 1)
                & ~((comb_0.Name == 'Adrian Peterson') & (comb_0.School == 'Georgia Southern'))
              ]
          .reset_index(drop = True))

# merge rookie names, draft capital with combine statistics
draft_and_combine = rk_0.merge(comb_0, on = ['Name', 'FantPos', 'Year'], how = 'outer')
draft_and_combine = draft_and_combine.loc[draft_and_combine.Name != 'Mike Williams'].reset_index(drop = True)


## merge to big frame with adp
for_redraft = draft_and_combine[['Name', 'Pick', 'FantPos', 'College', 'height', 'Wt'
                    , 'Dash', 'Vertical', 'Bench', 'Broad_Jump', 'Three_Cone', 'Shuttle']]

final_frame_0 = frame_w_adp.merge(for_redraft, on = ['Name', 'FantPos'], how = 'outer')
final_frame = final_frame_0.loc[pd.notna(final_frame_0.Old_Team)].reset_index(drop = True)

## positionally impute mean values for combine
cols = ['Wt', 'height', 'Dash', 'Vertical', 'Bench', 'Broad_Jump', 'Three_Cone', 'Shuttle']
for i in cols:
    final_frame[i] = final_frame[i].astype(float)
    final_frame[i] = final_frame[i].fillna(final_frame.groupby("FantPos")[i].transform('mean'))

## impute 0's for everything else
final_frame = final_frame.fillna(0)
final_frame = final_frame.rename(columns = {'Year_x': 'Year'
                                           , 'Tm': 'Other_Tm'
                                            , 'Tm_x': 'Tm'
                                            , 'Tm_y': 'New_Tm'
                                            , 'New_Team':'Tm'
                                           })

final_frame.to_csv('big_redraft_frame.csv', index = False)

In [6]:
#final_frame.loc[final_frame.Shuttle == 0]
final_frame.loc[final_frame.Name == 'Amari Cooper']
#frame_w_adp.loc[frame_w_adp.Name.str.contains("Harry")]
#adp_frame.loc[adp_frame.Name == 'Antonio Brown']
#test2.loc[test2.Name == 'Antonio Brown']
final_frame.loc[final_frame.FantPos == 0]
final_frame.loc[final_frame.Pick == 0]
frame_w_adp.loc[frame_w_adp.Name.str.contains("Green-Ellis")]
rk_0.loc[rk_0.Name.str.contains("Wes")]

Unnamed: 0,Pick,Tm,Name,Age,College,Year,FantPos
187,91,PHI,Brian Westbrook,23.0,Villanova,2002,RB
241,236,BAL,Wes Pate,,S.F. Austin,2002,QB
1160,94,CLE,Terrance West,23.0,Towson,2014,RB
1397,110,JAX,Dede Westbrook,23.0,Oklahoma,2017,WR
1567,121,NYJ,Trevon Wesco,23.0,West Virginia,2019,TE


# Assemble and merge data from teams to get winning pct

In [7]:
team_temp = pd.read_csv('data/teams.csv')

team_temp = (team_temp
             .loc[(team_temp.Year >= start_year)
                    & (team_temp.Year <= end_year)]
             .reset_index(drop = True))

real_final = final_frame.merge(team_temp, on = ['Year', 'Tm'], how = 'outer')

real_final = (real_final
              .loc[pd.notna(real_final.Rk)]
              .reset_index(drop = True))

#print(real_final.loc[real_final.Name == 'Julian Edelman'])

real_final.Win_PCT = real_final.Win_PCT.fillna(0.500)

real_final = (real_final.loc[(real_final.FantPos == 'QB') 
                             | (real_final.FantPos == 'WR')
                            | (real_final.FantPos == 'RB')
                            | (real_final.FantPos == 'TE')]
                        .reset_index(drop = True))

real_final.to_csv('final_frame_teams.csv', index = False)

In [8]:
team_temp.loc[team_temp.Tm == 'NWE']
real_final.loc[real_final.Name.str.contains("Antonio Brown")]
#real_final.groupby(['Year']).count()

Unnamed: 0,Rk,Name,Old_Team,FantPos,Age,G,GS,Cmp,PaAtt,PaYds,PaTD,Int,RuAtt,RuYds,RuY/A,RuTD,Rec,ReYds,ReYds/R,ReTD,Fmb,FL,TD.3,2PM,2PP,FantPt,PPR,DKPt,FDPt,VBD,PosRank,OvRank,Year,Tgt,pts_next_year,g_next_year,G_prev_year,G_prev_year_prev_year,ppg_next_year,ppg_this_year,delta_ppg,season_frac_1,season_frac_2,season_frac_3,RuTD_per_Att,PaYds_per_PaAtt,PaTD_per_PaAtt,Cmp_per_PaAtt,Int_per_PaAtt,Rec_per_tgt,ReTD_per_rec,start_frac,Tgt_per_game,PaAtt_per_game,RuAtt_per_game,Fant_Share,Tm,Overall,Tm_change_flag,opp_difference,ru_opp,Pick,College,height,Wt,Dash,Vertical,Bench,Broad_Jump,Three_Cone,Shuttle,Win_PCT
206,9.0,Antonio Brown,PIT,WR,30.0,15.0,15.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,104.0,1297.0,12.47,15.0,0.0,0.0,15.0,0.0,0.0,220.0,323.7,326.7,271.7,103.0,2.0,9.0,2018.0,168.0,16.1,1.0,14.0,15.0,16.1,21.58,-5.48,0.9375,0.90625,0.625,0.0,0.0,0.0,0.0,0.0,0.619048,0.144231,1.0,11.2,0.066667,0.0,0.190557,FA,23.71,1.0,0.0,0.0,195.0,Central Michigan,70.0,186.0,4.56,33.5,13.0,105.0,6.98,4.18,0.5
827,1.0,Antonio Brown,PIT,WR,27.0,16.0,16.0,0.0,0.0,0.0,0.0,0.0,3.0,28.0,9.33,0.0,136.0,1834.0,13.49,10.0,3.0,2.0,11.0,2.0,0.0,250.0,388.2,393.2,320.2,126.0,1.0,1.0,2015.0,193.0,307.3,15.0,16.0,16.0,20.486667,24.2625,-3.775833,1.0,1.0,0.666667,0.0,0.0,0.0,0.0,0.0,0.704663,0.073529,1.0,12.0625,0.0,0.1875,0.251035,PIT,1.35,0.0,-77.0,-93.0,195.0,Central Michigan,70.0,186.0,4.56,33.5,13.0,105.0,6.98,4.18,62.5
1077,113.0,Antonio Brown,PIT,WR,24.0,13.0,10.0,0.0,2.0,0.0,0.0,1.0,7.0,24.0,3.43,0.0,66.0,787.0,11.92,5.0,4.0,2.0,5.0,0.0,0.0,105.0,171.1,177.1,139.1,999.0,39.0,999.0,2012.0,106.0,314.9,16.0,16.0,9.0,19.68125,13.161538,6.519712,0.8125,0.90625,0.458333,0.0,0.0,0.0,0.0,0.5,0.622642,0.075758,0.769231,8.153846,0.153846,0.538462,0.129553,PIT,59.57,0.0,116.0,55.0,195.0,Central Michigan,70.0,186.0,4.56,33.5,13.0,105.0,6.98,4.18,50.0
1083,18.0,Antonio Brown,PIT,WR,25.0,16.0,14.0,1.0,1.0,15.0,0.0,0.0,7.0,4.0,0.57,0.0,110.0,1499.0,13.63,8.0,1.0,0.0,9.0,0.0,0.0,199.0,314.9,317.9,259.9,77.0,8.0,18.0,2013.0,167.0,386.9,16.0,13.0,16.0,24.18125,19.68125,4.5,1.0,0.90625,0.666667,0.0,15.0,0.0,1.0,0.0,0.658683,0.072727,0.875,10.4375,0.0625,0.4375,0.230325,PIT,21.35,0.0,86.0,-179.0,195.0,Central Michigan,70.0,186.0,4.56,33.5,13.0,105.0,6.98,4.18,50.0
1089,4.0,Antonio Brown,PIT,WR,26.0,16.0,16.0,2.0,2.0,20.0,1.0,0.0,4.0,13.0,3.25,0.0,129.0,1698.0,13.16,13.0,2.0,2.0,14.0,1.0,0.0,252.0,386.9,391.9,322.4,129.0,1.0,4.0,2014.0,181.0,388.2,16.0,16.0,13.0,24.2625,24.18125,0.08125,1.0,1.0,0.604167,0.0,10.0,0.5,1.0,0.0,0.712707,0.100775,1.0,11.3125,0.125,0.25,0.249855,PIT,4.3,0.0,-6.0,-62.0,195.0,Central Michigan,70.0,186.0,4.56,33.5,13.0,105.0,6.98,4.18,68.8
1096,12.0,Antonio Brown,PIT,WR,28.0,15.0,15.0,0.0,0.0,0.0,0.0,0.0,3.0,9.0,3.0,0.0,106.0,1284.0,12.11,12.0,0.0,0.0,12.0,0.0,0.0,207.0,307.3,310.3,254.3,81.0,3.0,12.0,2016.0,154.0,310.3,14.0,16.0,16.0,22.164286,20.486667,1.677619,0.9375,0.96875,0.645833,0.0,0.0,0.0,0.0,0.0,0.688312,0.113208,1.0,10.266667,0.0,0.2,0.199701,PIT,3.98,0.0,0.0,0.0,195.0,Central Michigan,70.0,186.0,4.56,33.5,13.0,105.0,6.98,4.18,68.8
1102,7.0,Antonio Brown,PIT,WR,29.0,14.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,101.0,1533.0,15.18,9.0,4.0,0.0,9.0,1.0,0.0,209.0,310.3,313.3,259.8,98.0,2.0,7.0,2017.0,163.0,323.7,15.0,15.0,16.0,21.58,22.164286,-0.584286,0.875,0.90625,0.625,0.0,0.0,0.0,0.0,0.0,0.619632,0.089109,1.0,11.642857,0.0,0.0,0.207892,PIT,5.76,0.0,84.0,6.0,195.0,Central Michigan,70.0,186.0,4.56,33.5,13.0,105.0,6.98,4.18,81.2


# make team relational dataframe

In [9]:
team_frame = real_final.groupby(['Tm', 'Year', 'opp_difference', 'ru_opp']).count().reset_index()
team_frame = team_frame[['Tm', 'Year', 'opp_difference', 'ru_opp']]
# need to send this to the rookie analysis
team_frame.to_csv('redraft_team_frame.csv', index = False)