In [1]:
import pandas as pd
import os
from functools import partial

In [2]:
# Maps the team abbreviations found in the ADP files to the abbreviations used by
# Pro-Football-Reference (PFR). Most codes are identical; the ones that differ are
# e.g. 'GB' -> 'GNB', 'KC' -> 'KAN', 'LV' -> 'LVR'. prep_adp_df applies this mapping
# and then back-dates relocated franchises (OAK / SDG / STL) by season.
adp_to_pfr = {'ARI':'ARI','ATL':'ATL','BAL':'BAL','BUF':'BUF','CAR':'CAR',
                'CHI':'CHI','CIN':'CIN','CLE':'CLE','DAL':'DAL','DEN':'DEN',
                'DET':'DET','GB':'GNB','HOU':'HOU','IND':'IND','JAX':'JAX',
                'KC':'KAN','LAC':'LAC','LAR':'LAR','LV':'LVR','MIA':'MIA',
                'MIN':'MIN','NO':'NOR','NE':'NWE','NYG':'NYG','NYJ':'NYJ',
                'PHI':'PHI','PIT':'PIT','SEA':'SEA','SF':'SFO','TB':'TAM',
                'TEN':'TEN','WAS':'WAS'}

In [200]:
def import_adp_data(files_loc = './Data/historical_adp/', start_year = 2014, end_year = 2022):
    '''
    Load one PPR ADP CSV per season into a dict of DataFrames keyed by year.

    files_loc
    -Directory holding files named ppr-adp-<year>.csv (trailing slash optional)
    start_year, end_year
    -First and last season to load (both inclusive); defaults match the
     original hard-coded 2014-2022 range

    Returns {year: DataFrame} where each frame holds the five ADP columns that
    are read plus a 'Year' column set to the season.
    '''
    # Loop-invariant, so build the column list once.
    cols = ["Name", "Team", "Position", "PositionRank", "AverageDraftPositionPPR"]
    df_dict = {}
    for year in range(start_year, end_year + 1):
        # os.path.join copes with files_loc given with or without a trailing separator.
        path = os.path.join(files_loc, f'ppr-adp-{year}.csv')
        season = pd.read_csv(path, usecols = cols)
        season['Year'] = year
        df_dict[year] = season
    return df_dict

def prep_adp_df(adp_data, adp_to_pfr, position = True):
    '''
    Combine the per-season ADP frames into one cleaned table whose player names
    and team codes line up with the Pro-Football-Reference data.

    adp_data
    -Dict of {year: DataFrame}; each frame needs the columns Name, Year, Team,
     Position, PositionRank and AverageDraftPositionPPR
    adp_to_pfr
    -Dict mapping ADP team abbreviations to PFR abbreviations
    position
    -Currently unused; kept so existing calls keep working

    Returns a DataFrame limited to QB/RB/WR/TE rows, with PositionRank converted
    to a number, names normalised and relocated franchises back-dated.
    '''
    columns = ['Name', 'Year', 'Team', 'Position', 'PositionRank', 'AverageDraftPositionPPR']

    # 1-2. Concatenate all seasons with a fresh 0..n-1 index and fix the column order.
    adp_df = pd.concat(list(adp_data.values()), ignore_index = True)[columns].copy()

    # 3. Position rank as a number: 'RB12' -> 12
    adp_df['PositionRank'] = pd.to_numeric(adp_df['PositionRank'].str.extract(r'(\d+)')[0])

    # 4. Name clean-up. First strip trailing generational suffixes made of I's ('II', 'III').
    names = adp_df['Name'].str.replace(r'[I ]+$', '', regex = True)

    # These are literal substitutions: regex=False keeps the '.' in 'Jr.' from acting
    # as a wildcard, whatever the installed pandas uses as its default.
    literal_fixes = [
        ('CJ ', 'C.J. '),
        ('DJ ', 'D.J. '),
        ('DK ', 'D.K. '),
        ('Marvin Jones Jr.', 'Marvin Jones'),
        ('Darrell Henderson Jr.', 'Darrell Henderson'),
        ('Gabe Davis', 'Gabriel Davis'),
    ]
    for old, new in literal_fixes:
        names = names.str.replace(old, new, regex = False)

    # Add 'Sr.' only when it is not already there, so the step is idempotent.
    names = names.str.replace(r'Steve Smith(?! Sr\.)', 'Steve Smith Sr.', regex = True)
    adp_df['Name'] = names

    # 5. Keep skill positions only; .copy() so the assignments below hit a real frame.
    adp_df = adp_df[adp_df['Position'].isin(['QB', 'RB', 'WR', 'TE'])].copy()

    # 6. Translate team codes, then back-date franchises that relocated:
    #    (current PFR code, last season under the old code, old code)
    adp_df['Team'] = adp_df['Team'].replace(adp_to_pfr)
    relocations = [('LVR', 2019, 'OAK'), ('LAC', 2016, 'SDG'), ('LAR', 2015, 'STL')]
    for current, last_old_year, former in relocations:
        moved = (adp_df['Team'] == current) & (adp_df['Year'] <= last_old_year)
        adp_df.loc[moved, 'Team'] = former
    return adp_df
    

In [201]:
# Load the raw per-season ADP files, then build the single cleaned ADP table
# (adp_df) that prepFinalReg later joins onto the PFR scoring data.
df_dict = import_adp_data()
adp_df = prep_adp_df(df_dict, adp_to_pfr)

# Check Teams are correct
# (optional sanity check: first and last season in which each team code appears)
# test = adp_df[['Team','Year']].drop_duplicates().sort_values(['Year','Team'])
# a = test.groupby(['Team'], as_index = False).min()[['Team','Year']]
# b = test.groupby(['Team'], as_index = False).max()[['Team','Year']]
# a.merge(b, on = 'Team')

  adp_df['Name'] = adp_df['Name'].str.replace('Marvin Jones Jr.', 'Marvin Jones')
  adp_df['Name'] = adp_df['Name'].str.replace('Darrell Henderson Jr.', 'Darrell Henderson')


In [237]:
from bs4 import BeautifulSoup
import requests
import re
from datetime import datetime
import pickle

PFR_LINK = 'https://www.pro-football-reference.com/years/{yr}/fantasy.htm'

def player_info_from_link(link, undrafted_pick = 270, timeout = 30):
    '''
    Scrape one Pro-Football-Reference player page for birth date, overall draft
    pick and position.

    link
    -Full URL of the player's page
    undrafted_pick
    -Pick number recorded when the page has no parsable draft entry
     (270 is the placeholder this notebook has always used)
    timeout
    -Seconds to wait for the HTTP response before giving up

    Returns a three-item list [dob, draft_pick, position]:
        -dob is a datetime, or None when no birth date could be parsed
        -draft_pick is an int (undrafted_pick when there is no draft entry)
        -position is the first two characters of the listed position, or None

    Raises requests.HTTPError when the page is not returned successfully, rather
    than failing later on an undefined variable.
    '''
    page = requests.get(link, timeout = timeout)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    # Collect the raw text of the <p> blocks headed by the labels we care about.
    info_dict = {}
    for paragraph in soup.find_all('p'):
        if paragraph.find_all('strong', string = 'Position'):
            words = paragraph.text.split(' ')
            if len(words) > 1:
                info_dict['pos_raw'] = words[1].strip()[:2]
        if paragraph.find_all('strong', string = 'Born:'):
            info_dict['born_raw'] = paragraph.text.replace('\n', ' ').replace('\xa0', ' ')
        if paragraph.find_all('strong', string = 'Draft'):
            info_dict['draft_raw'] = paragraph.text

    # Birth date, written like "January 26, 1999".
    dob = None
    dob_match = re.search(r'\w+\s\d{1,2},\s\d{4}', info_dict.get('born_raw', ''))
    if dob_match is not None:
        try:
            dob = datetime.strptime(dob_match.group(0), '%B %d, %Y')
        except ValueError:
            # Matched something date-shaped that is not "<Month> <day>, <year>".
            dob = None

    # Overall pick: the digits straight after the first "(" that is followed by digits,
    # as in "(25th overall)".
    draft_pick = undrafted_pick
    pick_match = re.search(r'\((\d+)', info_dict.get('draft_raw', ''))
    if pick_match is not None:
        draft_pick = int(pick_match.group(1))

    position = info_dict.get('pos_raw')
    if position is None:
        print("No position info for this player...")
        print()

    # Always three items, so every record has the same shape downstream.
    return [dob, draft_pick, position]
        

def get_player_info(link = PFR_LINK, start_year = 2013, end_year = 2021):
    '''
    Walk the PFR fantasy table of each season and scrape the bio page of every
    (name, position) pair not seen yet.

    link
    -URL template containing a {yr} placeholder
    start_year, end_year
    -First and last season to crawl (both inclusive); defaults match the
     original hard-coded 2013-2021 range

    Returns {(name, position): info}, info being whatever
    player_info_from_link returns for that player's page.

    Raises requests.HTTPError on an unsuccessful response and ValueError when a
    season page has no table with id 'fantasy'.
    '''
    player_dict = {}
    for year in range(start_year, end_year + 1):
        use_link = link.format(yr = year)
        print(use_link)

        page = requests.get(use_link, timeout = 30)
        page.raise_for_status()
        soup = BeautifulSoup(page.content, 'html.parser')
        table = soup.find('table', id = 'fantasy')
        if table is None:
            raise ValueError(f"No table with id 'fantasy' found at {use_link}")

        for row in table.find_all('tr'):
            links = row.find_all('a')
            cells = row.find_all('td', class_ = "right")
            # Skip rows that carry no player link or no right-aligned cells.
            if not links or not cells:
                continue
            name = links[0].text
            position = cells[0].text
            if (name, position) not in player_dict:
                print(f"Added {(name, position)} to dictionary")
                player_link = 'https://www.pro-football-reference.com' + links[0]['href']
                player_dict[name, position] = player_info_from_link(player_link)
    return player_dict
            
# Scrape every player's bio (one HTTP request per new player, so this cell is slow)
# and cache the result on disk as 'player_info.p'.
# NOTE(review): later cells load 'player_info_1.p', which is only written by the
# commented-out getDrafteeInfo lines - confirm which cache file is current.
player_dict = get_player_info()
with open('player_info.p', 'wb') as handle:
    pickle.dump(player_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

https://www.pro-football-reference.com/years/2013/fantasy.htm
Added ('Jamaal Charles', 'RB') to dictionary
Added ('LeSean McCoy', 'RB') to dictionary
Added ('Peyton Manning', 'QB') to dictionary
Added ('Matt Forte', 'RB') to dictionary
Added ('Jimmy Graham', 'TE') to dictionary
Added ('Marshawn Lynch', 'RB') to dictionary
Added ('Knowshon Moreno', 'RB') to dictionary
Added ('Josh Gordon', 'WR') to dictionary
Added ('Demaryius Thomas', 'WR') to dictionary
Added ('Calvin Johnson', 'WR') to dictionary
Added ('Drew Brees', 'QB') to dictionary
Added ('A.J. Green', 'WR') to dictionary
Added ('Brandon Marshall', 'WR') to dictionary
Added ('Antonio Brown', 'WR') to dictionary
Added ('Eddie Lacy', 'RB') to dictionary
Added ('Dez Bryant', 'WR') to dictionary
Added ('DeMarco Murray', 'RB') to dictionary
Added ('Adrian Peterson', 'RB') to dictionary
Added ('Alshon Jeffery', 'WR') to dictionary
Added ('Eric Decker', 'WR') to dictionary
Added ('Chris Johnson', 'RB') to dictionary
Added ('Vernon Davi

In [240]:
def getDrafteeInfo(player_dict, link = 'https://www.pro-football-reference.com/years/2022/draft.htm'):
    '''
    Add the players listed on a PFR draft page to player_dict.

    player_dict
    -Dict keyed by (name, position); it is updated IN PLACE and also returned
    link
    -URL of the draft page to read

    For each table row the second link is taken as the player and the fourth
    <td> as the position; pairs already in player_dict are left untouched.

    Raises requests.HTTPError on an unsuccessful response and ValueError when
    the page has no table with id 'drafts'.
    '''
    page = requests.get(link, timeout = 30)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')
    table = soup.find('table', id = 'drafts')
    if table is None:
        raise ValueError(f"No table with id 'drafts' found at {link}")

    for row in table.find_all('tr'):
        links = row.find_all('a')
        cells = row.find_all('td')
        # Need at least two links (the player is the second) and four cells
        # (the position is the fourth); anything shorter is skipped.
        if len(links) < 2 or len(cells) < 4:
            continue
        name = links[1].text
        position = cells[3].text
        print(name, position)
        if (name, position) not in player_dict:
            print(f"Added {(name, position)} to dictionary")
            player_link = 'https://www.pro-football-reference.com' + links[1]['href']
            player_dict[(name, position)] = player_info_from_link(player_link)
    return player_dict

# player_dict1 = getDrafteeInfo(player_dict)
# player_dict1 = getDrafteeInfo(player_dict, link = 'https://www.pro-football-reference.com/years/2021/draft.htm')
# with open('player_info_1.p', 'wb') as handle:
#     pickle.dump(player_dict1, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [242]:
from bs4 import BeautifulSoup
import requests

PFR_LINK = 'https://www.pro-football-reference.com/years/{yr}/fantasy.htm'
def import_pts_data(link = PFR_LINK, start_year = 2013, end_year = 2021):
    '''
    Download the PFR fantasy table of each season as a DataFrame.

    link
    -URL template containing a {yr} placeholder
    start_year, end_year
    -First and last season to fetch (both inclusive); defaults match the
     original hard-coded 2013-2021 range

    Returns {year: DataFrame}. Each frame keeps only the second level of the
    table's two-level header as column names and gains a 'Year' column.

    Raises requests.HTTPError on an unsuccessful response and ValueError when a
    season page has no table with id 'fantasy'.
    '''
    # Local import: newer pandas wants literal HTML wrapped in a file-like object.
    from io import StringIO

    fpts_dict = {}
    for year in range(start_year, end_year + 1):
        use_link = link.format(yr = year)
        print(use_link)

        page = requests.get(use_link, timeout = 30)
        page.raise_for_status()
        soup = BeautifulSoup(page.content, 'html.parser')
        table = soup.find('table', id = 'fantasy')
        if table is None:
            raise ValueError(f"No table with id 'fantasy' found at {use_link}")

        df = pd.read_html(StringIO(str(table)), flavor = 'html5lib')[0]
        df.columns = df.columns.get_level_values(1)
        df['Year'] = year
        fpts_dict[year] = df
    return fpts_dict

# Download one fantasy-scoring table per season (network access required).
fpts_dict = import_pts_data(PFR_LINK)

https://www.pro-football-reference.com/years/2013/fantasy.htm
https://www.pro-football-reference.com/years/2014/fantasy.htm
https://www.pro-football-reference.com/years/2015/fantasy.htm
https://www.pro-football-reference.com/years/2016/fantasy.htm
https://www.pro-football-reference.com/years/2017/fantasy.htm
https://www.pro-football-reference.com/years/2018/fantasy.htm
https://www.pro-football-reference.com/years/2019/fantasy.htm
https://www.pro-football-reference.com/years/2020/fantasy.htm
https://www.pro-football-reference.com/years/2021/fantasy.htm


In [243]:
# Flat column names that prep_pts_df assigns to the concatenated PFR tables.
# The order must match the scraped table, with 'Year' (added by import_pts_data) last.
pfref_cols = ['Rk', 'Player', 'Tm', 'FantPos', 'Age', 'G', 'GS', 'PassCmp', 'PassAtt', 'PassYds',
                    'PassTD', 'PassInt', 'RushAtt', 'RushYds', 'RushY/A', 'RushTD', 'RecTgt', 'Rec', 'RecYds', 'RecY/R',
                    'RecTD', 'Fmb', 'FL', 'TD', '2PM', '2PP', 'FantPt', 'PPR', 'DKPt', 'FDPt',
                    'VBD', 'PosRank', 'OvRank', 'Year']

# Fantasy points awarded per unit of each stat column. 'Rec': 0.5 makes this the
# half-PPR scheme; prep_pts_df re-scores with 'Rec' set to 1 for full PPR.
score_dict = {'PassYds' : 0.04,
                'PassTD' : 4,
                'PassInt' : -2,
                'RushYds' : 0.1,
                'RushTD' : 6,
                'Rec': 0.5,
                'RecYds' : 0.1,
                'RecTD' : 6,
                'FL' : -2,
                '2PM' : 2,
                '2PP' : 2
                }

def score_row(row, scores = None):
    '''
    Fantasy points for one player-season.

    row
    -Mapping / Series holding a value for every stat named in the weights
    scores
    -Optional {stat: points per unit}; when omitted the module-level score_dict
     is used, which is the original behaviour

    Returns the weighted sum as a float.
    '''
    weights = score_dict if scores is None else scores
    # Plain running total (named 'total' so the builtin sum() is not shadowed).
    total = 0.0
    for cat, weight in weights.items():
        total += float(row[cat]) * weight
    return total

def prep_pts_df(fpts_dict, pfref_cols = pfref_cols, score_dict = score_dict):
    '''
    Turn the scraped per-season PFR tables into one scored player-season table.

    fpts_dict
    -Dict of {year: DataFrame} as returned by import_pts_data
    pfref_cols
    -Flat column names to assign to the concatenated table
    score_dict
    -{stat: points per unit} for half-PPR scoring; it is NOT modified

    Returns a DataFrame with numeric scoring columns, Pts_HPPR (score_dict as
    given), Pts_PPR (same weights but 'Rec' worth 1), cleaned player names, one
    indicator column per FantPos value, and only rows that have a FantPos.
    '''
    # Concatenate with a fresh index: each season's frame restarts at 0, so index
    # labels would otherwise repeat across seasons.
    df = pd.concat(list(fpts_dict.values()), ignore_index = True)
    print("Initial dataframe shape")
    print(df.shape)
    df.columns = pfref_cols

    # The scraped tables repeat their header inside the body; those rows have the
    # literal text 'Player' in the Player column. Filter with a mask (not by index
    # label) so exactly those rows are removed.
    df = df[df['Player'] != 'Player']
    print("Dataframe shape after removing filler rows")
    print(df.shape)
    df = df.reset_index(drop = True).drop(columns = ['Rk'])

    # Convert numerics, fill nas with 0, then score
    score_cols = list(score_dict.keys()) + ['FantPt', 'PPR']
    df[score_cols] = df[score_cols].apply(pd.to_numeric)
    df = df.fillna({stat: 0 for stat in score_dict})

    def _points(weights):
        # Column-wise running total, accumulated in dict order.
        total = pd.Series(0.0, index = df.index)
        for stat, weight in weights.items():
            total = total + df[stat].astype(float) * weight
        return total

    # Score: build the full-PPR weights as a copy instead of editing score_dict, so
    # re-running the cell cannot turn the half-PPR column into full PPR.
    ppr_weights = {**score_dict, 'Rec': 1}
    df['Pts_HPPR'] = _points(score_dict)
    df['Pts_PPR'] = _points(ppr_weights)
    # One-sided check: our PPR total must never exceed PFR's own PPR column by more than 0.1.
    assert len(df[(df['Pts_PPR'] - df['PPR']) > 0.1]) == 0

    # Clean player names of * and +
    df['Player'] = df['Player'].str.replace(r'[\*\+]', '', regex=True).str.strip()

    # Position encodings
    df = df.join(pd.get_dummies(df['FantPos']))

    # Limit to guys with positions, everyone without position has 0 or less pts scored
    print(df.shape)
    df = df[df['FantPos'].notnull()].copy()
    print("After limiting to exclude guys with fantasy position, we see")
    print(df.shape)
    return df

# Build the scored player-season table from the scraped PFR data and display it.
pts_df = prep_pts_df(fpts_dict, pfref_cols, score_dict)
pts_df

Initial dataframe shape
(5685, 34)
Dataframe shape after removing filler rows
(5507, 34)
(5507, 39)
After limiting to exclude guys with fantasy position, we see
(4951, 39)


Unnamed: 0,Player,Tm,FantPos,Age,G,GS,PassCmp,PassAtt,PassYds,PassTD,...,VBD,PosRank,OvRank,Year,Pts_HPPR,Pts_PPR,QB,RB,TE,WR
0,Jamaal Charles,KAN,RB,27,15,15,0,0,0.0,0.0,...,182,1,1,2013,343.00,378.00,0,1,0,0
1,LeSean McCoy,PHI,RB,25,16,16,0,0,0.0,0.0,...,152,2,2,2013,304.60,330.60,0,1,0,0
2,Peyton Manning,DEN,QB,37,16,16,450,659,5477.0,55.0,...,151,1,3,2013,409.98,409.98,1,0,0,0
3,Matt Forte,CHI,RB,28,16,16,0,0,0.0,0.0,...,137,3,4,2013,300.30,337.30,0,1,0,0
4,Jimmy Graham,NOR,TE,27,16,12,0,0,0.0,0.0,...,124,1,5,2013,260.50,303.50,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5501,Logan Woodside,TEN,QB,26,5,0,0,0,0.0,0.0,...,,81,,2021,-0.60,-0.60,1,0,0,0
5503,Trenton Cannon,2TM,RB,27,12,0,0,0,0.0,0.0,...,,179,,2021,-1.60,-1.60,0,1,0,0
5504,John Wolford,LAR,QB,26,3,0,1,4,5.0,0.0,...,,84,,2021,-1.90,-1.90,1,0,0,0
5505,Josh Rosen,ATL,QB,24,4,0,2,11,19.0,0.0,...,,85,,2021,-3.24,-3.24,1,0,0,0


In [1937]:
# with open('player_info_1.p', 'rb') as input:
#     b = pickle.load(input)
# pfref_info = pd.DataFrame.from_dict(b, orient= 'index').reset_index()
# pfref_info.columns = ['Player','DOB','Draft']
# pfref_info['Player'] = pfref_info['Player'].str.strip()

# test = pfref_info.copy()
# test[test['Player'].str.contains('Bridge')]
    

Unnamed: 0,Player,DOB,Draft


In [244]:
# Positional rank that defines the baseline player for VBD: within each
# season the player whose PPR rank at the position equals this value supplies the
# 'Base' score that everyone else at that position is measured against.
baselines = {'QB': 10.0,
    'TE' : 10.0,
    'RB' : 23.0,
    'WR' : 27.0}
    
def find_baseline(row):
    '''
    Flag the baseline player of a position.

    Returns 1 when the row's 'MyRk' equals the baseline rank configured for its
    'FantPos' in `baselines`, and 0 otherwise (including positions that have no
    configured baseline).
    '''
    target_rank = baselines.get(row['FantPos'])
    if target_rank is None:
        return 0
    return 1 if row['MyRk'] == target_rank else 0

def set_baselines(pts_df):
    '''
    Rank players within each (Year, FantPos) by Pts_PPR and derive value over
    the positional baseline.

    Returns (ranked_df, bases):
        -ranked_df is pts_df sorted by year / position / points with the added
         columns MyRk, Baseline (1 for the baseline player), Base and MyVBD
        -bases has one row per baseline player: FantPos, Year, Base
    '''
    group_keys = ['FantPos', 'Year']

    # Order the table, then rank inside each season/position; method 'first' breaks
    # ties by row order so every rank is distinct.
    ranked = pts_df.sort_values(by = ['Year', 'FantPos', 'Pts_PPR'], ascending = [True, True, False])
    ranked['MyRk'] = ranked.groupby(['Year', 'FantPos'])['Pts_PPR'].rank(method = 'first', ascending = False)

    # Mark the baseline player of each group and pull out his score as 'Base'.
    ranked['Baseline'] = ranked.apply(find_baseline, axis = 1)
    is_baseline = ranked['Baseline'] == 1
    bases = ranked.loc[is_baseline, ['FantPos', 'Year', 'Pts_PPR']].rename(columns = {'Pts_PPR': 'Base'})

    # Attach the group's baseline score to every player and take the difference.
    with_base = ranked.merge(bases, how = 'left', on = group_keys)
    with_base['MyVBD'] = with_base['Pts_PPR'] - with_base['Base']
    return with_base, bases

In [245]:
# Add ranks, baseline scores and MyVBD to the scored table; `bases` keeps the
# per-(position, year) baseline scores for reuse in prepFinalReg.
pts_df_base, bases = set_baselines(pts_df)
# bases
# pts_df_base[(pts_df_base['FantPos'] == 'WR') & (pts_df_base['Year'] == 2021)].head(27)

In [246]:
# Sanity check: number of player rows per season.
pts_df_base['Year'].value_counts()

2021    596
2020    578
2019    558
2018    550
2016    542
2015    538
2017    536
2013    527
2014    526
Name: Year, dtype: int64

In [247]:
def getPosAvgVBD(pts_df_base):
    '''
    Average MyVBD of each position in each season.

    pts_df_base
    -DataFrame with at least the columns FantPos, Year and MyVBD

    Returns one row per (position, year) with the columns fpFantPos, fpYear and
    PosAvgVBD; the 'fp' prefix keeps the keys from clashing when the result is
    merged onto another table.
    '''
    # Aggregate only the column that is needed: averaging the whole frame would
    # also try to average text columns such as Player.
    pos_avg = pts_df_base.groupby(['FantPos', 'Year'], as_index = False)['MyVBD'].mean()
    return pos_avg.rename(columns = {'Year': 'fpYear', 'FantPos' : 'fpFantPos', 'MyVBD' : 'PosAvgVBD'})
# Per-(position, year) average VBD; used later to fill in players with no prior-year stats.
posAvgs = getPosAvgVBD(pts_df_base)
# posAvgs

In [248]:
import numpy as np

# Row counts produced by the prior-year merge in createPtsForReg (see its printed output):
# 3147 merged on both sides
# 1277 left only but not 2013 (need to be in regression, and need to have x values interpolated)
# 596 are right only and from 2022 (need to be predicted later)
# Slim copy of the scored table, and the position averages recomputed from it
# (this reassigns the posAvgs built in the previous cell).
predTemplate = pts_df_base[['Player', 'FantPos', 'Year', 'Pts_HPPR', 'Pts_PPR', 'MyVBD', 'QB','RB','TE','WR']]
posAvgs = getPosAvgVBD(predTemplate)

def createPtsForReg(pts_df_base, first_year = 2013, pred_year = 2022):
    '''
    Pair every player-season with the same player's previous-season results.

    pts_df_base
    -Scored table with Player, Tm, FantPos, Year, Pts_HPPR, Pts_PPR, MyVBD and
     the QB/RB/TE/WR indicator columns
    first_year
    -Earliest season in the data; its rows cannot have a previous season and
     are dropped (default 2013, as before)
    pred_year
    -Season to be predicted; rows that only have previous-season stats are kept
     for this year alone (default 2022, as before)

    Returns the merged frame with PrvYear, PrvPts_HPPR, PrvPts_PPR, PrvMyVBD and
    the merge indicator 'foundLastYearStats':
        -both / left_only rows have an outcome and feed the regression
        -right_only rows exist only for pred_year and are the ones to predict
    '''
    positions = ['QB', 'RB', 'TE', 'WR']

    # Create template
    predTemplate = pts_df_base[['Player', 'Tm', 'FantPos', 'Year', 'Pts_HPPR', 'Pts_PPR', 'MyVBD', 'QB','RB','TE','WR']]

    print("This is the shape of og dataset...")
    print(predTemplate.shape)

    # Merge on last year's results: relabel each season's stats as "previous" and
    # shift them forward one year so they join onto the following season.
    prvYr = predTemplate[['Player', 'FantPos', 'Year', 'Pts_HPPR', 'Pts_PPR', 'MyVBD']].rename(
        columns = {'Year' : 'PrvYear', 'Pts_HPPR': 'PrvPts_HPPR', 'Pts_PPR' : 'PrvPts_PPR', 'MyVBD': 'PrvMyVBD'})
    prvYr['Year'] = prvYr['PrvYear'] + 1
    merged = predTemplate.merge(prvYr, on = ['Player','FantPos', 'Year'], how = 'outer', indicator= 'foundLastYearStats')

    print(merged.shape)
    # Remove first-season obs
    print(f"There are {len(merged[merged['Year'] == first_year])} observations associated with year {first_year} - removed")
    merged = merged[merged['Year'] > first_year].copy()
    print(merged.shape)

    # Remove right_only obs that aren't from the prediction year.
    # (The messages say "players with ADP"; at this point right_only actually means
    # "has previous-season stats but no row for this season".)
    right_only = merged['foundLastYearStats'] == 'right_only'
    print(f"In total, {len(merged[right_only])} observations are players with ADP, but no previous year statistics")
    print(f"There are {len(merged[merged['PrvYear'] == pred_year - 1])} observations associated with year {pred_year}")
    print(f"So remove the remaining {len(merged[(merged['PrvYear'] != pred_year - 1) & right_only])} observations")
    print("\t-These observations don't have a 'y-value' for regression, only 'x-values', so ok to delete")
    merged = merged[(merged['Year'] == pred_year) | ~right_only].copy()
    print(merged.shape)
    print(merged['foundLastYearStats'].value_counts())

    # Previous year update (left_only rows came out of the merge without one)
    merged['PrvYear'] = merged['Year'] - 1

    # Rebuild the position indicators for every row. Comparing against each position
    # explicitly works even when a position is absent from the data.
    for pos in positions:
        merged[pos] = (merged['FantPos'] == pos).astype(int)
    return merged

# Pair each player-season with the previous season's results and display the table.
pts_df_reg = createPtsForReg(pts_df_base)
# Stick on position averages later
pts_df_reg

This is the shape of og dataset...
(4951, 11)
(6755, 16)
There are 527 observations associated with year 2013 - removed
(6228, 16)
In total, 1804 observations are players with ADP, but no previous year statistics
There are 596 observations associated with year 2022
So remove the remaining 1208 observations
	-These observations don't have a 'y-value' for regression, only 'x-values', so ok to delete
(5020, 16)
both          3147
left_only     1277
right_only     596
Name: foundLastYearStats, dtype: int64


Unnamed: 0,Player,Tm,FantPos,Year,Pts_HPPR,Pts_PPR,MyVBD,QB,RB,TE,WR,PrvYear,PrvPts_HPPR,PrvPts_PPR,PrvMyVBD,foundLastYearStats
527,Aaron Rodgers,GNB,QB,2014,354.14,354.14,84.64,1,0,0,0,2013,169.44,169.44,-91.48,both
528,Andrew Luck,IND,QB,2014,351.74,351.74,82.24,1,0,0,0,2013,292.58,292.58,31.66,both
529,Russell Wilson,SEA,QB,2014,330.10,330.60,61.10,1,0,0,0,2013,270.18,270.18,9.26,both
530,Peyton Manning,DEN,QB,2014,312.68,312.68,43.18,1,0,0,0,2013,409.98,409.98,149.06,both
531,Ben Roethlisberger,PIT,QB,2014,306.18,306.18,36.68,1,0,0,0,2013,258.84,258.84,-2.08,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6750,Malik Taylor,,WR,2022,,,,0,0,0,1,2021,0.40,1.40,-201.10,right_only
6751,Racey McMath,,WR,2022,,,,0,0,0,1,2021,-0.20,0.80,-201.70,right_only
6752,Maurice Ffrench,,WR,2022,,,,0,0,0,1,2021,0.30,0.30,-202.20,right_only
6753,Alex Bachman,,WR,2022,,,,0,0,0,1,2021,-0.30,-0.30,-202.80,right_only


In [249]:
def create_qb_chg(df):
    '''
    Attach two team-level quarterback features to every row of df.

    OldQBs
    -Highest Pts_PPR by any QB listed with the team in the PREVIOUS season
    NewQBs
    -Highest PrvPts_PPR among the QBs listed with the team in THIS season

    df needs the columns Tm, Year, FantPos, Pts_PPR and PrvPts_PPR. The result is
    an inner merge on (Tm, Year), so rows whose team/season has no value for both
    features are dropped.
    '''
    keys = ['Tm', 'Year']
    qbs = df.loc[df['FantPos'] == 'QB']

    # Best QB score each team posted in a season (only the needed column is aggregated).
    prior_best = (qbs.groupby(keys, as_index = False)['Pts_PPR'].max()
                     .rename(columns = {'Pts_PPR' : 'OldQBs'}))
    # Franchises that changed code the following season: relabel the old code so
    # the shifted row joins onto the new code.
    for old_code, last_old_year, new_code in [('OAK', 2019, 'LVR'), ('SDG', 2016, 'LAC'), ('STL', 2015, 'LAR')]:
        relocated = (prior_best['Tm'] == old_code) & (prior_best['Year'] == last_old_year)
        prior_best.loc[relocated, 'Tm'] = new_code
    # Shift forward so season N's score lines up with season N + 1.
    prior_best['Year'] = prior_best['Year'] + 1

    # Best previous-season score among the QBs on the team this season.
    current_best = (qbs.groupby(keys, as_index = False)['PrvPts_PPR'].max()
                       .rename(columns = {'PrvPts_PPR' : 'NewQBs'}))

    qb_features = prior_best.merge(current_best, on = keys)
    return df.merge(qb_features, on = keys, how = 'inner')

In [289]:
def prepFinalReg(pts_df_reg, adp_df, posAvgs, bases, adp_cap = 173, pred_year = 2022):
    '''
    Join ADP onto the season / previous-season table and split it into the
    regression set and the prediction set.

    pts_df_reg
    -Output of createPtsForReg
    adp_df
    -Output of prep_adp_df
    posAvgs
    -Output of getPosAvgVBD (per position/year average VBD)
    bases
    -Baseline scores per position/year from set_baselines
    adp_cap
    -ADP values above this, and players with no ADP row at all, are set to this
     value (default 173, as before)
    pred_year
    -Season whose rows form the prediction set (default 2022, as before)

    Returns (reg_set, pred_set). In reg_set, players that had an ADP but no
    scoring row get Pts_PPR = 0 and MyVBD = -Base.
    '''
    positions = ['QB', 'RB', 'TE', 'WR']

    test = pts_df_reg.merge(adp_df[['Name', 'Year', 'Team', 'Position', 'AverageDraftPositionPPR']],
                        left_on = ['Player','Year','FantPos'],
                        right_on = ['Name', 'Year', 'Position'],
                        how = 'outer',
                        indicator= 'foundAdp')

    test['PrvYear'] = test['Year'] - 1

    # Rows that exist only on the ADP side have no Player/FantPos/Tm: take them from
    # the ADP columns. Assign the result back - calling fillna(inplace=True) on a
    # selected column is not guaranteed to write through to the frame.
    for stats_col, adp_col in (('Player', 'Name'), ('FantPos', 'Position'), ('Tm', 'Team')):
        test[stats_col] = test[stats_col].fillna(test[adp_col])
    test = test.drop(columns = ['Name', 'Position', 'Team'])

    # Position indicators for every row, including the ADP-only ones.
    for pos in positions:
        test[pos] = (test['FantPos'] == pos).astype(int)

    # Previous-season position average, used as the stand-in VBD for players with
    # no previous-season stats.
    test = test.merge(posAvgs, left_on = ['PrvYear', 'FantPos'], right_on = ['fpYear','fpFantPos'])
    test = test.drop(columns = ['fpYear', 'fpFantPos'])
    no_prior = test['PrvMyVBD'].isnull()
    test['foundLastYearStats'] = np.where(no_prior, 0, 1)
    test.loc[no_prior, 'PrvPts_PPR'] = 0
    test.loc[no_prior, 'PrvMyVBD'] = test.loc[no_prior, 'PosAvgVBD']

    # Cap ADP; players without any ADP row get the cap. foundAdp then becomes a
    # 0/1 flag for "drafted inside the cap".
    no_adp_row = test['foundAdp'] == 'left_only'
    test.loc[test['AverageDraftPositionPPR'] > adp_cap, 'AverageDraftPositionPPR'] = adp_cap
    test.loc[no_adp_row, 'AverageDraftPositionPPR'] = adp_cap
    test['foundAdp'] = np.where(test['AverageDraftPositionPPR'] < adp_cap , 1, 0)

    reg_set = test[['Player', 'Tm', 'FantPos','Year', 'PrvPts_PPR','PrvMyVBD','foundLastYearStats','AverageDraftPositionPPR', 'foundAdp', 'QB','RB','TE','WR', 'Pts_PPR','MyVBD']]

    reg_set = reg_set.merge(bases, on = ['FantPos','Year'], how = 'left')
    reg_set['AverageDraftPositionPPRSq'] = reg_set['AverageDraftPositionPPR'] * reg_set['AverageDraftPositionPPR']

    reg_set = create_qb_chg(reg_set)

    pred_set = reg_set[reg_set['Year'] == pred_year].copy()
    reg_set = reg_set[reg_set['Year'] != pred_year].copy()

    # Drafted players with no scoring row: score them 0 points, i.e. VBD = 0 - Base.
    no_outcome = reg_set['MyVBD'].isnull()
    reg_set.loc[no_outcome, 'Pts_PPR'] = 0
    reg_set.loc[no_outcome, 'MyVBD'] = 0 - reg_set.loc[no_outcome, 'Base']
    return reg_set, pred_set

# Build the regression / prediction sets, then spot-check one prediction-year player.
reg_set, pred_set = prepFinalReg(pts_df_reg, adp_df, posAvgs, bases)
pred_set[pred_set['Player'].str.contains('Etienne')]
# reg_set.loc[reg_set['Draft'].isnull()].head(25)
# pred_set

Unnamed: 0,Player,Tm,FantPos,Year,PrvPts_PPR,PrvMyVBD,foundLastYearStats,AverageDraftPositionPPR,foundAdp,QB,RB,TE,WR,Pts_PPR,MyVBD,Base,AverageDraftPositionPPRSq,OldQBs,NewQBs
6409,Travis Etienne Jr.,JAX,RB,2022,0.0,-110.182976,0,33.6,1,0,1,0,0,,,,1128.96,199.04,199.04


In [288]:
def load_player_info(link = 'player_info_1.p'):
    '''
    Load the pickled player-bio cache into a DataFrame.

    link
    -Path of the pickle file; it holds a dict {(name, position): info} where
     info is [dob, draft_pick] optionally followed by the scraped position

    Returns a de-duplicated DataFrame with the columns Player, FantPos, DOB and
    Draft. A missing FantPos in the key is filled from the scraped position
    when the cache contains one.
    '''
    with open(link, 'rb') as handle:
        # NOTE: unpickling can execute arbitrary code - only load cache files that
        # this notebook wrote itself.
        raw = pickle.load(handle)

    if len(raw) == 0:
        return pd.DataFrame(columns = ['Player', 'FantPos', 'DOB', 'Draft'])

    pfref_info = pd.DataFrame.from_dict(raw, orient = 'index').reset_index()
    # The dict keys are (name, position) tuples; split them into two columns.
    pfref_info['Player'] = [key[0] for key in pfref_info['index']]
    pfref_info['FantPos'] = [key[1] for key in pfref_info['index']]
    # Column 2 (scraped position) only exists when at least one record has a third item.
    if 2 in pfref_info.columns:
        pfref_info['FantPos'] = pfref_info['FantPos'].fillna(pfref_info[2])

    final = pfref_info[['Player', 'FantPos', 0, 1]].rename(columns = {0: 'DOB', 1: 'Draft'})
    final['Player'] = final['Player'].str.strip()
    return final.drop_duplicates()

# Spot-check the cached bio data for one player.
a = load_player_info()
a[a['Player'].str.contains('Etienne')]

Unnamed: 0,Player,FantPos,DOB,Draft
2287,Travis Etienne,RB,1999-01-26,25


In [296]:
def add_info(df, link = 'player_info_1.p'):
    '''
    Add date of birth, draft pick and age to each row of df.

    df
    -Table with Player, FantPos and Year columns
    link
    -Path of the pickled bio cache, passed on to load_player_info

    Returns (df_with_info, pfref_info). The merge is an inner join on
    (Player, FantPos), so rows without bio data are dropped. Age is measured at
    September 1 of the row's Year, in days / 365.
    '''
    pfref_info = load_player_info(link)

    # PFR spelling (matched as a plain substring) -> spelling used in the merged data.
    name_fixes = {
        'Chark': 'D.J. Chark',
        'Ronald Jones': 'Ronald Jones',
        'Robby Anderson': 'Robbie Anderson',
        'Will Fuller': 'William Fuller V',
        'Robert Kelley': 'Rob Kelley',
        'Willie Snead': 'Willie Snead IV',
        'Henry Ruggs III': 'Henry Ruggs',
        'Robert Griffin III': 'Robert Griffin',
        'Mitchell Trubisky': 'Mitch Trubisky',
        'Tony Jones': 'Tony Jones Jr.',
        'John Kelly': 'John Kelly Jr.',
        'Ben Watson': 'Benjamin Watson',
        'Travis Etienne': 'Travis Etienne Jr.',
    }
    for fragment, fixed_name in name_fixes.items():
        matches = pfref_info['Player'].str.contains(fragment, regex = False, na = False)
        pfref_info.loc[matches, 'Player'] = fixed_name

    df = df.merge(pfref_info, on = ['Player','FantPos'], how= 'inner')
    # df = df.merge(pfref_info, on = ['Player','FantPos'], how= 'outer', indicator = 'hasInfo')
    season_start = pd.to_datetime('September 1, ' + df['Year'].astype(str))
    df['Age'] = (season_start - df['DOB']).dt.days / 365
    # print(df[df['hasInfo']== 'left_only'].sort_values('AverageDraftPositionPPR').head(30))
    # print(df[(df['Player'].str.contains('Davis')) & (df['Tm'] == 'BUF')])

    return df, pfref_info

# Attach bio data (DOB, draft pick, age) to both sets. pfref_info is assigned twice;
# both calls load the same cache, so the second assignment simply replaces the first.
reg_set1, pfref_info = add_info(reg_set)
pred_set1, pfref_info = add_info(pred_set)

In [297]:
def prepPlayerReg(df, min_year = 2015):
    '''
    Add team/position context features and keep seasons from min_year on.

    df
    -Table with Tm, FantPos, Year and PrvPts_PPR columns; it is not modified
    min_year
    -Earliest season kept in the result (default 2015, as before)

    Added columns:
        -ones: constant 1 (used for counting)
        -PrvYrTmPts: sum of PrvPts_PPR over the team/position/season group
        -PlayersAtPosition: number of rows in that group
        -PrvYrPtsShare: the player's share of PrvYrTmPts; when the group total
         is 0 the share is undefined and is set to 1 / PlayersAtPosition
    '''
    keys = ['Tm', 'FantPos', 'Year']

    # Work on a copy so the caller's frame does not silently gain a column.
    df = df.copy()
    df['ones'] = 1

    # Sum only the two columns needed; summing every column would also try to add
    # up text and date columns.
    df_tm = (df.groupby(keys, as_index = False)[['PrvPts_PPR', 'ones']].sum()
               .rename(columns = {'PrvPts_PPR' : 'PrvYrTmPts', 'ones' : 'PlayersAtPosition'}))

    df = df.merge(df_tm, on = keys, how = 'inner')

    # A zero group total would give NaN or +/-inf; blank those out and fall back to
    # an equal split between the players in the group.
    share = (df['PrvPts_PPR'] / df['PrvYrTmPts']).where(df['PrvYrTmPts'] != 0)
    df['PrvYrPtsShare'] = share.fillna(1 / df['PlayersAtPosition'])

    df = df[df['Year'] >= min_year].copy()
    print(df.shape)
    return df
# Add the team/position share features to both sets (each call prints the resulting shape).
final_reg = prepPlayerReg(reg_set1)
final_pred = prepPlayerReg(pred_set1)

(4899, 26)
(646, 26)


In [298]:
# Binary classification target: 1 when the player finished above the positional
# baseline (MyVBD > 0), else 0. A missing MyVBD compares as False and therefore
# also gives 0.
final_reg['VBD_cls'] = np.where(final_reg['MyVBD'] > 0, 1 ,0)
final_pred['VBD_cls'] = np.where(final_pred['MyVBD'] > 0, 1 ,0)

In [299]:
# Export the full regression table for inspection outside the notebook.
final_reg.to_excel('historical_data.xlsx')

In [300]:
# Feature columns fed to the models, in the order the models see them.
# NOTE(review): the name `vars` shadows Python's builtin vars() for the rest of the
# notebook; it is left unchanged here because other cells may refer to it.
vars = [
        'PrvMyVBD', 
        'AverageDraftPositionPPR', 'AverageDraftPositionPPRSq', 
        'foundLastYearStats', 
        'foundAdp', 
        'QB','RB','TE','WR',
        'Draft',
        'Age',
        'PrvYrPtsShare',
        'PrvYrTmPts',
        'PlayersAtPosition',
        'OldQBs','NewQBs'
        ]
# Design matrices: pred_x for the season being predicted, x for the historical fit.
pred_x = final_pred[vars]
x = final_reg[vars]
# Targets: PPR points (regression) and the above-baseline flag (classification).
y_pts = final_reg['Pts_PPR']
# y_vbd = final_reg['MyVBD']
y_cl = final_reg['VBD_cls']

In [304]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn import linear_model
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import log_loss
from sklearn.metrics import r2_score
from sklearn.metrics import make_scorer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor

def try_lassos(x, y, model_type, scorer, alphas = [0.005, 0.01, 0.025, 0.05, 0.1, 1, 5, 10], estimators = None,
               reg_param = 'C', random_state = None, **kwargs):
    '''
    Grid-search one regularisation hyperparameter for a
    PolynomialFeatures(2) -> MaxAbsScaler -> model pipeline.

    x, y
    -Features and target
    model_type
    -Estimator class (not an instance); it is built as
     model_type(<reg_param>=value, max_iter=500000, **kwargs)
    scorer
    -Currently unused: cross-validation and the final score both use the
     estimator's own default .score(). Kept so existing calls keep working
    alphas
    -Candidate values for the hyperparameter named by reg_param
    estimators
    -Optional list of labels looped over alongside alphas; the label is only
     used in the key of the score table, not passed to the model
    reg_param
    -Name of the constructor argument that receives each candidate value.
     Defaults to 'C' (as hard-coded before); pass e.g. 'alpha' for models
     that use that name
    random_state
    -Passed to train_test_split; None (default) keeps the split random
    kwargs
    -Extra constructor arguments for model_type; may override max_iter

    Prints the cross-validated score table, the best setting and the held-out
    score, then returns the best pipeline refitted on ALL of x, y.
    '''
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = random_state)

    model_kwargs = {'max_iter' : 500000, **kwargs}

    def build_pipeline(strength):
        # One place that defines the pipeline, used for both the search and the final fit.
        return make_pipeline(PolynomialFeatures(2), MaxAbsScaler(),
                             model_type(**{reg_param : strength}, **model_kwargs))

    if estimators is None:
        estimators = [0]

    alpha_scores = {}
    for alpha in alphas:
        for estimator_no in estimators:
            print("Cross validation scores...")
            score_arr = cross_val_score(build_pipeline(alpha), X_train, y_train, cv = 5)
            alpha_scores[(estimator_no, alpha)] = sum(score_arr) / len(score_arr)

    # Highest mean cross-validation score wins.
    best_params = max(alpha_scores, key = alpha_scores.get)
    best_estimator_no, best_alpha = best_params
    print(alpha_scores)
    print(f"Best model had alpha of {best_params}. Now fitting model with this alpha on all training data...")
    model = build_pipeline(best_alpha)
    model.fit(X_train, y_train)

    # Held-out score of the chosen setting, then refit on everything for later predictions.
    final_score = model.score(X_test, y_test)
    print(final_score)
    model.fit(x, y)
    return model
    
# Estimator classes (not instances) that can be handed to try_lassos as model_type.
model_ridge = linear_model.Ridge
model_cls = linear_model.RidgeClassifier
model_log = linear_model.LogisticRegression
model_rf = RandomForestClassifier
model_gb = GradientBoostingRegressor

# best_model_pts = try_lassos(x, y_pts, model_ridge, r2_score, alphas = [.025, .05, 0.1, 0.25, 0.5, 1, 2.5, 5, 7.5, 10])
# best_model_log = try_lassos(x, y_cl, model_log, log_loss)
# best_model_pts = try_lassos(x, y_pts, model_gb, r2_score, alphas = [0.025, 0.05, 0.075, 0.1], estimators = [50, 100, 150, 200, 250])
# best_model_vbd = try_lassos(x, y_vbd, model, r2_score)
# best_model_cls = try_lassos(x, y_cl, model_rf, log_loss)

Cross validation scores...
Cross validation scores...
Cross validation scores...
Cross validation scores...
Cross validation scores...
Cross validation scores...
Cross validation scores...
Cross validation scores...
{(0, 0.005): 0.916049599916595, (0, 0.01): 0.9234491881043605, (0, 0.025): 0.9285541611280529, (0, 0.05): 0.927789832408059, (0, 0.1): 0.9270238746839732, (0, 1): 0.927279302525608, (0, 5): 0.9275334271639691, (0, 10): 0.9262572653582506}
Best model had alpha of (0, 0.025). Now fitting model with this alpha on all training data...
0.9224489795918367


In [152]:
def prepare_draft_template(n_picks = 200):
    '''
    Build one synthetic feature matrix per position, with one row per draft slot.

    Columns follow the order of the original pred_x column list:
    ['AverageDraftPositionPPR', 'AverageDraftPositionPPRSq', 'foundLastYearStats',
     'foundAdp', 'QB', 'RB', 'TE', 'WR']

    n_picks
    -Number of draft slots to generate; rows cover slots 1..n_picks (default 200)

    Returns a list of four float arrays of shape (n_picks, 8). The k-th array has
    a 1 in the k-th position column (columns 4-7) and 0 in the other three.
    '''
    n_positions = 4
    picks = np.arange(1, n_picks + 1)
    flags = np.ones(n_picks)
    # Columns shared by every position: slot, slot squared, and the two "found" flags set to 1.
    shared = np.column_stack((picks, np.square(picks), flags, flags))

    templates = []
    for pos in range(n_positions):
        # Explicit one-hot block instead of repeatedly swapping neighbouring columns.
        one_hot = np.zeros((n_picks, n_positions))
        one_hot[:, pos] = 1
        templates.append(np.column_stack((shared, one_hot)))
    return templates

# Stack the per-position templates row-wise into a single frame and show it.
arrs = prepare_draft_template()
stacked = np.vstack(arrs)
final = pd.DataFrame(stacked)
print(final)

         0        1    2    3    4    5    6    7
0      1.0      1.0  1.0  1.0  1.0  0.0  0.0  0.0
1      2.0      4.0  1.0  1.0  1.0  0.0  0.0  0.0
2      3.0      9.0  1.0  1.0  1.0  0.0  0.0  0.0
3      4.0     16.0  1.0  1.0  1.0  0.0  0.0  0.0
4      5.0     25.0  1.0  1.0  1.0  0.0  0.0  0.0
..     ...      ...  ...  ...  ...  ...  ...  ...
795  196.0  38416.0  1.0  1.0  0.0  0.0  0.0  1.0
796  197.0  38809.0  1.0  1.0  0.0  0.0  0.0  1.0
797  198.0  39204.0  1.0  1.0  0.0  0.0  0.0  1.0
798  199.0  39601.0  1.0  1.0  0.0  0.0  0.0  1.0
799  200.0  40000.0  1.0  1.0  0.0  0.0  0.0  1.0

[800 rows x 8 columns]


In [153]:
# Score the template rows with both models and export the combined table.
points_pred = best_model_pts.predict(final)
# Column 1 of predict_proba - presumably the positive class; confirm against classes_.
prob_pred = best_model_log.predict_proba(final)[:, 1]
np_res = np.column_stack((points_pred, prob_pred))
final_additions = pd.DataFrame(np_res, columns = ['Predicted Points', 'Predicted Prob'])
final = final.join(final_additions)
final.to_excel('draftPositionPredictions2.xlsx')
# final[final[0].isin([1, 10, 20, 30, 40, 50, 60, 70, 80])]

In [305]:
# Attach both model outputs to the prediction frame, order by ADP and export.
# reset_index(drop = True) discards the old index directly, so the cell no longer
# relies on the intermediate column happening to be called 'index'.
final_pred = final_pred.reset_index(drop = True)
final_pred['Preds'] = best_model_pts.predict(pred_x)
# Column 1 of predict_proba - presumably the positive class; confirm against classes_.
final_pred['Prob'] = best_model_log.predict_proba(pred_x)[:, 1]
final_pred = final_pred.sort_values('AverageDraftPositionPPR', ascending = True)
print(final_pred.head())
final_pred.to_excel('test_4.xlsx')

                  Player   Tm FantPos  Year  PrvPts_PPR  PrvMyVBD  \
351      Jonathan Taylor  IND      RB  2022       373.1     187.5   
435  Christian McCaffrey  CAR      RB  2022       127.5     -58.1   
21         Austin Ekeler  LAC      RB  2022       343.8     158.2   
209        Derrick Henry  TEN      RB  2022       193.3       7.7   
200     Justin Jefferson  MIN      WR  2022       330.4     127.9   

     foundLastYearStats  AverageDraftPositionPPR  foundAdp  QB  ...  \
351                   1                      1.3         1   0  ...   
435                   1                      2.4         1   0  ...   
21                    1                      2.7         1   0  ...   
209                   1                      4.3         1   0  ...   
200                   1                      4.6         1   0  ...   

           DOB  Draft        Age  ones  PrvYrTmPts  PlayersAtPosition  \
351 1999-01-19     41  23.632877     1       582.1                  5   
435 1996-06-

In [1983]:
# Scratch cell: display pred_set for inspection.
pred_set

Unnamed: 0,Player,Tm,FantPos,Year,PrvPts_HPPR,PrvMyVBD,foundLastYearStats,AverageDraftPositionPPR,foundAdp,QB,RB,TE,WR,Pts_HPPR,MyVBD,Base,AverageDraftPositionPPRSq
6531,Josh Allen,BUF,QB,2022,402.58,102.100000,1,20.4,1,1,0,0,0,,,,416.16
6532,Justin Herbert,LAC,QB,2022,380.76,80.280000,1,33.2,1,1,0,0,0,,,,1102.24
6533,Tom Brady,TAM,QB,2022,374.74,74.260000,1,86.8,1,1,0,0,0,,,,7534.24
6534,Patrick Mahomes,KAN,QB,2022,361.66,61.180000,1,38.3,1,1,0,0,0,,,,1466.89
6535,Aaron Rodgers,GNB,QB,2022,332.80,32.320000,1,80.2,1,1,0,0,0,,,,6432.04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7411,Tre Turner,LVR,WR,2022,0.00,-100.954626,0,173.0,0,0,0,0,1,,,,29929.00
7412,Britain Covey,PHI,WR,2022,0.00,-100.954626,0,173.0,0,0,0,0,1,,,,29929.00
7413,Jerrion Ealy,KAN,WR,2022,0.00,-100.954626,0,173.0,0,0,0,0,1,,,,29929.00
7414,Makai Polk,BAL,WR,2022,0.00,-100.954626,0,173.0,0,0,0,0,1,,,,29929.00


In [469]:
# Scratch cell: display the integers 0..99.
np.arange(0, 100)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [90]:
# Scratch cell: display the feature frame x.
x

Unnamed: 0,PrvMyVBD,foundLastYear,AverageDraftPositionPPR,foundAdp,QB,RB,TE,WR
0,-91.480000,1,20.8,1,1,0,0,0
1,31.660000,1,51.9,1,1,0,0,0
2,9.260000,1,104.2,1,1,0,0,0
3,149.060000,1,9.4,1,1,0,0,0
4,-2.080000,1,129.5,1,1,0,0,0
...,...,...,...,...,...,...,...,...
4419,-157.600000,1,201.0,0,0,0,0,1
4420,-107.454626,0,201.0,0,0,0,0,1
4421,-107.454626,0,201.0,0,0,0,0,1
4422,-107.454626,0,201.0,0,0,0,0,1


In [89]:
# Display the coefficients of the step named 'lasso' (best_model is presumably a fitted pipeline).
best_model['lasso'].coef_
# best_model['polynomialfeatures'].feature_names_in_

array([ 0.00000000e+00,  2.54886880e+02, -1.50302903e-01, -2.74045207e+02,
        2.40678061e-02, -1.09929976e+01,  3.00549425e-04, -0.00000000e+00,
        0.00000000e+00, -7.33820953e+01, -1.54424049e+02, -2.12188648e+01,
       -7.20965040e+01, -1.31897977e+01,  3.04093150e+00,  7.32278857e+01,
       -0.00000000e+00, -5.79969974e+01,  4.09516946e+01, -8.35034331e+00,
       -2.08291578e+01, -0.00000000e+00,  4.75539772e+00, -5.83062617e+00,
        1.15422776e+02,  0.00000000e+00, -2.35040264e+01, -1.03075303e+01,
        8.29136808e+01,  0.00000000e+00,  2.45260797e+01,  5.80305935e+01,
       -1.67563932e+01,  2.21129756e+01, -0.00000000e+00, -5.85855438e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  2.30796065e+01,
        0.00000000e+00,  0.00000000e+00, -4.00616248e+01,  0.00000000e+00,
        6.23331578e+00])

In [411]:
# Mean MyVBD per position and year. Selecting the column before aggregating avoids
# averaging every other column, which also fails on non-numeric columns in pandas 2.x.
test.groupby(['FantPos', 'Year'], as_index = False)['MyVBD'].mean()


Unnamed: 0,FantPos,Year,MyVBD
0,QB,2014,-159.503733
1,QB,2015,-167.549167
2,QB,2016,-145.828824
3,QB,2017,-151.290411
4,QB,2018,-166.008649
5,QB,2019,-153.993239
6,QB,2020,-217.272683
7,QB,2021,-189.56988
8,RB,2014,-67.214286
9,RB,2015,-73.550675


In [317]:
# Apply the trailing "I"/space stripping to a copy of adp_df and list the rows
# whose name it would change.
stripped_names = adp_df['Name'].str.replace('([I ]+$)', '', regex = True)
test = adp_df.copy()
test['New Name'] = stripped_names
test[test['Name'].ne(test['New Name'])]

Unnamed: 0,Name,Year,Team,Position,PositionRank,AverageDraftPositionPPR,CB,DB,DE,DL,...,LS,NT,OLB,QB,RB,S,SS,TE,WR,New Name


In [189]:
# Scratch cell: display pts_df for inspection.
pts_df

Unnamed: 0,Player,Tm,FantPos,Age,G,GS,PassCmp,PassAtt,PassYds,PassTD,...,VBD,PosRank,OvRank,Year,Pts_HPPR,Pts_PPR,MyRk,Baseline,Base,MyVBD
0,Aaron Rodgers,GNB,QB,31,16,16,341,520,4381.0,38.0,...,89,1,13,2014,354.14,354.14,1.0,0,269.5,84.64
1,Andrew Luck,IND,QB,25,16,16,380,616,4761.0,40.0,...,86,2,14,2014,351.74,351.74,2.0,0,269.5,82.24
2,Russell Wilson,SEA,QB,26,16,16,285,452,3475.0,20.0,...,64,3,22,2014,330.10,330.60,3.0,0,269.5,61.10
3,Peyton Manning,DEN,QB,38,16,16,395,597,4727.0,39.0,...,47,4,32,2014,312.68,312.68,4.0,0,269.5,43.18
4,Ben Roethlisberger,PIT,QB,32,16,16,408,608,4952.0,32.0,...,41,5,37,2014,306.18,306.18,5.0,0,269.5,36.68
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4917,Justin Watson,TAM,,25,1,0,0,0,0.0,0.0,...,,255,,2021,0.00,0.00,478.0,0,,
4918,Nsimba Webster,CHI,,25,6,0,0,0,0.0,0.0,...,,256,,2021,0.00,0.00,479.0,0,,
4919,David Wells,ARI,,26,3,1,0,0,0.0,0.0,...,,142,,2021,0.00,0.00,480.0,0,,
4920,Jordan Wilkins,2TM,,27,5,0,0,0,0.0,0.0,...,,176,,2021,0.00,0.00,481.0,0,,


In [188]:
# Scratch cell: display adp_df for inspection.
adp_df

Unnamed: 0,Name,Year,Team,Position,PositionRank,AverageDraftPositionPPR,CB,DB,DE,DL,...,LB,LS,NT,OLB,QB,RB,S,SS,TE,WR
0,LeSean McCoy,2014,PHI,RB,1,1.8,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,Jamaal Charles,2014,KC,RB,2,2.7,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,Adrian Peterson,2014,MIN,RB,3,3.9,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
3,Eddie Lacy,2014,GB,RB,4,5.9,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,Matt Forte,2014,CHI,RB,5,6.0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9742,Isaiah Dunn,2021,NYJ,CB,496,2111.0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9743,Shaun Crawford,2021,LV,CB,497,2112.0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9744,Barrington Wade,2021,DEN,ILB,353,2113.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9745,Jovan Swann,2021,BAL,DT,399,2114.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Store column 1 of the classifier's predict_proba output for every row of x
# (presumably the positive-class probability; confirm against classes_).
d = best_model_log.predict_proba(x)
reg_set['probs'] = d[:, 1]

In [None]:
# Display reg_set ordered by most recent year first, then by ascending ADP within each year.
reg_set.sort_values(['Year','AverageDraftPositionPPR'], ascending = [False, True])

Unnamed: 0,Player,Tm,FantPos,Year,PrvMyVBD,foundLastYearStats,AverageDraftPositionPPR,foundAdp,QB,RB,TE,WR,Pts_HPPR,MyVBD,Base,AverageDraftPositionPPRSq,VBD_cls,probs
3991,Christian McCaffrey,CAR,RB,2021,-74.500000,1,1.3,1,0,1,0,0,109.00,-59.60,168.6,1.69,0,0.721385
3967,Dalvin Cook,MIN,RB,2021,159.400000,1,2.5,1,0,1,0,0,189.30,20.70,168.6,6.25,1,0.715512
3960,Alvin Kamara,NOR,RB,2021,179.900000,1,3.1,1,0,1,0,0,211.20,42.60,168.6,9.61,1,0.712551
4245,Davante Adams,GNB,WR,2021,138.200000,1,4.5,1,0,0,0,1,282.80,114.30,168.5,20.25,1,0.754620
3958,Ezekiel Elliott,DAL,RB,2021,41.300000,1,5.3,1,0,1,0,0,228.56,59.96,168.6,28.09,1,0.701557
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523,Micheal Spurlock,CHI,WR,2014,-87.233021,0,171.0,0,0,0,0,1,1.30,-164.20,165.5,29241.00,0,0.007729
524,Kevin Dorsey,GNB,WR,2014,-87.233021,0,171.0,0,0,0,0,1,0.90,-164.60,165.5,29241.00,0,0.007729
525,De'Andre Presley,CAR,WR,2014,-87.233021,0,171.0,0,0,0,0,1,-0.20,-165.70,165.5,29241.00,0,0.007729
526,Jalen Saunders,4TM,WR,2014,-87.233021,0,171.0,0,0,0,0,1,-0.80,-166.30,165.5,29241.00,0,0.007729


In [None]:
def prepare_draft_template(n_picks = 200):
    '''
    Build one synthetic feature matrix per position, with one row per draft slot.

    NOTE(review): this re-defines prepare_draft_template from an earlier cell; when
    cells run top to bottom, this later definition is the one that stays in effect.

    Columns follow this feature order:
    ['AverageDraftPositionPPR', 'AverageDraftPositionPPRSq', 'foundLastYearStats',
     'foundAdp', 'QB', 'RB', 'TE', 'WR']

    n_picks
    -Number of draft slots to generate; rows cover slots 1..n_picks (default 200)

    Returns a list of four float arrays of shape (n_picks, 8). The k-th array has
    a 1 in the k-th position column (columns 4-7) and 0 in the other three.
    '''
    # The previous version read the global pred_set into an unused local, so the
    # function raised NameError whenever pred_set had not been defined yet.
    n_positions = 4
    picks = np.arange(1, n_picks + 1)
    flags = np.ones(n_picks)
    # Columns shared by every position: slot, slot squared, and the two "found" flags set to 1.
    shared = np.column_stack((picks, np.square(picks), flags, flags))

    templates = []
    for pos in range(n_positions):
        # Explicit one-hot block instead of repeatedly swapping neighbouring columns.
        one_hot = np.zeros((n_picks, n_positions))
        one_hot[:, pos] = 1
        templates.append(np.column_stack((shared, one_hot)))
    return templates

# Stack the per-position templates row-wise into a single frame and show it.
arrs = prepare_draft_template()
stacked = np.vstack(arrs)
final = pd.DataFrame(stacked)
print(final)

         0        1    2    3    4    5    6    7
0      1.0      1.0  1.0  1.0  1.0  0.0  0.0  0.0
1      2.0      4.0  1.0  1.0  1.0  0.0  0.0  0.0
2      3.0      9.0  1.0  1.0  1.0  0.0  0.0  0.0
3      4.0     16.0  1.0  1.0  1.0  0.0  0.0  0.0
4      5.0     25.0  1.0  1.0  1.0  0.0  0.0  0.0
..     ...      ...  ...  ...  ...  ...  ...  ...
795  196.0  38416.0  1.0  1.0  0.0  0.0  0.0  1.0
796  197.0  38809.0  1.0  1.0  0.0  0.0  0.0  1.0
797  198.0  39204.0  1.0  1.0  0.0  0.0  0.0  1.0
798  199.0  39601.0  1.0  1.0  0.0  0.0  0.0  1.0
799  200.0  40000.0  1.0  1.0  0.0  0.0  0.0  1.0

[800 rows x 8 columns]


In [2204]:
# Pair each expanded polynomial feature name with its fitted ridge coefficient.
# get_feature_names was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out is its replacement.
poly_step = best_model_pts['polynomialfeatures']
ridge_step = best_model_pts['ridge']
list(zip(poly_step.get_feature_names_out(x.columns), ridge_step.coef_))

[('1', 0.0),
 ('PrvMyVBD', 4.802426512055645),
 ('AverageDraftPositionPPR', -87.95756938449912),
 ('AverageDraftPositionPPRSq', 35.798974770178866),
 ('foundLastYearStats', 11.844969855555506),
 ('foundAdp', 15.053889123591345),
 ('QB', 9.780508926901687),
 ('RB', 13.903716581185568),
 ('TE', -23.30773944173522),
 ('WR', -0.3764860666669353),
 ('Draft', -108.75811088344895),
 ('Age', -153.56683680731044),
 ('PrvYrTmPts', -42.26613053434552),
 ('PlayersAtPosition', -39.49193679185889),
 ('OldQBs', 4.1388680937978535),
 ('NewQBs', -27.109242751358508),
 ('PrvMyVBD^2', -21.838160456116267),
 ('PrvMyVBD AverageDraftPositionPPR', -4.0238025504697115),
 ('PrvMyVBD AverageDraftPositionPPRSq', 20.8623197348867),
 ('PrvMyVBD foundLastYearStats', 98.7057181111844),
 ('PrvMyVBD foundAdp', -78.05223894441255),
 ('PrvMyVBD QB', -0.7875512357588327),
 ('PrvMyVBD RB', -6.86377005867569),
 ('PrvMyVBD TE', -3.039410419971752),
 ('PrvMyVBD WR', 24.156756359656182),
 ('PrvMyVBD Draft', 2.916624867231286)