In [12]:
%%writefile projections.py

import urllib3
from bs4 import BeautifulSoup
from collections import defaultdict

def convert_string(s):
    try:
        return float(s)
    except:
        return 0.0
    
def get_nf_projections():
    base_page = 'https://www.numberfire.com/nfl/fantasy/fantasy-football-ppr-projections'
    http = urllib3.PoolManager()
    response = http.request('GET', base_page)
    soup = BeautifulSoup(response.data, 'html.parser')
    rows = soup.find_all('tr')
    plyr_list = []
    projections = []
    for row in rows:
        if row.a:
            plyr_list.append(row.a.get_text())
        if len(row)==47:
            score = [convert_string(td.string) for td in row.find_all('td', {'class': 'nf_fp active'})]
            if len(score)>0:
                projections.append(score[0])
    plyr_list = [p.split('\n')[1] for p in plyr_list]
    projection_dict = {x: y for (x,y) in zip(plyr_list, projections)}
    return projection_dict

def get_nf_d_projections():
    base_page = 'https://www.numberfire.com/nfl/fantasy/fantasy-football-projections/d'
    http = urllib3.PoolManager()
    response = http.request('GET', base_page)
    soup = BeautifulSoup(response.data, 'html.parser')
    rows = soup.find_all('tr')
    plyr_list = []
    projections = []
    for row in rows:
        if row.a:
            plyr_list.append(row.a.get_text().replace(' D/ST',''))
        if len(row)==41:
            score = [convert_string(td.string) for td in row.find_all('td', {'class': 'nf_fp active'})]
            if len(score)>0:
                projections.append(score[0])
    plyr_list = [p.split('\n')[1] for p in plyr_list]
    projection_dict = {x: y for (x,y) in zip(plyr_list, projections)}
    return projection_dict

def get_espn_projections():
    base_page = 'http://games.espn.com/ffl/tools/projections?'
    addon = 'startIndex='
    startindex = list(range(40, 1080, 40))
    plyr_dict = defaultdict(dict)
    page = base_page
    for i in startindex:
        http = urllib3.PoolManager()
        response = http.request('GET', page)
        soup = BeautifulSoup(response.data, 'html.parser')
        rows = soup.find_all('tr')
        for row in rows:
            if len(row) == 14:
                if row.a.get_text() != 'PLAYER':
                    plyr_dict[row.a.get_text()] = [convert_string(td.string) for td in \
                                                   row.find_all('td', \
                                                                {'class': 'playertableStat appliedPoints sortedCell'})][0]
        page = base_page + addon + str(i)

    d_plyr_dict = {x.split(' ')[0]: y for (x,y) in plyr_dict.items() if x.split(' ')[1] == 'D/ST'}
    return plyr_dict, d_plyr_dict

Overwriting projections.py


In [38]:
import projections
from importlib import reload
reload(projections)
projections.get_nf_d_projections()

{'Pittsburgh': 9.29,
 'Los Angeles Chargers': 8.23,
 'Cleveland': 8.15,
 'Minnesota': 7.68,
 'Washington': 7.46,
 'Miami': 7.03,
 'Detroit': 6.91,
 'New England': 6.73,
 'Baltimore': 6.46,
 'Tampa Bay': 6.23,
 'New Orleans': 6.22,
 'Kansas City': 5.83,
 'Cincinnati': 5.79,
 'Houston': 5.69,
 'Denver': 5.66,
 'Philadelphia': 5.48,
 'Carolina': 5.39,
 'Arizona': 5.21,
 'Green Bay': 5.12,
 'Tennessee': 4.95,
 'Dallas': 4.88,
 'Los Angeles Rams': 4.87,
 'Seattle': 4.85,
 'Jacksonville': 4.65,
 'Atlanta': 4.64,
 'New York Jets': 4.42,
 'Indianapolis': 4.33,
 'Las Vegas': 2.88}

In [158]:
#!usr/bin/python3

import sys
import projections
import pandas as pd
from itertools import combinations
import numpy as np
from joblib import Parallel, delayed
import datetime
from collections import defaultdict

def remove_suffix(p, fanduel=False):

    if fanduel:
        p = p.replace(' Jr.', '')
        if fanduel_d.get(p.strip()):
            p = fanduel_d.get(p.strip())
    if (not fanduel) and defense.get(p.strip()):
        p = defense.get(p.strip())
    p = p.replace(' III', '')
    p = p.replace(' II', '')
    p = p.replace(' IV', '')
    p = p.replace(' V', '')
    p = p.replace('AJ ', 'A.J. ')
    p = p.replace('DJ Chark Jr.', 'D.J. Chark')
    p = p.replace('Marvin Jones Jr.', 'Marvin Jones')
    p = p.replace('Robert Griffin', 'Robert Griffin III')
    p = p.replace('DJ ', 'D.J. ')

    return p

defense = {
     'Steelers': 'Pittsburgh',
     'Chargers': 'Los Angeles Chargers',
     'Browns': 'Cleveland',
     'Vikings': 'Minnesota',
     'WAS Football Team': 'Washington',
     'Dolphins': 'Miami',
     'Lions': 'Detroit',
     'Patriots': 'New England',
     'Ravens': 'Baltimore',
     'Buccaneers': 'Tampa Bay',
     'Saints': 'New Orleans',
     'Chiefs': 'Kansas City',
     'Bengals': 'Cincinnati',
     'Texans': 'Houston',
     'Broncos': 'Denver',
     'Eagles': 'Philadelphia',
     'Panthers': 'Carolina',
     'Cardinals': 'Arizona',
     'Packers': 'Green Bay',
     'Titans': 'Tennessee',
     'Cowboys': 'Dallas',
     'Rams': 'Los Angeles Rams',
     'Seahawks': 'Seattle',
     'Jaguars': 'Jacksonville',
     'Falcons': 'Atlanta',
     'Jets': 'New York Jets',
     'Colts': 'Indianapolis',
     'Raiders': 'Las Vegas'
 }

fanduel_d = {
    'Pittsburgh Steelers': 'Pittsburgh',
    'Miami Dolphins': 'Miami',
    'Baltimore Ravens': 'Baltimore',
    'Cleveland Browns': 'Cleveland',
    'Los Angeles Chargers': 'Los Angeles Chargers',
    'Minnesota Vikings': 'Minnesota',
    'New England Patriots': 'New England',
    'Green Bay Packers': 'Green Bay',
    'New Orleans Saints': 'New Orleans',
    'Washington Football Team': 'Washington',
    'Indianapolis Colts': 'Indianapolis',
    'Philadelphia Eagles': 'Philadelphia',
    'Denver Broncos': 'Denver',
    'Tennessee Titans': 'Tennessee',
    'Cincinnati Bengals': 'Cincinnati',
    'Houston Texans': 'Houston',
    'Detroit Lions': 'Detroit',
    'Carolina Panthers': 'Carolina',
    'Jacksonville Jaguars': 'Jacksonville',
    'Atlanta Falcons': 'Atlanta',
    'New York Jets': 'New York Jets',
    'Dallas Cowboys': 'Dallas',
}

plyr_dict = projections.get_nf_projections()
plyr_dict.update(projections.get_nf_d_projections())

In [167]:
f = 'fanduel.csv'
df = pd.read_csv('inputs/' + f)
assert len(df) == len(df['Nickname'].unique())
players = df['Nickname'].to_list()
players = {p: remove_suffix(p, True) for p in players}
df['Nickname'] = df['Nickname'].replace(players)
df = df.set_index('Nickname')
df['Projection'] = pd.DataFrame.from_dict(plyr_dict, orient='index')

In [149]:
df.loc[df['Position']=='D']

Unnamed: 0_level_0,Id,Position,First Name,Last Name,FPPG,Played,Salary,Game,Team,Opponent,Injury Indicator,Injury Details,Tier,Projection
Nickname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Pittsburgh,51732-12547,D,Pittsburgh,Steelers,10.333333,9,5000,PIT@JAC,PIT,JAC,,,,9.29
Miami,51732-12539,D,Miami,Dolphins,9.444444,9,4800,MIA@DEN,MIA,DEN,,,,7.03
Baltimore,51732-12555,D,Baltimore,Ravens,10.555556,9,4700,TEN@BAL,BAL,TEN,,,,6.46
Cleveland,51732-12529,D,Cleveland,Browns,6.0,9,4600,PHI@CLE,CLE,PHI,,,,8.15
Los Angeles Chargers,51732-12548,D,Los Angeles,Chargers,4.222222,9,4500,NYJ@LAC,LAC,NYJ,,,,8.23
Minnesota,51732-12540,D,Minnesota,Vikings,4.0,9,4400,DAL@MIN,MIN,DAL,,,,7.68
New England,51732-12541,D,New England,Patriots,6.666667,9,4300,NE@HOU,NE,HOU,,,,6.73
Green Bay,51732-12533,D,Green Bay,Packers,4.666667,9,4200,GB@IND,GB,IND,,,,5.12
New Orleans,51732-12542,D,New Orleans,Saints,7.0,9,4100,ATL@NO,NO,ATL,,,,6.22
Washington,51732-12552,D,Washington,Football Team,5.777778,9,4100,CIN@WAS,WAS,CIN,,,,7.46


In [109]:
f = 'draftkings.csv'
df = pd.read_csv('inputs/' + f)
players = df['Name'].to_list()
players = {p: remove_suffix(p) for p in players}
df['Name'] = df['Name'].replace(players)
df = df.set_index('Name')
df['Projection'] = pd.DataFrame.from_dict(plyr_dict, orient='index')
df = df.reset_index()
df['Name'] = df['Name'].apply(lambda x: x.strip())
assert len(df['Name']) == len(df['Name'].unique())
df.set_index(['Name'], inplace=True)

In [168]:
df = df[df['Projection'] > 1]

defense = {'d': 'DST', 'f': 'D'}
plyr_names = {'d': 'Name', 'f': 'Nickname'}
salary = {'d': 50000, 'f': 60000}

df = df.reset_index()
df_pos_salary = df.groupby(['Position', 'Salary'])['Projection'].agg([np.mean, np.max])
df_pos_salary = df_pos_salary.reset_index()
df = df.merge(df_pos_salary, on=['Position', 'Salary'])
df = df[((df['Position'].isin(['QB', defense[f[0]]])) & (df['Projection'] == df['amax'])) | ((df['Position'].isin(['WR', 'TE', 'RB'])) & (df['Projection'] == df['mean']))]
df.set_index(plyr_names[f[0]], inplace=True)
df = df.reset_index()
df_pos = df.groupby(['Position', 'Projection'])['Salary'].agg([np.min])['amin']
df_pos = df_pos.reset_index()
df = df.merge(df_pos, on=['Position', 'Projection'])
df = df[((df['Position'].isin(['QB', defense[f[0]]])) & (df['Salary'] <= df['amin'])) | (df['Position'].isin(['WR', 'TE', 'RB']))]
df.set_index(plyr_names[f[0]], inplace=True)

In [169]:
grouped = df.groupby(['Position'])
position_dict = defaultdict()
for pos, frame in grouped:
    position_dict[pos] = frame.to_dict(orient='index')

player_dict = defaultdict()
for item in position_dict.items():
    for plyr_name in item[1].keys():
        player_dict[plyr_name] = item[1][plyr_name]

te_plyr_list = [k for k,v in player_dict.items() if v['Position'] == 'TE']
rb_plyr_list = [k for k,v in player_dict.items() if v['Position'] == 'RB']
wr_plyr_list = [k for k,v in player_dict.items() if v['Position'] == 'WR']
singles_list = [[qb, d] for qb in position_dict['QB'].keys() for d in position_dict[defense[f[0]]].keys()]

def return_combos(plyr_list, count):
    return list(combinations(plyr_list, count))

te = return_combos(te_plyr_list, 2)
wr_3 = return_combos(wr_plyr_list, 3)
wr_4 = return_combos(wr_plyr_list, 4)
rb_2 = return_combos(rb_plyr_list, 2)
rb_3 = return_combos(rb_plyr_list, 3)

flex_combos = {
    1: {'TE': te_plyr_list, 'RB': rb_3, 'WR': wr_3},
    2: {'TE': te_plyr_list, 'RB': rb_2,  'WR': wr_4},
    3: {'TE': te, 'RB': rb_2, 'WR': wr_3},
    }

def df_value(row, k, n):
    if n == 1:
        return player_dict[row[0]][k]
    if n == 2:
        return player_dict[row[0]][k]+ player_dict[row[1]][k]
    if n == 3:
        return player_dict[row[0]][k] + player_dict[row[1]][k] + player_dict[row[2]][k]
    if n == 4:
        return player_dict[row[0]][k] + player_dict[row[1]][k] + player_dict[row[2]][k] + player_dict[row[3]][k]

def clean_frame(f):
    df_salary = f.groupby(['Salary'])['Projection'].agg([np.max])['amax']
    f = f.merge(df_salary.to_frame(), on=['Salary'])
    f = f[f['Projection'] >= f['amax']]
    del f['amax']

    df_pos = f.groupby(['Projection'])['Salary'].agg([np.min])['amin']
    f = f.merge(df_pos.to_frame(), on=['Projection'])
    f = f[f['Salary'] <= f['amin']]
    del f['amin']
    return f

def get_projections(ck):

    df_qb = pd.DataFrame(singles_list)
    col_n = len(df_qb.columns)
    df_qb['Salary'] = df_qb.apply(df_value, args=(['Salary', col_n]), axis=1)
    df_qb['Projection'] = df_qb.apply(df_value, args=(['Projection', col_n]), axis=1)
    df_qb = clean_frame(df_qb)

    df_te = pd.DataFrame(flex_combos[ck]['TE'])
    col_n = len(df_te.columns)
    df_te['Salary'] = df_te.apply(df_value, args=(['Salary', col_n]), axis=1)
    df_te['Projection'] = df_te.apply(df_value, args=(['Projection', col_n]), axis=1)
    df_te = clean_frame(df_te)

    df_rb = pd.DataFrame(flex_combos[ck]['RB'])
    col_n = len(df_rb.columns)
    df_rb['Salary'] = df_rb.apply(df_value, args=(['Salary', col_n]), axis=1)
    df_rb['Projection'] = df_rb.apply(df_value, args=(['Projection', col_n]), axis=1)
    df_rb = clean_frame(df_rb)

    df_wr = pd.DataFrame(flex_combos[ck]['WR'])
    col_n = len(df_wr.columns)
    df_wr['Salary'] = df_wr.apply(df_value, args=(['Salary', col_n]), axis=1)
    df_wr['Projection'] = df_wr.apply(df_value, args=(['Projection', col_n]), axis=1)
    df_wr = clean_frame(df_wr)

    df_qb['key'] = 1
    df_te['key'] = 1
    df_rb['key'] = 1
    df_wr['key'] = 1

    df = df_te.merge(df_rb, on=['key'], suffixes=('_te', '_rb'))
    df['Salary'] = df['Salary_te'] + df['Salary_rb']
    df['Projection'] = df['Projection_te'] + df['Projection_rb']
    df = df[df['Salary'] <= salary[f[0]] - (min(df_wr['Salary']) + min(df_qb['Salary']))]
    df = clean_frame(df)

    df = df.merge(df_wr, on=['key'], suffixes=('_2', '_wr'))
    df['Salary'] = df['Salary_2'] + df['Salary_wr']
    df['Projection'] = df['Projection_2'] + df['Projection_wr']
    df = df[df['Salary'] <= salary[f[0]] - min(df_qb['Salary'])]
    df = clean_frame(df)

    df = df.merge(df_qb, on=['key'], suffixes=('_3', '_qb'))
    df['Salary'] = df['Salary_3'] + df['Salary_qb']
    df['Projection'] = df['Projection_3'] + df['Projection_qb']
    df = df[df['Salary'] <= salary[f[0]]]
    df = clean_frame(df)

    df = df.sort_values('Projection', ascending=False)
    df.columns = [str(x) for x in df.columns]
    df = df[df.columns[~df.columns.str.contains('Projection_|Salary_|key')]].dropna(axis=1)
    df.columns = list(range(1,10)) + ['Salary', 'Projection']
    return df

In [170]:
# if __name__=="__main__":
start_time = datetime.datetime.now()
results = Parallel(n_jobs=-1)(delayed(get_projections)(k) for k in flex_combos.keys())
df = pd.concat(results, sort=True)
df.reset_index(inplace=True)
df = df[list(range(1,10)) + ['Salary', 'Projection']].sort_values('Projection', ascending=False)
# df.to_csv('results/output_' + sys.argv[1])
df.to_csv('results/output_' + f)

print (datetime.datetime.now() - start_time)
print (df.iloc[0])

0:00:01.216720
1                Mark Andrews
2                  Mike Davis
3              James Robinson
4             Giovani Bernard
5                Keenan Allen
6              Terry McLaurin
7               Brandin Cooks
8              Deshaun Watson
9                     Detroit
Salary                  60000
Projection             139.02
Name: 0, dtype: object


In [66]:
players = df['Name'].to_list()
players = {p: remove_suffix(p) for p in players}
projections = plyr_dict.keys()

players = df['Nickname'].to_list()
players = {p: remove_suffix(p, True) for p in players}
projections = plyr_dict.keys()
for k,v in players.items():
    if v not in projections:
        print(k, v)

for r in df.loc[df['Position']=='D'].T:
    print("'" + df.iloc[r]['Nickname'] + "':", "'" + df.iloc[r]['First Name'] + "',")
    

Dwayne Haskins Jr. Dwayne Haskins Jr.
Marquez Valdes-Scantling Marquezaldes-Scantling
Kyle Allen Kyle Allen
Logan Woodside Logan Woodside
Reggie Bonnafon Reggie Bonnafon
Jonathan Williams Jonathan Williams
Jason Cabinda Jason Cabinda
Scottie Phillips Scottie Phillips
Jacob Eason Jacob Eason
Jordan Love Jordan Love
Marlon Mack Marlon Mack
Patrick Taylor Patrick Taylor
Jordan Howard Jordan Howard
LeVante Bellamy LeVante Bellamy
Josh Dobbs Josh Dobbs
Ryquell Armstead Ryquell Armstead
Trey Edmunds Trey Edmunds
James Morgan James Morgan
Dak Prescott Dak Prescott
Keelan Cole Sr. Keelan Cole Sr.
KJ Hamler KJ Hamler
Laviska Shenault Jr. Laviska Shenault Jr.
Olabisi Johnson Olabisi Johnson
Omar Bayless Omar Bayless
JJ Arcega-Whiteside JJ Arcega-Whiteside
Quez Watkins Quez Watkins
KhaDarel Hodge KhaDarel Hodge
Odell Beckham Jr. Odell Beckham Jr.
JoJo Natson JoJo Natson
Matthew Slater Matthew Slater
Quincy Adeboyejo Quincy Adeboyejo
DeAndre Carter DeAndre Carter
Isaiah Coulter Isaiah Coulter
DeMi