In [1]:
import random
import glob, os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Turn off pandas' chained-assignment check for the whole notebook so that
# SettingWithCopyWarning is never emitted.
pd.options.mode.chained_assignment = None # to remove some warnings

In [2]:
def get_current_year():
    """Return the current calendar year (int) according to the local clock."""
    import datetime as _dt

    return _dt.datetime.today().year
    
def get_weekly_data(week, year):
    """Read the DK player-data CSV for one week of one season.

    The file is looked up relative to the working directory at
    ``./csv's/{year}/year-{year}-week-{week}-DK-player_data.csv`` and is
    returned as a DataFrame exactly as ``pd.read_csv`` parses it.
    """
    return pd.read_csv(f"./csv's/{year}/year-{year}-week-{week}-DK-player_data.csv")

def get_ytd_season_data(year, current_week):
    """Return one season's player data from week 1 through ``current_week``.

    Week 1 is loaded unguarded, so a missing week-1 file raises. For weeks
    2..current_week, a missing or empty file is reported on stdout and
    skipped. The 'Unnamed: 0' and 'Year' columns are dropped from the result.

    Args:
        year: season passed through to ``get_weekly_data``.
        current_week: last week (inclusive) to include.

    Returns:
        pandas.DataFrame: all loaded weeks stacked with a fresh 0..n-1 index.
    """
    # Collect the weekly frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0, and appending inside a loop copies the
    # accumulated frame on every iteration.
    frames = [get_weekly_data(1, year)]
    for week in range(2, current_week + 1):
        try:
            frames.append(get_weekly_data(week, year))
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # Only "there is no data" is treated as skippable; any other
            # error is a real problem and should surface.
            print("No data for week: "+str(week))
    df = pd.concat(frames, ignore_index=True)
    return df.drop(['Unnamed: 0', 'Year'], axis=1)

def get_season_data(year, drop_year=True, last_week=16):
    """Return a full season of player data (weeks 1 through ``last_week``).

    Week 1 is loaded unguarded, so a missing week-1 file raises. For later
    weeks, a missing or empty file is reported on stdout and skipped.

    Args:
        year: season passed through to ``get_weekly_data``.
        drop_year: when True, drop the 'Year' column as well as 'Unnamed: 0';
            when False, only 'Unnamed: 0' is dropped.
        last_week: final week (inclusive) to load; defaults to 16, the value
            that used to be hard-coded.

    Returns:
        pandas.DataFrame: all loaded weeks stacked with a fresh 0..n-1 index.
    """
    # Collect the weekly frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0, and appending inside a loop copies the
    # accumulated frame on every iteration.
    frames = [get_weekly_data(1, year)]
    for week in range(2, last_week + 1):
        try:
            frames.append(get_weekly_data(week, year))
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # Only "there is no data" is treated as skippable; any other
            # error is a real problem and should surface.
            print("No data for week: "+str(week))
    df = pd.concat(frames, ignore_index=True)
    unwanted = ['Unnamed: 0', 'Year'] if drop_year else ['Unnamed: 0']
    return df.drop(unwanted, axis=1)

def check_dk_data():
    """Load the most recently created CSV found in ``./csv's/dkdata/``.

    Returns:
        pandas.DataFrame: contents of the ``*.csv`` file with the largest
        ``os.path.getctime`` in that directory.

    Raises:
        FileNotFoundError: if the directory contains no ``*.csv`` files.
    """
    dk_files = glob.glob("./csv's/dkdata/*.csv")
    if not dk_files:
        raise FileNotFoundError("No CSV files found in ./csv's/dkdata/")
    most_recent_dkdata = max(dk_files, key=os.path.getctime)
    # The earlier version handed the literal pattern "fixed*.csv" to
    # pd.read_csv, which does not expand wildcards, and returned nothing.
    # NOTE(review): if only files named fixed*.csv should be considered,
    # narrow the glob pattern above - confirm the intent.
    return pd.read_csv(most_recent_dkdata)

In [3]:
# Load this week's prediction file, keep only the rows that have a player
# name, and preview the first 25.
week = 6
file_path = f"./csv's/dkdata/predictions/dk_preds-week-{week}.csv"
pd.set_option("display.max_rows", None, "display.max_columns", 20)
df = pd.read_csv(file_path).loc[lambda frame: frame.Name.notna()]
print(len(df))
df.head(25)

165


Unnamed: 0.1,Unnamed: 0,Pos,Name,Salary,Team,h/a,Oppt,Oppt_pts_allowed_lw,pred
0,1,WR,"Adams, Davante",8900.0,gnb,h,was,113.86,20.062857
1,2,RB,"Kamara, Alvin",8900.0,nor,a,sea,121.9,15.688201
2,3,WR,"Hill, Tyreek",8600.0,kan,a,ten,106.02,16.739286
3,5,RB,"McCaffrey, Christian",8500.0,car,a,nyg,139.46,13.643279
4,6,QB,"Mahomes II, Patrick",8400.0,kan,a,ten,106.02,16.631441
5,10,TE,"Kelce, Travis",7600.0,kan,a,ten,106.02,16.689206
6,11,QB,"Rodgers, Aaron",7500.0,gnb,h,was,113.86,16.841176
7,12,RB,"Jones, Aaron",7500.0,gnb,h,was,113.86,16.975245
8,15,WR,"Moore, D.J.",7100.0,car,a,nyg,139.46,13.643279
9,17,RB,"Taylor, Jonathan",7100.0,ind,a,sfo,87.86,16.036257


In [4]:
# QBs, ordered by the Oppt_pts_allowed_lw column (largest first)
df[df["Pos"] == "QB"].sort_values("Oppt_pts_allowed_lw", ascending=False)

Unnamed: 0.1,Unnamed: 0,Pos,Name,Salary,Team,h/a,Oppt,Oppt_pts_allowed_lw,pred
67,207,QB,"Walker, P.J.",4000.0,car,a,nyg,139.46,13.68
18,48,QB,"Darnold, Sam",5900.0,car,a,nyg,139.46,14.670821
29,79,QB,"Jones, Mac",5300.0,nwe,h,nyj,126.68,14.617337
19,51,QB,"Winston, Jameis",5900.0,nor,a,sea,121.9,15.613253
41,110,QB,"Hill, Taysom",5000.0,nor,a,sea,121.9,15.688201
6,11,QB,"Rodgers, Aaron",7500.0,gnb,h,was,113.86,16.841176
58,162,QB,"Love, Jordan",4200.0,gnb,h,was,113.86,16.975245
37,94,QB,"Wilson, Zach",5100.0,nyj,a,nwe,111.38,12.292515
4,6,QB,"Mahomes II, Patrick",8400.0,kan,a,ten,106.02,16.631441
26,74,QB,"Wentz, Carson",5400.0,ind,a,sfo,87.86,15.933766


In [5]:
# RBs, ordered by the Oppt_pts_allowed_lw column (largest first)
df[df["Pos"] == "RB"].sort_values("Oppt_pts_allowed_lw", ascending=False)

Unnamed: 0.1,Unnamed: 0,Pos,Name,Salary,Team,h/a,Oppt,Oppt_pts_allowed_lw,pred
16,41,RB,"Hubbard, Chuba",6100.0,car,a,nyg,139.46,13.68
68,210,RB,"Freeman, Royce",4000.0,car,a,nyg,139.46,13.680648
3,5,RB,"McCaffrey, Christian",8500.0,car,a,nyg,139.46,13.643279
48,129,RB,"Stevenson, Rhamondre",4700.0,nwe,h,nyj,126.68,14.421829
73,232,RB,"Johnson, Jakob",4000.0,nwe,h,nyj,126.68,14.617337
72,229,RB,"White, James",4000.0,nwe,h,nyj,126.68,14.617337
21,59,RB,"Harris, Damien",5700.0,nwe,h,nyj,126.68,14.617337
71,228,RB,"Taylor, J.J.",4000.0,nwe,h,nyj,126.68,14.617337
70,227,RB,"Bolden, Brandon",4000.0,nwe,h,nyj,126.68,15.227826
1,2,RB,"Kamara, Alvin",8900.0,nor,a,sea,121.9,15.688201


In [6]:
# WRs, ordered by the Oppt_pts_allowed_lw column (largest first)
df[df["Pos"] == "WR"].sort_values("Oppt_pts_allowed_lw", ascending=False)

Unnamed: 0.1,Unnamed: 0,Pos,Name,Salary,Team,h/a,Oppt,Oppt_pts_allowed_lw,pred
98,344,WR,"Sharpe, Tajae",3400.0,atl,a,mia,181.38,10.46967
12,24,WR,"Ridley, Calvin",6600.0,atl,a,mia,181.38,10.438841
93,329,WR,"Zaccheaus, Olamide",3700.0,atl,a,mia,181.38,10.46967
114,415,WR,"Smith, Shi",3000.0,car,a,nyg,139.46,13.68
8,15,WR,"Moore, D.J.",7100.0,car,a,nyg,139.46,13.643279
45,123,WR,"Anderson, Robby",4800.0,car,a,nyg,139.46,14.617337
100,356,WR,"Marshall, Terrace",3200.0,car,a,nyg,139.46,13.68
112,413,WR,"Zylstra, Brandon",3000.0,car,a,nyg,139.46,13.68
113,414,WR,"Erickson, Alex",3000.0,car,a,nyg,139.46,13.680648
92,327,WR,"Agholor, Nelson",3700.0,nwe,h,nyj,126.68,15.22977


In [7]:
# TEs, ordered by the Oppt_pts_allowed_lw column (largest first)
df[df["Pos"] == "TE"].sort_values("Oppt_pts_allowed_lw", ascending=False)

Unnamed: 0.1,Unnamed: 0,Pos,Name,Salary,Team,h/a,Oppt,Oppt_pts_allowed_lw,pred
148,571,TE,"Thompson, Colin",2500.0,car,a,nyg,139.46,13.68
142,543,TE,"Tremble, Tommy",2700.0,car,a,nyg,139.46,13.68
139,534,TE,"Thomas, Ian",2900.0,car,a,nyg,139.46,13.68
140,538,TE,"Smith, Jonnu",2800.0,nwe,h,nyj,126.68,14.617337
61,174,TE,"Henry, Hunter",4100.0,nwe,h,nyj,126.68,14.670821
137,531,TE,"Trautman, Adam",3000.0,nor,a,sea,121.9,15.688201
164,651,TE,"Griffin, Garrett",2500.0,nor,a,sea,121.9,15.688201
103,362,TE,"Johnson, Juwan",3200.0,nor,a,sea,121.9,15.688201
158,605,TE,"Davis, Tyler",2500.0,gnb,h,was,113.86,17.167669
157,604,TE,"Deguara, Josiah",2500.0,gnb,h,was,113.86,17.167669


In [8]:
# Load the most recent defense table and preview its first 15 rows.
file_path = "./csv's/def_df's/most_recent_def_df.csv"
def_df = pd.read_csv(filepath_or_buffer=file_path)
def_df.head(n=15)

Unnamed: 0.1,Unnamed: 0,Week,Name,Pos,Team,h/a,Oppt,DK points,Salary,avg_pts_to_qb,avg_pts_to_rb,avg_pts_to_wr,avg_pts_to_te
0,407,1,Arizona,Def,ari,a,ten,16.0,2100,17.18,17.2,33.0,4.9
1,408,1,New Orleans,Def,nor,h,gnb,15.0,2200,5.04,9.2,29.5,5.7
2,409,1,Pittsburgh,Def,pit,a,buf,14.0,2800,18.2,11.4,50.8,8.1
3,410,1,Philadelphia,Def,phi,a,atl,10.0,2600,7.36,25.0,10.1,13.9
4,411,1,San Francisco,Def,sfo,a,det,10.0,4200,32.92,49.4,33.7,26.9
5,412,1,Carolina,Def,car,h,nyj,9.0,3600,19.32,9.0,42.5,10.8
6,413,1,Denver,Def,den,a,nyg,8.0,3300,22.38,7.6,50.6,2.8
7,414,1,LA Rams,Def,lar,h,chi,8.0,4400,14.24,29.8,30.3,11.3
8,415,1,Las Vegas,Def,lvr,h,bal,7.0,2600,20.0,30.7,34.6,5.0
9,416,1,Houston,Def,hou,h,jac,7.0,2100,25.08,16.2,50.8,20.0


In [9]:
# Figure out which teams are giving up the most to QBs: keep the def_df rows
# whose avg_pts_to_qb exceeds 18, then take the 15 highest.
qb_df = (
    def_df[def_df["avg_pts_to_qb"] > 18]
    .drop(columns=["avg_pts_to_rb", "avg_pts_to_wr", "avg_pts_to_te"])
    .sort_values(by="avg_pts_to_qb", ascending=False)
    .head(15)
)
# Count how often each team appears among those rows;
# higher frequency = weaker to that position.
# (A former `qb_df.sort_values(by='Name')` line was dropped here: its result
# was neither assigned nor displayed, so it had no effect.)
qb_counts = qb_df["Team"].value_counts()

In [10]:
# Figure out which teams are giving up the most to RBs: keep the def_df rows
# whose avg_pts_to_rb exceeds 18, then take the 15 highest.
rb_df = (
    def_df[def_df["avg_pts_to_rb"] > 18]
    .drop(columns=["avg_pts_to_qb", "avg_pts_to_wr", "avg_pts_to_te"])
    .sort_values(by="avg_pts_to_rb", ascending=False)
    .head(15)
)
# Count how often each team appears among those rows;
# higher frequency = weaker to that position.
# (A former `rb_df.sort_values(by='Name')` line was dropped here: its result
# was neither assigned nor displayed, so it had no effect.)
rb_counts = rb_df["Team"].value_counts()

In [11]:
# Figure out which teams are giving up the most to WRs: keep the def_df rows
# whose avg_pts_to_wr exceeds 18, then take the 15 highest.
wr_df = (
    def_df[def_df["avg_pts_to_wr"] > 18]
    .drop(columns=["avg_pts_to_qb", "avg_pts_to_rb", "avg_pts_to_te"])
    .sort_values(by="avg_pts_to_wr", ascending=False)
    .head(15)
)
# Count how often each team appears among those rows;
# higher frequency = weaker to that position.
# (A former `wr_df.sort_values(by='Name')` line was dropped here: its result
# was neither assigned nor displayed, so it had no effect.)
wr_counts = wr_df["Team"].value_counts()

In [12]:
# Figure out which teams are giving up the most to TEs: keep the def_df rows
# whose avg_pts_to_te exceeds 10, then take the 15 highest.
te_df = (
    def_df[def_df["avg_pts_to_te"] > 10]
    .drop(columns=["avg_pts_to_qb", "avg_pts_to_rb", "avg_pts_to_wr"])
    .sort_values(by="avg_pts_to_te", ascending=False)
    .head(15)
)
# Count how often each team appears among those rows;
# higher frequency = weaker to that position.
# (A former `te_df.sort_values(by='Name')` line was dropped here: its result
# was neither assigned nor displayed, so it had no effect.)
te_counts = te_df["Team"].value_counts()

In [13]:
total_counts = [qb_counts, rb_counts, wr_counts, te_counts]
pos = ['qb', 'rb', 'wr', 'te']
def read_counts(array, top_n=3):
    """Map each position label in ``pos`` to the head of its count Series.

    Args:
        array: sequence of value_counts Series, in the same order as ``pos``.
        top_n: number of leading entries to keep from each Series (default 3).

    Returns:
        dict: position label -> Series holding at most ``top_n`` entries.
    """
    # zip pairs labels with Series without a hard-coded length, and .iloc
    # makes the positional slice explicit.
    return {label: counts.iloc[:top_n] for label, counts in zip(pos, array)}

# For each position, these are the teams that appear most often among the
# rows selected above (avg_pts_to_* over 18 for QB/RB/WR, over 10 for TE).
# The higher the number, the more often that defense gave up that much
# to the position.
count_dict = read_counts(total_counts)
count_dict

{'qb': was    3
 ten    2
 cle    2
 Name: Team, dtype: int64,
 'rb': sea    4
 mia    3
 det    2
 Name: Team, dtype: int64,
 'wr': tam    4
 ten    3
 dal    2
 Name: Team, dtype: int64,
 'te': bal    4
 dal    2
 gnb    2
 Name: Team, dtype: int64}

In [14]:
# For every position, list the players in df who face one of the defenses
# that shows up most often in count_dict for that position.
for key, top_defs in count_dict.items():
    print("Pick these", key + "'s:")
    # Walk the teams actually present instead of assuming there are exactly
    # three; a position with fewer qualifying defenses used to raise
    # IndexError.
    for bad_def in top_defs.index:
        facing_bad_def = (df.Oppt == bad_def) & (df.Pos == key.upper())
        good_play = df.loc[facing_bad_def].drop(
            columns=['Unnamed: 0', 'Pos', 'Oppt_pts_allowed_lw']
        )
        if len(good_play) > 0:
            print(good_play)
    print('=====')

Pick these qb's:
              Name  Salary Team h/a Oppt       pred
6   Rodgers, Aaron  7500.0  gnb   h  was  16.841176
58    Love, Jordan  4200.0  gnb   h  was  16.975245
                  Name  Salary Team h/a Oppt       pred
4  Mahomes II, Patrick  8400.0  kan   a  ten  16.631441
=====
Pick these rb's:
                  Name  Salary Team h/a Oppt       pred
1        Kamara, Alvin  8900.0  nor   a  sea  15.688201
86      Ozigbo, Devine  4000.0  nor   a  sea  15.688201
87  Washington, Dwayne  4000.0  nor   a  sea  15.688201
88         Armah, Alex  4000.0  nor   a  sea  17.432951
=====
Pick these wr's:
                  Name  Salary Team h/a Oppt       pred
36     Robinson, Allen  5100.0  chi   a  tam  16.171883
50     Mooney, Darnell  4600.0  chi   a  tam  16.171883
107  Goodwin, Marquise  3000.0  chi   a  tam  16.302382
108      Byrd, Damiere  3000.0  chi   a  tam  18.152047
109      Grant, Jakeem  3000.0  chi   a  tam  16.302382
110    Webster, Nsimba  3000.0  chi   a  tam  16.2849

In [15]:
def pick_def():
    """Group defenses into two tiers by how often they appear among top rows.

    Reads ``./csv's/def_df's/most_recent_def_df.csv``, keeps the top tenth of
    its rows by 'DK points' and counts rows per 'Team'. Tier 1 is every team
    tied for the highest count; tier 2 is every team tied for the next
    highest count. Both tiers are printed and then returned.

    Returns:
        tuple: ``(tier_1_defs, tier_2_defs)``, two lists of 'Team' values.
    """
    scores = pd.read_csv("./csv's/def_df's/most_recent_def_df.csv")
    scores = scores.drop(columns=['Unnamed: 0', 'Name', 'h/a'])
    keep = int(len(scores) / 10)
    top_rows = scores.sort_values(by='DK points', ascending=False).head(keep)
    appearances = top_rows['Team'].value_counts()

    # Distinct appearance counts, largest first; the first two define the tiers.
    levels = sorted(set(appearances.tolist()), reverse=True)
    best = levels[0] if levels else None
    runner_up = levels[1] if len(levels) > 1 else None

    tier_1_defs = [team for team, seen in appearances.items() if seen == best]
    tier_2_defs = [team for team, seen in appearances.items() if seen == runner_up]

    for heading, tier in (("Tier 1 defenses: ", tier_1_defs),
                          ("Tier 2 defenses: ", tier_2_defs)):
        print(heading)
        for defense in tier:
            print(defense)
    return tier_1_defs, tier_2_defs

# Run the ranking; t1d and t2d receive the tier-1 and tier-2 team lists that
# pick_def returns (it also prints both tiers).
t1d, t2d = pick_def()

Tier 1 defenses: 
buf
Tier 2 defenses: 
ari
nor


In [16]:
big_df_columns = ['Year', 'Week', 'Name', 'Pos', 'Team', 'h/a', 'Oppt', 'DK points', 'DK salary']
# Load every season from 2014 through the current year and stack them with a
# single pd.concat. DataFrame.append was removed in pandas 2.0, and appending
# inside a loop copies the accumulated frame on every iteration.
season_frames = [get_season_data(num, drop_year=False)
                 for num in range(2014, get_current_year()+1)]
big_df = pd.concat(season_frames)
# Keep the expected columns first (created as NaN if a file lacks one),
# followed by any additional columns the files carry.
extra_columns = [col for col in big_df.columns if col not in big_df_columns]
big_df = big_df.reindex(columns=big_df_columns + extra_columns)
# DK points scored per 1000 of DK salary.
big_df['pts/1k'] = big_df['DK points'] / big_df['DK salary'] * 1000

NameError: name 'get_current_year' is not defined

In [None]:
# Every big_df row for one player, ordered by pts/1k (largest first)
player_name = 'Njoku, David'
big_df[big_df["Name"] == player_name].sort_values("pts/1k", ascending=False)