In [1]:
import random
import glob, os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
pd.options.mode.chained_assignment = None # to remove some warnings

In [2]:
def get_weekly_data(week, year):
    """Read the DraftKings player-data CSV for a single week.

    Args:
        week: Week number, interpolated into the file name.
        year: Season year, used for both the sub-directory and the file name.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for
        ``./csv's/<year>/year-<year>-week-<week>-DK-player_data.csv``.
    """
    return pd.read_csv(
        f"./csv's/{year}/year-{year}-week-{week}-DK-player_data.csv"
    )

def get_ytd_season_data(year, current_week):
    """Load weeks 1..current_week of a season into a single DataFrame.

    Week 1 must exist (a missing week-1 file raises). Any later week whose
    file is missing or unreadable is reported and skipped.

    Args:
        year: Season year passed through to ``get_weekly_data``.
        current_week: Last week (inclusive) to load.

    Returns:
        The concatenated weekly frames with a fresh 0..n-1 index and the
        'Unnamed: 0' and 'Year' columns dropped.
    """
    frames = [get_weekly_data(1, year)]
    for week in range(2, current_week + 1):
        try:
            frames.append(get_weekly_data(week, year))
        # Only swallow "file missing / not parseable"; the old bare `except`
        # also hid programming errors (e.g. DataFrame.append being removed
        # in pandas 2.0) behind a misleading "No data" message.
        except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
            print("No data for week: "+str(week))
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    df = pd.concat(frames, ignore_index=True)
    return df.drop(['Unnamed: 0', 'Year'], axis=1)

def get_season_data(year, drop_year=True, last_week=16):
    """Load a whole season (weeks 1..last_week) into a single DataFrame.

    Week 1 must exist (a missing week-1 file raises). Any later week whose
    file is missing or unreadable is reported and skipped.

    Args:
        year: Season year passed through to ``get_weekly_data``.
        drop_year: When True, drop the 'Year' column in addition to
            'Unnamed: 0'; when False, only 'Unnamed: 0' is dropped.
        last_week: Final week (inclusive) to load. Defaults to 16, the value
            that was previously hard-coded.

    Returns:
        The concatenated weekly frames with a fresh 0..n-1 index.
    """
    frames = [get_weekly_data(1, year)]
    for week in range(2, last_week + 1):
        try:
            frames.append(get_weekly_data(week, year))
        # Only swallow "file missing / not parseable"; the old bare `except`
        # also hid programming errors (e.g. DataFrame.append being removed
        # in pandas 2.0) behind a misleading "No data" message.
        except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
            print("No data for week: "+str(week))
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    df = pd.concat(frames, ignore_index=True)
    unwanted = ['Unnamed: 0', 'Year'] if drop_year else ['Unnamed: 0']
    return df.drop(unwanted, axis=1)

def check_dk_data():
    """Load the most recently created ``fixed*.csv`` file from ./csv's/dkdata/.

    NOTE(review): the original located the newest ``*.csv`` but never used
    it, then handed the literal pattern ``fixed*.csv`` to ``pd.read_csv``,
    which does not expand wildcards and therefore could not succeed. The
    assumed intent is "read the newest 'fixed' export" -- confirm.

    Returns:
        The DataFrame read from the newest matching file.

    Raises:
        FileNotFoundError: If no file matches the pattern.
    """
    pattern = "./csv's/dkdata/fixed*.csv"
    candidates = glob.glob(pattern)
    if not candidates:
        raise FileNotFoundError(f"No files match {pattern}")
    # Newest by creation/metadata-change time, as in the original sort key.
    most_recent_dkdata = max(candidates, key=os.path.getctime)
    return pd.read_csv(most_recent_dkdata)

In [3]:
# Week whose prediction file is analysed throughout the notebook.
week = 6
file_path = f"./csv's/dkdata/predictions/dk_preds-week-{week}.csv"

# Show every row, but cap the number of displayed columns at 20.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", 20)

# Load the predictions and keep only rows that have a player name.
df = pd.read_csv(file_path)
df = df[df["Name"].notna()]
print(len(df))
df.head(25)

140


Unnamed: 0.1,Unnamed: 0,Pos,Name,Salary,Team,h/a,Oppt,Oppt_pts_allowed_lw,pred
0,2,RB,"McCaffrey, Christian",8800.0,car,h,min,75.22,9.802269
1,3,WR,"Hill, Tyreek",8500.0,kan,a,was,113.86,16.62381
2,4,QB,"Mahomes II, Patrick",8300.0,kan,a,was,113.86,16.087543
3,5,QB,"Allen, Josh",8200.0,buf,a,ten,106.02,19.106667
4,8,WR,"Kupp, Cooper",7900.0,lar,a,nyg,139.46,12.869863
5,11,QB,"Brady, Tom",7800.0,tam,a,phi,74.68,15.387328
6,14,WR,"Diggs, Stefon",7400.0,buf,a,ten,106.02,15.870202
7,15,QB,"Jackson, Lamar",7400.0,bal,h,lac,139.7,9.45165
8,16,RB,"Harris, Najee",7400.0,pit,h,sea,121.9,15.557187
9,19,WR,"Moore, D.J.",7300.0,car,h,min,75.22,9.45165


In [4]:
# QBs, ordered by Oppt_pts_allowed_lw (highest first)
df[df["Pos"] == "QB"].sort_values("Oppt_pts_allowed_lw", ascending=False)

Unnamed: 0.1,Unnamed: 0,Pos,Name,Salary,Team,h/a,Oppt,Oppt_pts_allowed_lw,pred
26,69,QB,"Lawrence, Trevor",5700.0,jac,h,mia,181.38,9.802269
7,15,QB,"Jackson, Lamar",7400.0,bal,h,lac,139.7,9.45165
15,31,QB,"Stafford, Matthew",6700.0,lar,a,nyg,139.46,12.959241
33,88,QB,"Roethlisberger, Ben",5400.0,pit,h,sea,121.9,15.557187
2,4,QB,"Mahomes II, Patrick",8300.0,kan,a,was,113.86,16.087543
57,159,QB,"Trubisky, Mitchell",4400.0,buf,a,ten,106.02,15.818352
3,5,QB,"Allen, Josh",8200.0,buf,a,ten,106.02,19.106667
14,30,QB,"Wilson, Russell",6800.0,sea,a,pit,99.92,14.619536
39,109,QB,"Smith, Geno",5100.0,sea,a,pit,99.92,14.619536
31,81,QB,"Jones, Daniel",5500.0,nyg,h,lar,90.82,9.45165


In [5]:
# RBs, ordered by Oppt_pts_allowed_lw (highest first)
df[df["Pos"] == "RB"].sort_values("Oppt_pts_allowed_lw", ascending=False)

Unnamed: 0.1,Unnamed: 0,Pos,Name,Salary,Team,h/a,Oppt,Oppt_pts_allowed_lw,pred
71,194,RB,"Ogunbowale, Dare",4000.0,jac,h,mia,181.38,9.70784
19,42,RB,"Robinson, James",6300.0,jac,h,mia,181.38,9.824931
63,177,RB,"Hyde, Carlos",4200.0,jac,h,mia,181.38,9.70784
27,71,RB,"Murray, Latavius",5700.0,bal,h,lac,139.7,9.352509
106,277,RB,"Ricard, Patrick",4000.0,bal,h,lac,139.7,9.802269
43,125,RB,"Williams, Ty'Son",4900.0,bal,h,lac,139.7,9.802269
61,172,RB,"Michel, Sony",4300.0,lar,a,nyg,139.46,12.701867
128,343,RB,"Watt, Derek",4000.0,pit,h,sea,121.9,15.714861
8,16,RB,"Harris, Najee",7400.0,pit,h,sea,121.9,15.557187
124,339,RB,"Ballage, Kalen",4000.0,pit,h,sea,121.9,18.037073


In [6]:
# WRs, ordered by Oppt_pts_allowed_lw (highest first)
df[df["Pos"] == "WR"].sort_values("Oppt_pts_allowed_lw", ascending=False)

Unnamed: 0.1,Unnamed: 0,Pos,Name,Salary,Team,h/a,Oppt,Oppt_pts_allowed_lw,pred
157,409,WR,"Treadwell, Laquon",3000.0,jac,h,mia,181.38,9.915804
151,403,WR,"Johnson, Tyron",3000.0,jac,h,mia,181.38,9.70784
140,373,WR,"Jefferson, Van",3400.0,lar,a,nyg,139.46,12.701867
183,487,WR,"Atwell, Tutu",3000.0,lar,a,nyg,139.46,13.050542
4,8,WR,"Kupp, Cooper",7900.0,lar,a,nyg,139.46,12.869863
133,353,WR,"Jackson, DeSean",3700.0,lar,a,nyg,139.46,12.701867
54,152,WR,"Smith-Schuster, JuJu",4600.0,pit,h,sea,121.9,15.714861
150,401,WR,"McCloud, Ray-Ray",3100.0,pit,h,sea,121.9,15.870202
129,344,WR,"Washington, James",4000.0,pit,h,sea,121.9,15.818352
207,568,WR,"White, Cody",3000.0,pit,h,sea,121.9,15.870202


In [7]:
# TEs, ordered by Oppt_pts_allowed_lw (highest first)
df[df["Pos"] == "TE"].sort_values("Oppt_pts_allowed_lw", ascending=False)

Unnamed: 0.1,Unnamed: 0,Pos,Name,Salary,Team,h/a,Oppt,Oppt_pts_allowed_lw,pred
221,594,TE,"O'Shaughnessy, James",2500.0,jac,h,mia,181.38,9.70784
220,591,TE,"Manhertz, Chris",2500.0,jac,h,mia,181.38,9.70784
243,662,TE,"Oliver, Josh",2500.0,bal,h,lac,139.7,9.802269
242,661,TE,"Tomlinson, Eric",2500.0,bal,h,lac,139.7,9.802269
256,704,TE,"Gentry, Zach",2500.0,pit,h,sea,121.9,15.818352
215,580,TE,"Freiermuth, Pat",2900.0,pit,h,sea,121.9,15.818352
211,572,TE,"Ebron, Eric",3000.0,pit,h,sea,121.9,15.818352
235,640,TE,"Winchester, James",2500.0,kan,a,was,113.86,16.62381
234,639,TE,"Fortson, Jody",2500.0,kan,a,was,113.86,16.725164
233,638,TE,"Gray, Noah",2500.0,kan,a,was,113.86,16.725164


In [8]:
# Per-defense weekly rows, including the avg_pts_to_<position> columns used below.
file_path = "./csv's/def_df's/most_recent_def_df.csv"
def_df = pd.read_csv(file_path)
def_df.iloc[:15]

Unnamed: 0.1,Unnamed: 0,Week,Name,Pos,Team,h/a,Oppt,DK points,Salary,avg_pts_to_qb,avg_pts_to_rb,avg_pts_to_wr,avg_pts_to_te
0,407,1,Arizona,Def,ari,a,ten,16.0,2100,17.18,17.2,33.0,4.9
1,408,1,New Orleans,Def,nor,h,gnb,15.0,2200,5.04,9.2,29.5,5.7
2,409,1,Pittsburgh,Def,pit,a,buf,14.0,2800,18.2,11.4,50.8,8.1
3,410,1,Philadelphia,Def,phi,a,atl,10.0,2600,7.36,25.0,10.1,13.9
4,411,1,San Francisco,Def,sfo,a,det,10.0,4200,32.92,49.4,33.7,26.9
5,412,1,Carolina,Def,car,h,nyj,9.0,3600,19.32,9.0,42.5,10.8
6,413,1,Denver,Def,den,a,nyg,8.0,3300,22.38,7.6,50.6,2.8
7,414,1,LA Rams,Def,lar,h,chi,8.0,4400,14.24,29.8,30.3,11.3
8,415,1,Las Vegas,Def,lvr,h,bal,7.0,2600,0.0,0.0,0.0,0.0
9,416,1,Houston,Def,hou,h,jac,7.0,2100,25.08,16.2,50.8,20.0


In [9]:
# Figure out which teams are giving up the most to QBs: keep rows whose
# avg_pts_to_qb exceeds 18, then take the 15 highest of those.
qb_df = (def_df.loc[def_df.avg_pts_to_qb > 18]
            .drop(columns=['avg_pts_to_rb', 'avg_pts_to_wr', 'avg_pts_to_te'])
            .sort_values(by='avg_pts_to_qb', ascending=False)
            .head(15))
# Count how often each team appears in that top 15;
# higher frequency = weaker against that position.
# (The former `qb_df.sort_values(by='Name')` discarded its result, so it
# neither reordered qb_df nor displayed anything; it has been removed.)
qb_counts = qb_df.Team.value_counts()

In [10]:
# Figure out which teams are giving up the most to RBs: keep rows whose
# avg_pts_to_rb exceeds 18, then take the 15 highest of those.
rb_df = (def_df.loc[def_df.avg_pts_to_rb > 18]
            .drop(columns=['avg_pts_to_qb', 'avg_pts_to_wr', 'avg_pts_to_te'])
            .sort_values(by='avg_pts_to_rb', ascending=False)
            .head(15))
# Count how often each team appears in that top 15;
# higher frequency = weaker against that position.
# (The former `rb_df.sort_values(by='Name')` discarded its result, so it
# neither reordered rb_df nor displayed anything; it has been removed.)
rb_counts = rb_df.Team.value_counts()

In [11]:
# Figure out which teams are giving up the most to WRs: keep rows whose
# avg_pts_to_wr exceeds 18, then take the 15 highest of those.
wr_df = (def_df.loc[def_df.avg_pts_to_wr > 18]
            .drop(columns=['avg_pts_to_qb', 'avg_pts_to_rb', 'avg_pts_to_te'])
            .sort_values(by='avg_pts_to_wr', ascending=False)
            .head(15))
# Count how often each team appears in that top 15;
# higher frequency = weaker against that position.
# (The former `wr_df.sort_values(by='Name')` discarded its result, so it
# neither reordered wr_df nor displayed anything; it has been removed.)
wr_counts = wr_df.Team.value_counts()

In [12]:
# Figure out which teams are giving up the most to TEs: keep rows whose
# avg_pts_to_te exceeds 10 (a lower cut-off than the other positions),
# then take the 15 highest of those.
te_df = (def_df.loc[def_df.avg_pts_to_te > 10]
            .drop(columns=['avg_pts_to_qb', 'avg_pts_to_rb', 'avg_pts_to_wr'])
            .sort_values(by='avg_pts_to_te', ascending=False)
            .head(15))
# Count how often each team appears in that top 15;
# higher frequency = weaker against that position.
# (The former `te_df.sort_values(by='Name')` discarded its result, so it
# neither reordered te_df nor displayed anything; it has been removed.)
te_counts = te_df.Team.value_counts()

In [13]:
# Per-position team frequency Series, in the same order as `pos`.
total_counts = [qb_counts, rb_counts, wr_counts, te_counts]
pos = ['qb', 'rb', 'wr', 'te']

def read_counts(array):
    """Map each position label in `pos` to the first three entries of its counts.

    Args:
        array: One value_counts Series per position, ordered like `pos`.

    Returns:
        dict of position label -> Series holding at most the first three
        (team, count) entries of the corresponding Series.
    """
    # Pair labels with their Series instead of indexing both by range(4),
    # so the two lists no longer have to contain exactly four items.
    return {label: counts.iloc[:3] for label, counts in zip(pos, array)}

# Number of times each defense appears among the (up to 15) highest rows
# that allowed more than 18 points to that position (more than 10 in the
# case of TE's). The higher the number, the more frequently that happens.
count_dict = read_counts(total_counts)
count_dict

{'qb': was    3
 ten    2
 cle    2
 Name: Team, dtype: int64,
 'rb': sea    4
 mia    3
 det    2
 Name: Team, dtype: int64,
 'wr': tam    4
 ten    3
 dal    2
 Name: Team, dtype: int64,
 'te': bal    4
 dal    2
 nyg    2
 Name: Team, dtype: int64}

In [14]:
# For every position, print this week's players whose opponent is one of
# the (up to three) defenses listed for that position in count_dict.
for key, weak_defs in count_dict.items():
    print("Pick these", key + "'s:")
    # Iterate over the teams that are actually present; indexing 0..2
    # unconditionally raised IndexError when fewer than three were kept.
    for bad_def in weak_defs.index[:3]:
        good_play = (df.loc[(df.Oppt == bad_def) & (df.Pos == key.upper())]
                       .drop(columns=['Unnamed: 0', 'Pos', 'Oppt_pts_allowed_lw']))
        if len(good_play) > 0:
            print(good_play)
    print('=====')

Pick these qb's:
                  Name  Salary Team h/a Oppt       pred
2  Mahomes II, Patrick  8300.0  kan   a  was  16.087543
                  Name  Salary Team h/a Oppt       pred
3          Allen, Josh  8200.0  buf   a  ten  19.106667
57  Trubisky, Mitchell  4400.0  buf   a  ten  15.818352
=====
Pick these rb's:
               Name  Salary Team h/a Oppt       pred
8     Harris, Najee  7400.0  pit   h  sea  15.557187
124  Ballage, Kalen  4000.0  pit   h  sea  18.037073
128     Watt, Derek  4000.0  pit   h  sea  15.714861
                Name  Salary Team h/a Oppt      pred
19   Robinson, James  6300.0  jac   h  mia  9.824931
63      Hyde, Carlos  4200.0  jac   h  mia  9.707840
71  Ogunbowale, Dare  4000.0  jac   h  mia  9.707840
=====
Pick these wr's:
                     Name  Salary Team h/a Oppt       pred
23         Smith, Devonta  6000.0  phi   h  tam  15.870202
90          Reagor, Jalen  4000.0  phi   h  tam  16.164235
134         Watkins, Quez  3500.0  phi   h  tam  16.1642

In [15]:
def pick_def(file_path="./csv's/def_df's/most_recent_def_df.csv"):
    """Tier defenses by how often they appear among the top 10% of rows by 'DK points'.

    The CSV is read, the 'Unnamed: 0', 'Name' and 'h/a' columns are dropped,
    and the highest ``int(len / 10)`` rows by 'DK points' are kept. Teams are
    then counted: tier 1 holds the team(s) with the highest count, tier 2 the
    team(s) with the next-highest distinct count. Both tiers are printed.

    Args:
        file_path: CSV to read. Defaults to the path that was previously
            hard-coded, so existing ``pick_def()`` calls are unaffected.

    Returns:
        (tier_1_defs, tier_2_defs): two lists of 'Team' values; either may be
        empty (e.g. when the file has fewer than 10 rows).
    """
    scores = pd.read_csv(file_path).drop(columns=['Unnamed: 0', 'Name', 'h/a'])
    top_rows = (scores
                .sort_values(by='DK points', ascending=False)
                .head(int(len(scores) / 10)))
    appearances = top_rows['Team'].value_counts()

    # Distinct appearance counts, highest first. Selecting tiers by value
    # avoids depending on value_counts() happening to yield descending
    # order, which the previous running-maximum loop silently required.
    levels = sorted(set(appearances.tolist()), reverse=True)
    tier_1_defs = appearances[appearances == levels[0]].index.tolist() if levels else []
    tier_2_defs = appearances[appearances == levels[1]].index.tolist() if len(levels) > 1 else []

    print("Tier 1 defenses: ")
    for defense in tier_1_defs:
        print(defense)
    print("Tier 2 defenses: ")
    for defense in tier_2_defs:
        print(defense)
    return tier_1_defs, tier_2_defs

# Run the defense tiering (prints both tiers) and keep the two returned lists.
t1d, t2d = pick_def()

Tier 1 defenses: 
buf
Tier 2 defenses: 
ari
nor
