In [2]:
# build out ownership predictions
# and scores while you're at it

import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from datetime import datetime, date, timedelta
import re
import json
from selenium import webdriver
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import make_pipeline
import xgboost as xgb
from joblib import dump

In [11]:
def _lineup_rows_for_team(game, team_block, team, opponent, favorite, ou_index, shared):
    """Build one row dict per parsable player listed in ``team_block``.

    ``shared`` holds the per-game fields copied into every row; ``ou_index``
    selects which ``div`` inside the game's ``ou`` block carries this team's
    implied total (0 for the away side, 2 for the home side).
    """
    rows = []
    players = team_block.find_all('li', attrs={'class': 'player'})
    if not players:
        return rows

    # Identical for every player on the team, so look it up once per team.
    team_ou = game.find('div', attrs={'class': 'ou'}).find_all('div')[ou_index].text.strip().split('\n')[0]
    is_favorite = 'YES' if team == favorite else 'NO'

    for player in players:
        try:
            parsed = {
                'player': player.find('a').text,
                'salary': player['data-salary'],
                'position': player['data-pos'],
                'proj_fpts_roto': player.find('span', attrs={'class': 'fpts'}).text,
                'proj_ownr_roto': player.find('span', attrs={'class': 'pown'}).text,
            }
        except (AttributeError, KeyError):
            # Entry lacks the link, a data-* attribute or a projection span: skip it.
            continue

        row = dict(shared)
        row.update(team=team, opponent=opponent, favorite=is_favorite, team_OU=team_ou)
        row.update(parsed)
        # Only successfully parsed players count toward the first five listed.
        row['starting'] = 'YES' if len(rows) < 5 else 'NO'
        rows.append(row)

    return rows


def get_lineup(day=None, slate_id='1', slate_type='MAIN'):
    """Scrape the RotoGrinders NBA DraftKings lineup page for one day.

    Parameters
    ----------
    day : date or str, optional
        Day to fetch, interpolated into the URL (``YYYY-MM-DD``). Defaults to
        today's date, resolved when the function is called.
    slate_id, slate_type :
        Stored unchanged in the ``slate_id`` and ``type`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per parsed player with the game totals, team/opponent, the
        favourite flag and line, salary (``$7.6K`` -> ``7600.0``), position,
        projected fantasy points and projected ownership (as a fraction).
        The first five parsed players of each team get ``starting == 'YES'``.
    """
    if day is None:
        # A default written in the signature would be evaluated once, at
        # definition time, and go stale in a long-lived kernel.
        day = datetime.today().date()

    plyr_site = requests.get(
        f"https://rotogrinders.com/lineups/nba?date={day}&site=draftkings",
        timeout=30,  # do not hang the notebook on a stalled connection
    )
    plyr_soup = BeautifulSoup(plyr_site.text, 'html.parser')

    columns = ['date', 'slate_id', 'type', 'total_OU', 'team', 'opponent', 'team_OU',
               'player', 'starting', 'salary', 'position', 'proj_fpts_roto',
               'proj_ownr_roto', 'fav_line', 'favorite']

    # Collect plain dicts and build the frame once; growing a DataFrame row by
    # row is quadratic and DataFrame.append no longer exists in pandas 2.x.
    records = []
    for game in plyr_soup.find_all('li', attrs={'data-role': 'lineup-card'}):
        # The summary text splits into: game total, favoured team, favourite line.
        total_ou, favorite, fav_line = (
            game.find('div', attrs={'class': 'ou'})
            .find('div', attrs={'class': 'sum'})
            .text.strip().split()
        )
        shared = {
            'date': day,
            'slate_id': slate_id,
            'type': slate_type,
            'total_OU': total_ou,
            'fav_line': fav_line,
        }

        away_team = game.find('div', attrs={'class': 'blk away-team'})
        home_team = game.find('div', attrs={'class': 'blk home-team'})

        records.extend(_lineup_rows_for_team(
            game, away_team, game['data-away'], game['data-home'], favorite, 0, shared))
        records.extend(_lineup_rows_for_team(
            game, home_team, game['data-home'], game['data-away'], favorite, 2, shared))

    p_df = pd.DataFrame(records, columns=columns)

    # CLEAN UP SOME COLUMNS
    p_df['total_OU'] = p_df['total_OU'].astype(float)
    p_df['team_OU'] = p_df['team_OU'].astype(float)
    p_df['salary'] = p_df['salary'].str.strip('$K').replace('', 0).astype(float) * 1000
    p_df['proj_fpts_roto'] = p_df['proj_fpts_roto'].replace('', 0).astype(float)
    # Blank ownership projections become NaN rather than raising.
    p_df['proj_ownr_roto'] = pd.to_numeric(p_df['proj_ownr_roto'].str.strip('%'), errors='coerce') / 100

    return p_df



def supp_stats(lineup):
    """Placeholder for supplementary stats: ignores ``lineup`` and returns None."""
    return None



def get_day_ownership(dt=None, driver_path=r'C:\Users\Luke\geckodriver.exe'):
    """Fetch the LineStar DraftKings NBA ownership table for one day.

    The date gives ownership for that day (posted on the website the next
    day). The geckodriver executable may need to be opened once by hand
    before Selenium can launch it.

    Parameters
    ----------
    dt : str, optional
        Day in ``YYYY-MM-DD`` format. Defaults to today, resolved at call time.
    driver_path : str, optional
        Location of the geckodriver executable.

    Returns
    -------
    pandas.DataFrame or None
        De-duplicated rows built from the first entry of the page's
        ``actualResultsDict`` JavaScript variable, or None when that variable
        cannot be read or is empty.
    """
    if dt is None:
        # Resolve "today" per call; a signature default is frozen at definition time.
        dt = datetime.today().strftime('%Y-%m-%d')

    # The page id is a day offset from a known anchor: 2020-09-02 maps to
    # PID 1326, and the id moves by one per calendar day.
    pid_anchor_date = date(2020, 9, 2)
    pid_anchor = 1326
    day = (datetime.strptime(dt, '%Y-%m-%d').date() - pid_anchor_date).days + pid_anchor

    driver = webdriver.Firefox(executable_path=driver_path)
    try:
        driver.get(f"https://www.linestarapp.com/Ownership/Sport/NBA/Site/DraftKings/PID/{day}")
        try:
            projected = driver.execute_script('return actualResultsDict')
        except Exception:
            # Best effort: a page without the variable means "no data for this day".
            return None
    finally:
        # Always close the browser; otherwise every call leaks a Firefox process.
        driver.quit()

    if not projected:
        return None

    df = pd.DataFrame(list(projected.values())[0]).drop_duplicates()
    # Names arrive with HTML-escaped apostrophes.
    df = df.replace(r"&#39;", "'", regex=True)

    return df


def build_train_set(start_date, end_date):
    """Build features and ownership targets for every day in a date range.

    For each day from ``start_date`` to ``end_date`` (both ``YYYY-MM-DD``,
    inclusive) the ownership table is fetched; days without one are skipped.
    Each remaining day's lineup is inner-joined to the ownership rows on
    player name (``player`` == ``name``), so the shared ``team`` column comes
    back as ``team_x`` / ``team_y``.

    Returns
    -------
    (train, target)
        ``train`` is the merged frame without the ``owned`` column and
        ``target`` is ``owned / 100``. Both share a fresh 0..n-1 index, so
        they stay aligned when a series is assigned back onto the frame.
        When no day yields data, an empty DataFrame and an empty float Series
        are returned.
    """
    first_day = datetime.strptime(start_date, '%Y-%m-%d').date()
    last_day = datetime.strptime(end_date, '%Y-%m-%d').date()
    day_count = (last_day - first_day).days + 1

    # Collect per-day frames and concatenate once; DataFrame.append was
    # removed in pandas 2.x and re-copies the frame on every iteration.
    daily_frames = []
    for offset in range(day_count):
        single_date_ = (first_day + timedelta(offset)).strftime('%Y-%m-%d')
        ownership = get_day_ownership(dt=single_date_)
        if ownership is None:
            continue
        lineup = get_lineup(day=single_date_)
        daily_frames.append(pd.merge(lineup, ownership, left_on='player', right_on='name'))

    if not daily_frames:
        # Nothing scraped: return empty results instead of failing on a
        # missing 'owned' column.
        return pd.DataFrame(), pd.Series(dtype=float)

    # ignore_index avoids repeated 0..k labels from each day's merge.
    final_df = pd.concat(daily_frames, ignore_index=True)

    target = final_df['owned'] / 100
    train = final_df.drop(['owned'], axis=1)

    return train, target

def train_model(train, target, model=None):
    """Fit a one-hot-encode + regressor pipeline and return it.

    Parameters
    ----------
    train : feature table; every column is passed through the OneHotEncoder
        (categories unseen at fit time are ignored at predict time).
    target : values to regress on.
    model : estimator placed after the encoder. Defaults to a new
        ``LinearRegression`` per call.

    Returns
    -------
    The fitted pipeline.
    """
    if model is None:
        # Build the default here: an estimator created in the signature is a
        # single shared object, so a second call would refit the regressor
        # inside the pipeline returned by the first call.
        model = LinearRegression()
    pipe = make_pipeline(OneHotEncoder(handle_unknown='ignore'), model)
    return pipe.fit(train, target)

def pred_ownership(model, df):
    """Return a copy of ``df`` with the model's output in a ``predict`` column.

    The caller's frame is left untouched. A ``predict`` column already present
    (for example from an earlier call) is dropped before scoring, so the model
    always sees the same feature columns and the call can be repeated.
    """
    features = df.drop(columns=['predict'], errors='ignore')
    return features.assign(predict=model.predict(features))

In [4]:
# Training window: scrape every day from 2020-09-05 through 2020-09-15 (inclusive).
X, y = build_train_set('2020-09-05', '2020-09-15')

In [12]:
# Fit the default pipeline on the scraped training window.
model = train_model(train=X, target=y)
# NOTE(review): 2020-09-08 falls inside the 2020-09-05..2020-09-15 range passed to
# build_train_set for X, y, so this "holdout" day was also seen in training — confirm
# whether an out-of-window date was intended.
today_slate, true_vals = build_train_set('2020-09-08', '2020-09-08')


In [13]:
# Persist the fitted pipeline next to the notebook.
dump(model, "dfs_model.joblib")

['dfs_model.joblib']

In [41]:
# Score a copy so `today_slate` keeps exactly the columns the pipeline was fitted on.
# Adding `predict` / `true_vals` to the original frame would make a second run of this
# cell pass extra columns to the model.
predictions = pred_ownership(model, today_slate.copy())
predictions['true_vals'] = true_vals
predictions

Unnamed: 0,date,slate_id,type,total_OU,team_x,opponent,team_OU,player,starting,salary,...,proj_ownr_roto,fav_line,favorite,id,name,pos,sal,team_y,predict,true_vals
0,2020-09-08,1,MAIN,219.5,MIA,MIL,112.5,Goran Dragic,YES,7200.0,...,0.195,-5.5,YES,219,Goran Dragic,PG,7200,MIA,0.281541,0.2278
1,2020-09-08,1,MAIN,219.5,MIA,MIL,112.5,Jimmy Butler,YES,8500.0,...,0.219,-5.5,YES,102,Jimmy Butler,SG/SF,8500,MIA,0.257392,0.3103
2,2020-09-08,1,MAIN,219.5,MIA,MIL,112.5,Duncan Robinson,YES,4700.0,...,0.164,-5.5,YES,994,Duncan Robinson,SG/SF,4700,MIA,0.312597,0.3371
3,2020-09-08,1,MAIN,219.5,MIA,MIL,112.5,Jae Crowder,YES,5800.0,...,0.312,-5.5,YES,3,Jae Crowder,PF,5800,MIA,0.525407,0.296
4,2020-09-08,1,MAIN,219.5,MIA,MIL,112.5,Bam Adebayo,YES,8700.0,...,0.289,-5.5,YES,852,Bam Adebayo,C,8700,MIA,0.239775,0.3708
5,2020-09-08,1,MAIN,219.5,MIA,MIL,112.5,Tyler Herro,NO,5300.0,...,0.234,-5.5,YES,1156,Tyler Herro,SG,5300,MIA,0.244402,0.1255
6,2020-09-08,1,MAIN,219.5,MIA,MIL,112.5,Kelly Olynyk,NO,3300.0,...,0.055,-5.5,YES,11,Kelly Olynyk,PF/C,3300,MIA,0.073668,0.0464
7,2020-09-08,1,MAIN,219.5,MIA,MIL,112.5,Andre Iguodala,NO,3400.0,...,0.07,-5.5,YES,301,Andre Iguodala,SF,3400,MIA,0.0192,0.0235
8,2020-09-08,1,MAIN,219.5,MIA,MIL,112.5,Kendrick Nunn,NO,3200.0,...,0.001,-5.5,YES,1013,Kendrick Nunn,PG/SG,3200,MIA,0.065021,0.0111
9,2020-09-08,1,MAIN,219.5,MIL,MIA,107.0,Eric Bledsoe,YES,6200.0,...,0.383,-5.5,NO,345,Eric Bledsoe,PG,6200,MIL,0.310135,0.4724


In [42]:
# Spot-check the lineup scrape for a single historical day.
get_lineup(day='2020-08-10')

Unnamed: 0,date,slate_id,type,total_OU,team,opponent,team_OU,player,starting,salary,position,proj_fpts_roto,proj_ownr_roto,fav_line,favorite
0,2020-08-10,1,MAIN,220.5,OKC,PHO,106.25,Chris Paul,YES,8200.0,PG,33.3,0.251,-8,NO
1,2020-08-10,1,MAIN,220.5,OKC,PHO,106.25,Lu Dort,YES,4000.0,SG/SF,18.5,0.030,-8,NO
2,2020-08-10,1,MAIN,220.5,OKC,PHO,106.25,Abdel Nader,YES,3000.0,PF,20.3,0.001,-8,NO
3,2020-08-10,1,MAIN,220.5,OKC,PHO,106.25,Darius Bazley,YES,3300.0,PF,45.0,0.341,-8,NO
4,2020-08-10,1,MAIN,220.5,OKC,PHO,106.25,Mike Muscala,YES,3200.0,C,25.0,0.351,-8,NO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,2020-08-10,1,MAIN,223.0,LAL,DEN,114.00,Talen Horton-Tucker,NO,3000.0,SG,0.0,,-5,YES
127,2020-08-10,1,MAIN,223.0,LAL,DEN,114.00,Markieff Morris,NO,3000.0,PF/C,17.8,,-5,YES
128,2020-08-10,1,MAIN,223.0,LAL,DEN,114.00,Jared Dudley,NO,3000.0,SF/PF,0.0,,-5,YES
129,2020-08-10,1,MAIN,223.0,LAL,DEN,114.00,Devontae Cacok,NO,3000.0,PF,0.0,,-5,YES


In [7]:
# Spot-check the ownership scrape for a single historical day.
get_day_ownership('2020-08-2')

Unnamed: 0,id,name,owned,pos,sal,team
0,166,Paul George,88.47,SF,7900,LAC
1,1063,Michael Porter Jr.,87.6,SF/PF,3000,DEN
2,447,Kawhi Leonard,81.29,SF,8900,LAC
3,489,Nikola Jokic,80.1,C,9000,DEN
4,331,Jordan Clarkson,74.12,PG/SG,3900,UTA
5,910,Monte Morris,66.56,PG,3500,DEN
6,63,Joel Embiid,39.15,C,9700,PHI
7,927,Donovan Mitchell,38.23,PG/SG,6900,UTA
8,986,Aaron Holiday,33.66,PG/SG,4000,IND
9,152,Reggie Jackson,30.67,PG,4100,LAC


# Scratch

In [34]:
# Scratch: fetch one LineStar ownership page with plain requests and list its
# <script> tags.
day = '1325'
results_site = requests.get(
    f"https://www.linestarapp.com/Ownership/Sport/NBA/Site/DraftKings/PID/{day}"
)
results_soup = BeautifulSoup(markup=results_site.text, features='html.parser')
table = results_soup.find_all(name='tbody')  # , attrs = {'id':'tableTournament'})

results_soup.find_all(name='script')

[<script src="/Resources/libraries/jQuery/01_09_01/jquery.js?cdv=61" type="text/javascript"></script>,
 <script src="/Resources/libraries/jQuery-Migrate/01_02_01/jquery-migrate.js?cdv=61" type="text/javascript"></script>,
 <script src="/Resources/libraries/jQuery-UI/01_11_03/jquery-ui.js?cdv=61" type="text/javascript"></script>,
 <script src="/Portals/_default/Skins/FantasySportsSkins/DDRMenu/TWMenu/TWMenu.js" type="text/javascript"></script>,
 <script src="/Telerik.Web.UI.WebResource.axd?_TSM_HiddenField_=ScriptManager_TSM&amp;compress=1&amp;_TSM_CombinedScripts_=%3b%3bSystem.Web.Extensions%2c+Version%3d4.0.0.0%2c+Culture%3dneutral%2c+PublicKeyToken%3d31bf3856ad364e35%3aen-US%3a2d39c544-8ec0-4a2c-bc21-04e23af02570%3aea597d4b%3ab25378d2%3bTelerik.Web.UI%2c+Version%3d2013.2.717.40%2c+Culture%3dneutral%2c+PublicKeyToken%3d121fae78165ba3d4%3aen-US%3aa713c6a1-0827-4380-88eb-63855ca4c2d9%3a16e4e7cd%3af7645509%3aed16cbdc" type="text/javascript"></script>,
 <script src="/js/dnn.js?cdv=61" typ

In [98]:
# Scratch: flat per-player scrape of today's RotoGrinders lineup page.
# Fetch the page here so the cell does not depend on a `plyr_soup` left over in the
# kernel; the only other place it is built is as a local inside get_lineup.
plyr_site = requests.get(
    f"https://rotogrinders.com/lineups/nba?date={datetime.today().date()}&site=draftkings"
)
plyr_soup = BeautifulSoup(plyr_site.text, 'html.parser')

test = plyr_soup.find_all('li', attrs={'class': 'player'})
#json.loads(test.find('input')['value'])

slate = '39113'
slate_type = 'MAIN'

# Collect rows and build the frame once; DataFrame.append was removed in pandas 2.x.
player_rows = []
for player in test:
    row = {}
    row['date'] = datetime.today().date()
    row['slate_id'] = slate
    row['type'] = slate_type
    row['player'] = player.find('a').text
    row['salary'] = player['data-salary']
    row['position'] = player['data-pos']
    row['proj_fpts_roto'] = player.find('span', attrs={'class': 'fpts'}).text
    row['proj_ownr_roto'] = player.find('span', attrs={'class': 'pown'}).text
    player_rows.append(row)

player_df = pd.DataFrame(
    player_rows,
    columns=['date', 'slate_id', 'type', 'player', 'salary', 'position',
             'proj_fpts_roto', 'proj_ownr_roto'],
)

player_df

Unnamed: 0,date,slate_id,type,player,salary,position,proj_fpts_roto,proj_ownr_roto
0,2020-09-01,39113,MAIN,Kemba Walker,$7.6K,PG,38.27,32.20%
1,2020-09-01,39113,MAIN,Marcus Smart,$6K,PG/SG,29.27,24.90%
2,2020-09-01,39113,MAIN,Jaylen Brown,$7.3K,SG/SF,38.88,40.50%
3,2020-09-01,39113,MAIN,Jayson Tatum,$9.2K,SF/PF,48.4,30.10%
4,2020-09-01,39113,MAIN,Daniel Theis,$5.2K,PF/C,28.07,33.30%
5,2020-09-01,39113,MAIN,Robert Williams III,$3.3K,C,16.6,17.70%
6,2020-09-01,39113,MAIN,Brad Wanamaker,$3.5K,PG,14.01,18.70%
7,2020-09-01,39113,MAIN,Semi Ojeleye,$3.3K,PF,8.63,8.30%
8,2020-09-01,39113,MAIN,Grant Williams,$3.1K,SF/PF,5.99,11.40%
9,2020-09-01,39113,MAIN,Enes Kanter,$3.4K,C,0.0,
