## Scrape odds continuously from DraftKings (DK)

In [121]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import numpy as np
import datetime as dt

In [8]:
# DraftKings sportsbook URL for the NHL league page; the query string selects
# the "game-lines" category and the "game" subcategory.
dk_hockey_main_url = 'https://sportsbook.draftkings.com/leagues/hockey/nhl?category=game-lines&subcategory=game'

In [35]:
# Download the DraftKings NHL game-lines page and parse the HTML.
# - timeout: without one, requests waits indefinitely on a stalled connection
#   and the cell never finishes.
# - raise_for_status(): stop here on an HTTP error (4xx/5xx) instead of parsing
#   an error page and failing later with a confusing "table not found".
response = requests.get(dk_hockey_main_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

### Isolate the table for today's games

In [62]:
# All tables on DK
sportsbook_tables = soup.find_all(class_ = 'sportsbook-table')

# Someone please explain why 'tomorrow' means today in the DK HTML even though on the website it says 'Today'
for ind, table in enumerate(sportsbook_tables):
    if 'tomorrow' in [x.text.strip().lower() for x in table.find_all(class_ = 'sportsbook-table-header__title')]:
        today_table = sportsbook_tables[ind]
#today_table

### Gather lists for df (teams, bet type, line, odds)

In [181]:
# List of teams (twice each: one row for the puckline, one for the moneyline)
today_teams = [team.text.strip() for team in today_table.find_all(class_ = 'event-cell__name-text')]
today_teams_game_odds = np.repeat(today_teams, 2)
print(today_teams_game_odds)

# Bet types alternate puckline, moneyline for every team.
# Count the team rows directly instead of via set(): de-duplicating would
# under-count if a team name ever appeared more than once, leaving this list
# shorter than the team column.
n_teams = len(today_teams)
today_bet_types = ['puckline', 'moneyline'] * n_teams
print(today_bet_types)

# The line cells alternate puckline spread, O/U total. Select every other entry
# (the spreads) to avoid the O/U lines, since they won't fit in this table.
today_lines = [x.text for x in today_table.find_all(class_ = 'sportsbook-outcome-cell__line')]
today_lines_game_odds = today_lines[::2]
# Follow each spread with NaN: the moneyline row of the same team has no line.
today_lines_game_odds_revised = []
for line in today_lines_game_odds:
    today_lines_game_odds_revised.extend([line, np.nan])
print(today_lines_game_odds_revised)

# The odds cells come in groups of three per team; the middle one
# (index % 3 == 1) is the O/U price, which is dropped here because it won't fit
# in this table. The remaining two are kept in page order.
today_odds = [x.text for ind, x in enumerate(today_table.find_all(class_ = 'sportsbook-outcome-cell__elements')) if (ind % 3 != 1)]
print(today_odds)

['CGY Flames' 'CGY Flames' 'OTT Senators' 'OTT Senators' 'ARI Coyotes'
 'ARI Coyotes' 'NSH Predators' 'NSH Predators' 'FLA Panthers'
 'FLA Panthers' 'MIN Wild' 'MIN Wild' 'DET Red Wings' 'DET Red Wings'
 'VAN Canucks' 'VAN Canucks' 'BUF Sabres' 'BUF Sabres' 'LA Kings'
 'LA Kings']
['puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline']
['-1.5', nan, '+1.5', nan, '+1.5', nan, '-1.5', nan, '+1.5', nan, '-1.5', nan, '+1.5', nan, '-1.5', nan, '+1.5', nan, '-1.5', nan]
['+145', '−165', '−195', '+140', '−120', '+200', '+100', '−240', '−290', '−110', '+205', '−110', '−260', '−105', '+185', '−115', '−220', '+125', '+160', '−145']


### Get current date

In [138]:
# Timestamp of this scrape. datetime.now() is called without a tz argument, so
# the value is a naive datetime in the machine's local time.
dt_now = dt.datetime.now()

str

### Set up a data frame that includes all information about money/puck odds

In [167]:
# Long-format table of puckline/moneyline odds: one row per (team, bet type).
# The three scalar date/time values are broadcast by pandas to every row.
# date_game is set to the recording date, i.e. the scraped games are taken to
# be played on the day of the scrape.
df_today = pd.DataFrame(dict([
    ('date_recorded', dt_now.date()),
    ('time_recorded', dt_now.strftime('%H:%M:%S')),
    ('date_game', dt_now.date()),
    ('team', today_teams_game_odds),
    ('bet_type', today_bet_types),
    ('line', today_lines_game_odds_revised),
    ('odds', today_odds),
]))

display(df_today)

Unnamed: 0,date_recorded,time_recorded,date_game,team,bet_type,line,odds
0,2023-02-13,20:37:33,2023-02-13,CGY Flames,puckline,-1.5,+145
1,2023-02-13,20:37:33,2023-02-13,CGY Flames,moneyline,,−165
2,2023-02-13,20:37:33,2023-02-13,OTT Senators,puckline,1.5,−195
3,2023-02-13,20:37:33,2023-02-13,OTT Senators,moneyline,,+140
4,2023-02-13,20:37:33,2023-02-13,ARI Coyotes,puckline,1.5,−120
5,2023-02-13,20:37:33,2023-02-13,ARI Coyotes,moneyline,,+200
6,2023-02-13,20:37:33,2023-02-13,NSH Predators,puckline,-1.5,+100
7,2023-02-13,20:37:33,2023-02-13,NSH Predators,moneyline,,−240
8,2023-02-13,20:37:33,2023-02-13,FLA Panthers,puckline,1.5,−290
9,2023-02-13,20:37:33,2023-02-13,FLA Panthers,moneyline,,−110


### Write data frame to file or update an existing one

In [169]:
# Append this scrape to the accumulated team-odds history, creating the file on
# the first run.
# Only a missing (or completely empty) file is treated as "no history yet".
# Any other failure -- e.g. a CSV that cannot be parsed -- now propagates
# instead of being swallowed by a bare `except`, which used to overwrite the
# whole history with just today's rows.
try:
    current_game_team_odds = pd.read_csv('../data/game_team_odds.csv')
except (FileNotFoundError, pd.errors.EmptyDataError):
    updated_game_team_odds = df_today
else:
    updated_game_team_odds = pd.concat([current_game_team_odds, df_today], axis=0)
updated_game_team_odds.to_csv('../data/game_team_odds.csv', header=True, index=False)

### Set up O/U table

In [190]:
# Teams are listed in pairs, one pair per game. The second team of each pair is
# taken as the home team and the first as the away team (presumably DK lists
# the away team first -- verify). Each is repeated twice: one row for the Over
# and one for the Under.
home_team = np.repeat(today_teams[1::2], 2)
print(home_team)

away_team = np.repeat(today_teams[::2], 2)
print(away_team)

# Odd-indexed line cells are the O/U totals (the even-indexed ones are the
# puckline spreads). Bind only ou_lines here: the previous chained assignment
# also overwrote today_lines_game_odds (the spreads) with these totals.
ou_lines = today_lines[1::2]
print(ou_lines)

# One 'O' and one 'U' per game; floor division keeps the count an int.
ou_bet_type = ['O', 'U'] * (len(ou_lines) // 2)
print(ou_bet_type)

# The odds cells come in groups of three per team; the middle one
# (index % 3 == 1) is the O/U price.
ou_odds = [x.text for ind, x in enumerate(today_table.find_all(class_ = 'sportsbook-outcome-cell__elements')) if (ind % 3 == 1)]
print(ou_odds)

['OTT Senators' 'OTT Senators' 'NSH Predators' 'NSH Predators' 'MIN Wild'
 'MIN Wild' 'VAN Canucks' 'VAN Canucks' 'LA Kings' 'LA Kings']
['CGY Flames' 'CGY Flames' 'ARI Coyotes' 'ARI Coyotes' 'FLA Panthers'
 'FLA Panthers' 'DET Red Wings' 'DET Red Wings' 'BUF Sabres' 'BUF Sabres']
['6.5', '6.5', '6', '6', '6.5', '6.5', '6.5', '6.5', '6.5', '6.5']
['O', 'U', 'O', 'U', 'O', 'U', 'O', 'U', 'O', 'U']
['−115', '−105', '−105', '−115', '−120', '+100', '−130', '+105', '−140', '+110']


In [191]:
# Long-format table of over/under odds: one row per (game, O or U).
# The three scalar date/time values are broadcast by pandas to every row.
# date_game is set to the recording date, i.e. the scraped games are taken to
# be played on the day of the scrape.
df_today_total_odds = pd.DataFrame(dict([
    ('date_recorded', dt_now.date()),
    ('time_recorded', dt_now.strftime('%H:%M:%S')),
    ('date_game', dt_now.date()),
    ('home', home_team),
    ('away', away_team),
    ('bet_type', ou_bet_type),
    ('line', ou_lines),
    ('odds', ou_odds),
]))

display(df_today_total_odds)

Unnamed: 0,date_recorded,time_recorded,date_game,home,away,bet_type,line,odds
0,2023-02-13,20:37:33,2023-02-13,OTT Senators,CGY Flames,O,6.5,−115
1,2023-02-13,20:37:33,2023-02-13,OTT Senators,CGY Flames,U,6.5,−105
2,2023-02-13,20:37:33,2023-02-13,NSH Predators,ARI Coyotes,O,6.0,−105
3,2023-02-13,20:37:33,2023-02-13,NSH Predators,ARI Coyotes,U,6.0,−115
4,2023-02-13,20:37:33,2023-02-13,MIN Wild,FLA Panthers,O,6.5,−120
5,2023-02-13,20:37:33,2023-02-13,MIN Wild,FLA Panthers,U,6.5,+100
6,2023-02-13,20:37:33,2023-02-13,VAN Canucks,DET Red Wings,O,6.5,−130
7,2023-02-13,20:37:33,2023-02-13,VAN Canucks,DET Red Wings,U,6.5,+105
8,2023-02-13,20:37:33,2023-02-13,LA Kings,BUF Sabres,O,6.5,−140
9,2023-02-13,20:37:33,2023-02-13,LA Kings,BUF Sabres,U,6.5,+110


In [194]:
# Append this scrape to the accumulated over/under history, creating the file
# on the first run.
# Only a missing (or completely empty) file is treated as "no history yet".
# Any other failure -- e.g. a CSV that cannot be parsed -- now propagates
# instead of being swallowed by a bare `except`, which used to overwrite the
# whole history with just today's rows.
try:
    current_game_total_odds = pd.read_csv('../data/game_total_odds.csv')
except (FileNotFoundError, pd.errors.EmptyDataError):
    updated_game_total_odds = df_today_total_odds
else:
    updated_game_total_odds = pd.concat([current_game_total_odds, df_today_total_odds], axis=0)
updated_game_total_odds.to_csv('../data/game_total_odds.csv', header=True, index=False)