## Scrape NHL odds continuously from DraftKings (DK)

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import numpy as np
import datetime as dt

In [2]:
# DraftKings NHL league page, restricted by its query string to the
# "game-lines" category and "game" subcategory.
dk_hockey_main_url = "https://sportsbook.draftkings.com/leagues/hockey/nhl?category=game-lines&subcategory=game"

In [3]:
# Download the DraftKings NHL game-lines page and parse it.
# - timeout: without one, requests waits indefinitely on a stalled connection
#   and the cell never finishes.
# - raise_for_status(): stop here on an HTTP error (4xx/5xx) instead of
#   handing an error page to the parsing cells below.
response = requests.get(dk_hockey_main_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

### Isolate the table for today's games

In [4]:
# All tables on the DK page
sportsbook_tables = soup.find_all(class_ = 'sportsbook-table')

# Reset before searching so that a failed match can never silently reuse a
# table left in the kernel by an earlier run of this cell.
today_table = None

# Someone please explain why 'tomorrow' means today in the DK HTML even though on the website it says 'Today'
for table in sportsbook_tables:
    header_titles = [x.text.strip().lower() for x in table.find_all(class_ = 'sportsbook-table-header__title')]
    if 'tomorrow' in header_titles:
        # No break: if several tables match, the last one wins.
        today_table = table

# Fail here with a clear message rather than with a NameError (or stale data)
# in a later cell.
if today_table is None:
    raise ValueError("No sportsbook table with a 'tomorrow' header was found on the DK page")

### Gather lists for df (teams, bet type, line, odds)

In [5]:
# Team names in the order the table lists them.
today_teams = [team.text.strip() for team in today_table.find_all(class_ = 'event-cell__name-text')]
# Each team appears twice: one row for its puckline and one for its moneyline.
today_teams_game_odds = np.repeat(today_teams, 2)
print(today_teams_game_odds)

# Bet types alternate puckline / moneyline, one pair per team row.
# Count the team rows directly: counting unique names (via set) would come up
# short if a name ever repeated, and the DataFrame columns would then have
# different lengths.
n_teams = len(today_teams)
today_bet_types = ['puckline', 'moneyline'] * n_teams
print(today_bet_types)

# Select every other line entry to avoid selecting the O/U lines, since they
# won't fit in this table (they are picked up by the O/U cell instead).
today_lines = [x.text for x in today_table.find_all(class_ = 'sportsbook-outcome-cell__line')]
today_lines_game_odds = today_lines[::2]
# A moneyline bet has no line, so each puckline line is followed by NaN to
# keep this list aligned with today_bet_types.
today_lines_game_odds_revised = []
for line in today_lines_game_odds:
    today_lines_game_odds_revised.extend([line, np.nan])
print(today_lines_game_odds_revised)

# The odds cells are read in groups of three; position 1 of each group
# (ind % 3 == 1) is the O/U price, which is skipped because it won't fit in
# this table. The remaining two are used as puckline then moneyline.
# NOTE(review): the scraped odds text appears to use the Unicode minus sign
# (U+2212) for negative prices - confirm before converting this column to numbers.
today_odds = [x.text for ind, x in enumerate(today_table.find_all(class_ = 'sportsbook-outcome-cell__elements')) if (ind % 3 != 1)]
print(today_odds)

['CAR Hurricanes' 'CAR Hurricanes' 'WAS Capitals' 'WAS Capitals'
 'CHI Blackhawks' 'CHI Blackhawks' 'MTL Canadiens' 'MTL Canadiens'
 'NJ Devils' 'NJ Devils' 'CBJ Blue Jackets' 'CBJ Blue Jackets'
 'OTT Senators' 'OTT Senators' 'NY Islanders' 'NY Islanders'
 'FLA Panthers' 'FLA Panthers' 'STL Blues' 'STL Blues' 'SEA Kraken'
 'SEA Kraken' 'WPG Jets' 'WPG Jets' 'BOS Bruins' 'BOS Bruins' 'DAL Stars'
 'DAL Stars' 'TB Lightning' 'TB Lightning' 'COL Avalanche' 'COL Avalanche'
 'PIT Penguins' 'PIT Penguins' 'SJ Sharks' 'SJ Sharks']
['puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline', 'puckline', 'moneyline']
['-1.5', nan, '+1.5'

### Get current date

In [6]:
# Single timestamp (naive, local time) taken once so every row built from this
# scrape carries the same recorded date and time.
dt_now = dt.datetime.now(tz=None)

### Set up a data frame that includes all information about moneyline/puckline odds

In [7]:
# One row per (team, bet type). The three date/time values are scalars that
# pandas broadcasts down every row; the other columns are the parallel lists
# built in the gathering cell.
df_today = pd.DataFrame(
    dict(
        date_recorded=dt_now.date(),
        time_recorded=dt_now.strftime('%H:%M:%S'),
        date_game=dt_now.date(),
        team=today_teams_game_odds,
        bet_type=today_bet_types,
        line=today_lines_game_odds_revised,
        odds=today_odds,
    )
)

display(df_today)

Unnamed: 0,date_recorded,time_recorded,date_game,team,bet_type,line,odds
0,2023-02-14,17:07:23,2023-02-14,CAR Hurricanes,puckline,-1.5,+145
1,2023-02-14,17:07:23,2023-02-14,CAR Hurricanes,moneyline,,−170
2,2023-02-14,17:07:23,2023-02-14,WAS Capitals,puckline,1.5,−195
3,2023-02-14,17:07:23,2023-02-14,WAS Capitals,moneyline,,+145
4,2023-02-14,17:07:23,2023-02-14,CHI Blackhawks,puckline,1.5,−225
5,2023-02-14,17:07:23,2023-02-14,CHI Blackhawks,moneyline,,+120
6,2023-02-14,17:07:23,2023-02-14,MTL Canadiens,puckline,-1.5,+165
7,2023-02-14,17:07:23,2023-02-14,MTL Canadiens,moneyline,,−140
8,2023-02-14,17:07:23,2023-02-14,NJ Devils,puckline,-1.5,+115
9,2023-02-14,17:07:23,2023-02-14,NJ Devils,moneyline,,−205


### Write data frame to file or update an existing one

In [12]:
# Append this scrape's puckline/moneyline rows to the running CSV:
# read the existing file, stack df_today underneath, and write it all back.
try:
    current_game_team_odds = pd.read_csv('../data/game_team_odds.csv')
    updated_game_team_odds = pd.concat([current_game_team_odds, df_today], axis=0).reset_index(drop=True)
    updated_game_team_odds.to_csv('../data/game_team_odds.csv', header=True, index=False)
except Exception as err:
    # `except Exception` rather than a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate untouched; `from err` keeps the real cause
    # (missing file, parse error, ...) attached to the traceback.
    # There is deliberately no fallback that writes df_today on its own:
    # doing so would overwrite all previously collected data.
    raise Exception('New data was not able to be concatenated') from err

### Set up over/under (O/U) table

In [13]:
# Teams are paired off in table order: the second team of each pair is used
# as the home side and the first as the away side. Each is repeated twice so
# there is one row for the Over and one for the Under.
home_team = np.repeat(today_teams[1::2], 2)
print(home_team)

away_team = np.repeat(today_teams[::2], 2)
print(away_team)

# The O/U lines are the odd-indexed entries of today_lines (the even-indexed
# ones are the puckline lines). The value is bound to ou_lines only: binding
# it to today_lines_game_odds as well would overwrite the puckline lines
# created in the earlier cell and leave the kernel state dependent on which
# cells have been run.
ou_lines = today_lines[1::2]
print(ou_lines)

# One 'O' / 'U' pair per game; floor division keeps the repeat count an int.
ou_bet_type = ['O', 'U'] * (len(ou_lines) // 2)
print(ou_bet_type)

# Of every group of three odds cells, position 1 (ind % 3 == 1) is the O/U price.
ou_odds = [x.text for ind, x in enumerate(today_table.find_all(class_ = 'sportsbook-outcome-cell__elements')) if (ind % 3 == 1)]
print(ou_odds)

['WAS Capitals' 'WAS Capitals' 'MTL Canadiens' 'MTL Canadiens'
 'CBJ Blue Jackets' 'CBJ Blue Jackets' 'NY Islanders' 'NY Islanders'
 'STL Blues' 'STL Blues' 'WPG Jets' 'WPG Jets' 'DAL Stars' 'DAL Stars'
 'COL Avalanche' 'COL Avalanche' 'SJ Sharks' 'SJ Sharks']
['CAR Hurricanes' 'CAR Hurricanes' 'CHI Blackhawks' 'CHI Blackhawks'
 'NJ Devils' 'NJ Devils' 'OTT Senators' 'OTT Senators' 'FLA Panthers'
 'FLA Panthers' 'SEA Kraken' 'SEA Kraken' 'BOS Bruins' 'BOS Bruins'
 'TB Lightning' 'TB Lightning' 'PIT Penguins' 'PIT Penguins']
['5.5', '5.5', '6', '6', '6.5', '6.5', '6', '6', '6.5', '6.5', '6.5', '6.5', '5.5', '5.5', '5.5', '5.5', '6.5', '6.5']
['O', 'U', 'O', 'U', 'O', 'U', 'O', 'U', 'O', 'U', 'O', 'U', 'O', 'U', 'O', 'U', 'O', 'U']
['−115', '−105', '−110', '−110', '−105', '−115', '−120', '+100', '−140', '+110', '+100', '−120', '−110', '−110', '−120', '+100', '−120', '+100']


In [14]:
# One row per (game, Over/Under side). The date/time scalars are broadcast by
# pandas down every row; the remaining columns are the parallel O/U lists
# built in the previous cell.
df_today_total_odds = pd.DataFrame(
    dict(
        date_recorded=dt_now.date(),
        time_recorded=dt_now.strftime('%H:%M:%S'),
        date_game=dt_now.date(),
        home=home_team,
        away=away_team,
        bet_type=ou_bet_type,
        line=ou_lines,
        odds=ou_odds,
    )
)

display(df_today_total_odds)

Unnamed: 0,date_recorded,time_recorded,date_game,home,away,bet_type,line,odds
0,2023-02-14,17:07:23,2023-02-14,WAS Capitals,CAR Hurricanes,O,5.5,−115
1,2023-02-14,17:07:23,2023-02-14,WAS Capitals,CAR Hurricanes,U,5.5,−105
2,2023-02-14,17:07:23,2023-02-14,MTL Canadiens,CHI Blackhawks,O,6.0,−110
3,2023-02-14,17:07:23,2023-02-14,MTL Canadiens,CHI Blackhawks,U,6.0,−110
4,2023-02-14,17:07:23,2023-02-14,CBJ Blue Jackets,NJ Devils,O,6.5,−105
5,2023-02-14,17:07:23,2023-02-14,CBJ Blue Jackets,NJ Devils,U,6.5,−115
6,2023-02-14,17:07:23,2023-02-14,NY Islanders,OTT Senators,O,6.0,−120
7,2023-02-14,17:07:23,2023-02-14,NY Islanders,OTT Senators,U,6.0,+100
8,2023-02-14,17:07:23,2023-02-14,STL Blues,FLA Panthers,O,6.5,−140
9,2023-02-14,17:07:23,2023-02-14,STL Blues,FLA Panthers,U,6.5,+110


In [16]:
# Append this scrape's over/under rows to the running CSV:
# read the existing file, stack df_today_total_odds underneath, and write it all back.
try:
    current_game_total_odds = pd.read_csv('../data/game_total_odds.csv')
    updated_game_total_odds = pd.concat([current_game_total_odds, df_today_total_odds], axis=0).reset_index(drop=True)
    updated_game_total_odds.to_csv('../data/game_total_odds.csv', header=True, index=False)
except Exception as err:
    # `except Exception` rather than a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate untouched; `from err` keeps the real cause
    # (missing file, parse error, ...) attached to the traceback.
    # There is deliberately no fallback that writes df_today_total_odds on its
    # own: doing so would overwrite all previously collected data.
    raise Exception('New data was not able to be concatenated') from err