## Time Zone Correction
We need to correct a time zone issue that occurs when web scraping odds from DraftKings (DK). Times should always be read in one specific time zone and stored in one specific time zone, so that we never have to convert manually depending on the time of year (e.g., when daylight saving time starts or ends).

In [245]:
# Libraries
import numpy as np
import pandas as pd
import datetime as dt
from dateutil import tz
import requests
from bs4 import BeautifulSoup
import json
import pytz

In [246]:
# Read in the team name -> 3 letter code dictionary used for cleaning team names.
# JSON text is UTF-8 by specification, so decode it explicitly instead of relying on
# the platform's default encoding (which differs between operating systems).
with open('../data/team_name_dictionary.txt', 'r', encoding='utf-8') as f:
    # Load the dictionary from the file
    team_name_dict = json.load(f)

In [301]:
# DraftKings NHL game-lines page that is passed to retrieve_sportsbook_info
url = 'https://sportsbook.draftkings.com/leagues/hockey/nhl?category=game-lines&subcategory=game'

In [302]:
def get_total_odds(sportsbook_recording, today_only=True, team_names=None):
    """Build a data frame of over/under (total) odds from one sportsbook recording.

    Parameters
    ----------
    sportsbook_recording : dict
        Recording with the keys 'datetime', 'game_dates', 'game_times', 'teams',
        'lines' and 'odds'. 'teams' alternates away team, home team; 'lines'
        alternates puck line, O/U line; every third entry of 'odds' (starting at
        index 1) is the O/U price. 'game_dates' and 'game_times' must have one
        entry per output row (two rows per game: over and under).
    today_only : bool, default True
        If True, keep only the rows whose 'date_game' equals dt.date.today().
    team_names : dict, optional
        Mapping of lower-cased team name to 3 letter code. Defaults to the
        notebook-level ``team_name_dict`` so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per (game, bet type) with the columns date_recorded,
        time_recorded, game_id, date_game, time_game, home, away, bet_type,
        total_line and total_odds. Empty (but with all columns) when the
        recording contains no games.
    """
    # Fall back to the notebook-level dictionary when no mapping is supplied
    if team_names is None:
        team_names = team_name_dict

    teams = sportsbook_recording['teams']

    # Home teams are every second entry starting at index 1, away teams every second
    # entry starting at index 0; each is repeated for the O row and the U row.
    # dtype=object keeps the columns string-typed even when nothing was scraped;
    # otherwise np.repeat([]) yields float64 and the .str accessor below raises.
    home_teams = np.repeat(np.asarray(teams[1::2], dtype=object), 2)
    away_teams = np.repeat(np.asarray(teams[::2], dtype=object), 2)

    # List of O/U lines (ex: 6.5, 6.5, 5.5, 5.5, 6, 6, etc...)
    ou_lines = sportsbook_recording['lines'][1::2]

    # List of O/U bet types (O then U repeated once per game)
    ou_bet_type = ['O', 'U'] * (len(ou_lines) // 2)

    # List of O/U odds
    ou_odds = sportsbook_recording['odds'][1::3]

    recorded_at = sportsbook_recording['datetime']
    df_total = pd.DataFrame({
        'date_recorded': recorded_at.date(),
        'time_recorded': recorded_at.strftime('%H:%M:%S'),
        'game_id': np.nan,  # placeholder column; no id is available at this point
        'date_game': sportsbook_recording['game_dates'],
        'time_game': sportsbook_recording['game_times'],
        'home': home_teams,
        'away': away_teams,
        'bet_type': ou_bet_type,
        'total_line': ou_lines,
        'total_odds': ou_odds
    })

    # Convert team names to 3 letter code (names missing from the mapping stay lower-cased)
    df_total['home'] = df_total['home'].str.lower().replace(team_names)
    df_total['away'] = df_total['away'].str.lower().replace(team_names)

    # Return df of odds
    if today_only:
        return df_total[df_total['date_game'] == dt.date.today()]
    return df_total

In [303]:
# Function to return a dictionary of raw odds information scraped from DK
# (recording time, game dates/times, teams, lines, and odds).
def retrieve_sportsbook_info(url, today_label='today', tomorrow_label='tomorrow', timeout=30):
    """Scrape a DK game-lines page and collect its odds information.

    Parameters
    ----------
    url : str
        Address of the DK page to request.
    today_label : str, default 'today'
        Table header text (case-insensitive) that marks games dated today.
    tomorrow_label : str, default 'tomorrow'
        Table header text (case-insensitive) that marks games dated tomorrow.
        Tables with any other header are skipped.
    timeout : float, default 30
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    dict
        'datetime' (naive local datetime of the recording), 'game_dates' and
        'game_times' (game start converted to US Central time), 'teams',
        'lines' and 'odds' (text scraped from the tables, in page order).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If no response arrives within ``timeout`` seconds.
    """
    # Record the current date and time so we know when the recording occurred
    dt_now = dt.datetime.now()

    # Record the HTML code from url as bs4 object.
    # Fail loudly on an HTTP error instead of silently parsing an error page into an empty result.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    title = soup.find('title')
    if title is not None:
        print(title.text.strip().lower())

    # Each sportsbook table on the page separated in a list
    # We need them as separate items in list because each one is a different date.
    # This is the only way to correctly assign a date to each game.
    sportsbook_tables = soup.find_all(class_ = 'sportsbook-table')

    # Establish time zones. The scraped times are treated as UTC and converted to Central Time
    # (pytz accounts for whether the game date falls in DST).
    central_tz = pytz.timezone('America/Chicago')
    utc_tz = pytz.utc

    # Number of days to add to the recording date for each recognised table label.
    # NOTE(review): the base date is the machine's local date; if DK labels its tables
    # relative to the UTC date, this can be off by one late in the evening - confirm.
    day_offsets = {today_label.lower(): 0, tomorrow_label.lower(): 1}
    base_date = dt_now.date()

    # Create empty spaces to store each set of datetimes, teams, lines, and odds
    # We will extend these lists for each DK table we come across
    games_dt = []
    teams = []
    lines = []
    odds = []

    # For each sportsbook table on DK, collect the odds information
    for table in sportsbook_tables:
        # Get the DK date label; a table without the expected header cannot be dated, so skip it
        header = table.find(class_ = 'always-left column-header')
        if header is None:
            continue
        date_label = header.text.strip().lower()

        # If label is not the 'today' or 'tomorrow' label, just go to the next table.
        # Even with the time zone issues, games occurring today should never appear in any other table.
        if date_label not in day_offsets:
            continue
        date = base_date + dt.timedelta(days = day_offsets[date_label])

        # Gather DK version of time information and attach the table's date to it
        for cell in table.find_all(class_ = 'event-cell__start-time'):
            dk_time = dt.datetime.strptime(cell.text.strip(), '%I:%M%p').time()
            dk_game_dt = dt.datetime.combine(date, dk_time)

            # Perform the time zone change to central time, then drop tzinfo to store naive values
            central_dt = utc_tz.localize(dk_game_dt).astimezone(central_tz)
            games_dt.append(central_dt.replace(tzinfo = None))

        # Get the list of teams playing
        teams.extend(team.text.strip() for team in table.find_all(class_ = 'event-cell__name-text'))

        # Gathers puck line and O/U lines (ex: -1.5, 6.5, +1.5, 6.5, etc...)
        lines.extend(line.text for line in table.find_all(class_ = 'sportsbook-outcome-cell__line'))

        # List of odds
        # Replace the Unicode minus sign with an ASCII hyphen so the odds parse as numbers later
        odds.extend(
            odd.text.replace("−", "-")
            for odd in table.find_all(class_ = 'sportsbook-outcome-cell__elements')
        )

    # Create dictionary of information regarding the tables DK.
    # Later, we will filter this to only include todays games.
    combined_info = {
        'datetime':dt_now,
        'game_dates':[game.date() for game in games_dt],
        'game_times':[game.time() for game in games_dt],
        'teams':teams,
        'lines':lines,
        'odds':odds
    }

    return combined_info

### End

Need to do the following:
* Only request the website once
* If possible, do not call the ML/PL/Total functions within the sportsbook recording function.

In [304]:
# Request the DK page and keep the returned recording dictionary for inspection
test = retrieve_sportsbook_info(url)

nhl betting odds & lines: game lines - game | draftkings sportsbook


In [281]:
# Scratch check: a non-empty list is truthy even when its only element is 0
a = [0]
print(bool(a))

True


In [277]:
# Inspect the raw recording dictionary returned by retrieve_sportsbook_info
print(test)

{'datetime': datetime.datetime(2023, 8, 10, 13, 29, 49, 510214), 'game_dates': [], 'game_times': [], 'teams': [], 'lines': [], 'odds': []}


In [274]:
# Show the O/U odds table built from the recording, limited to games dated today
display(get_total_odds(test, today_only=True))

Unnamed: 0,date_recorded,time_recorded,game_id,date_game,time_game,home,away,bet_type,total_line,total_odds
0,2023-08-10,13:01:03,,2023-08-10,16:30:00,TBL,NSH,O,6.0,-115
1,2023-08-10,13:01:03,,2023-08-10,16:30:00,TBL,NSH,U,6.0,-105
2,2023-08-10,13:01:03,,2023-08-10,19:00:00,PIT,CHI,O,6.5,-120
3,2023-08-10,13:01:03,,2023-08-10,19:00:00,PIT,CHI,U,6.5,100
4,2023-08-10,13:01:03,,2023-08-10,21:30:00,VGK,SEA,O,6.0,-120
5,2023-08-10,13:01:03,,2023-08-10,21:30:00,VGK,SEA,U,6.0,100


In [305]:
# Scratch check: embed len() of a list inside an f-string
a = [1, 2]
print(f'length is {len(a)}')

length is 2
