In [8]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait       
from selenium.webdriver.common.by import By       
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service

options = Options()
options.headless = True

import numpy as np
import pandas as pd
import time as time
from time import sleep
import random
from tqdm import tqdm
import sqlite3
from IPython.display import clear_output

from nba_api.stats.endpoints import leaguegamelog
from nba_api.stats.library.parameters import SeasonAll
from nba_api.stats.static import teams

In [2]:
def season_string(season):
    return str(season) + '-' + str(season+1)[-2:]

def get_game_dates(season):
    season_str = season_string(season)
    dates = []
    for season_type in ['Regular Season', 'Playoffs']:
        games = leaguegamelog.LeagueGameLog(season=season_str, season_type_all_star=season_type).get_data_frames()[0]
        dates.extend(games['GAME_DATE'].unique())
        sleep(1)
    return dates


get_game_dates(2021)

['2021-10-19',
 '2021-10-20',
 '2021-10-21',
 '2021-10-22',
 '2021-10-23',
 '2021-10-24',
 '2021-10-25',
 '2021-10-26',
 '2021-10-27',
 '2021-10-28',
 '2021-10-29',
 '2021-10-30',
 '2021-10-31',
 '2021-11-01',
 '2021-11-02',
 '2021-11-03',
 '2021-11-04',
 '2021-11-05',
 '2021-11-06',
 '2021-11-07',
 '2021-11-08',
 '2021-11-09',
 '2021-11-10',
 '2021-11-11',
 '2021-11-12',
 '2021-11-13',
 '2021-11-14',
 '2021-11-15',
 '2021-11-16',
 '2021-11-17',
 '2021-11-18',
 '2021-11-19',
 '2021-11-20',
 '2021-11-21',
 '2021-11-22',
 '2021-11-23',
 '2021-11-24',
 '2021-11-26',
 '2021-11-27',
 '2021-11-28',
 '2021-11-29',
 '2021-11-30',
 '2021-12-01',
 '2021-12-02',
 '2021-12-03',
 '2021-12-04',
 '2021-12-05',
 '2021-12-06',
 '2021-12-07',
 '2021-12-08',
 '2021-12-09',
 '2021-12-10',
 '2021-12-11',
 '2021-12-12',
 '2021-12-13',
 '2021-12-14',
 '2021-12-15',
 '2021-12-16',
 '2021-12-17',
 '2021-12-18',
 '2021-12-19',
 '2021-12-20',
 '2021-12-21',
 '2021-12-22',
 '2021-12-23',
 '2021-12-25',
 '2021-12-

In [4]:
# Get Moneylines
def add_moneylines(conn, start_season, end_season, if_exists='append'):
    
    table_name = 'moneylines'

    if if_exists == 'replace':
        conn.execute('DROP TABLE IF EXISTS ' + table_name)
        
    conn.execute("""CREATE TABLE IF NOT EXISTS {} (SEASON TEXT, GM_DATE DATE, HOME_TEAM TEXT,
            AWAY_TEAM TEXT, AWAY_ML TEXT, HOME_ML TEXT)""".format(table_name))
    
    dates_with_no_data = []
    
    seasons = []
    gm_dates = []
    away_teams = []
    home_teams = []
    away_mls = []
    home_mls = []

    for season in range(start_season, end_season+1):
        print("scraping season: {}".format(season_string(season)))
        dates = get_game_dates(season)
        
        for date in tqdm(dates, desc='progress'):
            web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/money-line/?date={}'.format(date)
            path = '/Users/kainoa/Desktop/bettingalgo/chromedriver-mac-x64/chromedriver'
            driver = webdriver.Chrome(path)
            driver.get(web)
            sleep(random.randint(1,2))

            try:
                single_row_events = driver.find_elements_by_class_name('eventMarketGridContainer-3QipG')
            
            except:
                print("No Data for {}".format(date))
                dates_with_no_data.append(date)
                continue
                      
            num_postponed_events = len(driver.find_elements_by_class_name('eventStatus-3EHqw'))

            num_listed_events = len(single_row_events)
            cutoff = num_listed_events - num_postponed_events

            for event in single_row_events[:cutoff]:
                seasons.append(season_string(season))

                away_team = event.find_elements_by_class_name('participantBox-3ar9Y')[0].text
                home_team = event.find_elements_by_class_name('participantBox-3ar9Y')[1].text

                away_teams.append(away_team)
                home_teams.append(home_team)

                gm_dates.append(date)

                mls = event.find_elements_by_class_name('pointer-2j4Dk')
                
                away_moneyline = []
                home_moneyline = []
                
                
                for i, ml in enumerate(mls):
                    if i%2==0:
                        away_moneyline.append(ml.text)
                    else:
                        home_moneyline.append(ml.text)
                
                away_moneyline = ",".join(away_moneyline)
                home_moneyline = ",".join(home_moneyline)

                away_mls.append(away_moneyline)
                home_mls.append(home_moneyline)
                
            driver.quit()
            
        clear_output(wait=True)
        
    df = pd.DataFrame({'SEASON':seasons,
                       'GM_DATE':gm_dates,
                       'AWAY_TEAM':away_teams, 
                      'HOME_TEAM':home_teams,
                      'AWAY_ML':away_mls,
                      'HOME_ML':home_mls,
                                         })
    
    df = df.sort_values(['GM_DATE']).reset_index(drop=True)
    
    df.to_sql(table_name, conn, if_exists='append', index=False)

    cur = connection.cursor()
    cur.execute('''DELETE FROM moneylines 
                    WHERE rowid NOT IN (SELECT MIN(rowid) FROM moneylines
                                        GROUP BY GM_DATE, AWAY_TEAM, HOME_TEAM)''')
    conn.commit()
    
    return df

In [15]:
def get_team_abbreviation(team_name):
    """Convert full team name to the 3-letter abbreviation for gameID"""
    team_abbreviations = {
        'Atlanta Hawks': 'ATL', 'Boston Celtics': 'BOS', 'Brooklyn Nets': 'BKN', 'Charlotte Hornets': 'CHA',
        'Chicago Bulls': 'CHI', 'Cleveland Cavaliers': 'CLE', 'Dallas Mavericks': 'DAL', 'Denver Nuggets': 'DEN',
        'Detroit Pistons': 'DET', 'Golden State Warriors': 'GSW', 'Houston Rockets': 'HOU', 'Indiana Pacers': 'IND',
        'LA Clippers': 'LAC', 'Los Angeles Lakers': 'LAL', 'Memphis Grizzlies': 'MEM', 'Miami Heat': 'MIA',
        'Milwaukee Bucks': 'MIL', 'Minnesota Timberwolves': 'MIN', 'New Orleans Pelicans': 'NOP', 'New York Knicks': 'NYK',
        'Oklahoma City Thunder': 'OKC', 'Orlando Magic': 'ORL', 'Philadelphia 76ers': 'PHI', 'Phoenix Suns': 'PHX',
        'Portland Trail Blazers': 'POR', 'Sacramento Kings': 'SAC', 'San Antonio Spurs': 'SAS', 'Toronto Raptors': 'TOR',
        'Utah Jazz': 'UTA', 'Washington Wizards': 'WAS'
    }
    return team_abbreviations.get(team_name, 'UNKNOWN')

In [16]:
def generate_game_id(game_date, home_team):
    """Generate the gameID using the format yyyymmdd0{hometeam abbreviation}"""
    date_str = game_date.strftime('%Y%m%d')
    home_abbreviation = get_team_abbreviation(home_team)
    return f"{date_str}0{home_abbreviation}"

In [20]:
# Path to your ChromeDriver
path = '/Users/kainoa/Desktop/bettingalgo/chromedriver-mac-x64/chromedriver'
service = Service(path)

print("hello")

# Data lists
seasons = []
gameID = []
away_teams = []
home_teams = []
away_scoreboards = []
home_scoreboards = []
away_spreads = []
home_spreads = []

# Example of how to use the function for a single season
dates = get_game_dates(2023)
season = 2023

for date in tqdm(dates[:3], desc='progress'):
    web = f'https://www.sportsbookreview.com/betting-odds/nba-basketball/?date={date}'
    driver = webdriver.Chrome(service=service, options=options)
    driver.get(web)
    
    try:
        single_row_events = driver.find_elements(By.CLASS_NAME, 'eventMarketGridContainer-3QipG')
        print(single_row_events)
    except:
        print(f"No Data for {date}")
        continue

    num_postponed_events = len(driver.find_elements(By.CLASS_NAME, 'eventStatus-3EHqw'))
    num_listed_events = len(single_row_events)
    cutoff = num_listed_events - num_postponed_events

    for event in single_row_events[:cutoff]:
        away_team = event.find_elements(By.CLASS_NAME, 'participantBox-3ar9Y')[0].text
        home_team = event.find_elements(By.CLASS_NAME, 'participantBox-3ar9Y')[1].text
        print(home_team)
        away_teams.append(away_team)
        home_teams.append(home_team)
        gameID.append(generate_game_id(date, home_team))
        seasons.append(season_string(season))

        scoreboard = event.find_elements(By.CLASS_NAME, 'scoreboard-1TXQV')
        print("found scoreboard")
        home_score = []
        away_score = []

        for score in scoreboard:
            quarters = score.find_elements(By.CLASS_NAME, 'scoreboardColumn-2OtpR')
            for i in range(len(quarters)):
                scores = quarters[i].text.split('\n')
                away_score.append(scores[0])
                home_score.append(scores[1])

            home_score = ",".join(home_score)
            away_score = ",".join(away_score)

            away_scoreboards.append(away_score)
            home_scoreboards.append(home_score)

        # Ensure scoreboard lengths match team lists
        if len(away_scoreboards) != len(away_teams):
            num_to_add = len(away_teams) - len(away_scoreboards)
            for _ in range(num_to_add):
                away_scoreboards.append('')
                home_scoreboards.append('')

        # Get spreads
        spreads = event.find_elements(By.CLASS_NAME, 'pointer-2j4Dk')
        away_lines = []
        home_lines = []
        for i in range(len(spreads)):    
            if i % 2 == 0:
                away_lines.append(spreads[i].text)
            else:
                home_lines.append(spreads[i].text)

        away_lines = ",".join(away_lines)
        home_lines = ",".join(home_lines)

        away_spreads.append(away_lines)
        home_spreads.append(home_lines)

        if len(away_spreads) != len(away_teams):
            num_to_add = len(away_teams) - len(away_spreads)
            for _ in range(num_to_add):
                away_scoreboards.append('')
                home_scoreboards.append('')

    driver.quit()

# Construct the DataFrame
df = pd.DataFrame({
    'SEASON': seasons, 
    'GM_DATE': gameID,
    'AWAY_TEAM': away_teams,
    'HOME_TEAM': home_teams,
    'AWAY_SCOREBOARD': away_scoreboards,
    'HOME_SCOREBOARD': home_scoreboards,
    'AWAY_SPREAD': away_spreads,
    'HOME_SPREAD': home_spreads
})

hello


progress:   0%|          | 0/3 [00:00<?, ?it/s]

[]


progress:  67%|██████▋   | 2/3 [00:42<00:19, 19.20s/it]

No Data for 2023-10-25


progress:  67%|██████▋   | 2/3 [00:45<00:22, 22.84s/it]


KeyboardInterrupt: 

In [14]:
df

Unnamed: 0,SEASON,GM_DATE,AWAY_TEAM,HOME_TEAM,AWAY_SCOREBOARD,HOME_SCOREBOARD,AWAY_SPREAD,HOME_SPREAD
