In [3]:
import os
import sys
from datetime import datetime, timezone, timedelta
from urllib.request import urlopen
import logging
from bs4 import BeautifulSoup
from sqlalchemy import exc, create_engine
import pymysql
import numpy as np
import pandas as pd
import boto3
from botocore.exceptions import ClientError

# Send DEBUG-and-above records to example.log, stamped as MM/DD/YYYY HH:MM:SS AM/PM.
logging.basicConfig(filename='example.log', level=logging.DEBUG, format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
logging.info('Starting Logging Function')

# Read the clock once and derive every date field from that single reading.
# Calling datetime.now() separately for day, month and year lets the values
# disagree with each other (and with `yesterday`) if the cell runs across midnight.
today = datetime.now().date()
yesterday = today - timedelta(1)
day = yesterday.day
month = yesterday.month
year = yesterday.year
season_type = 'Regular Season'


In [10]:
def sql_connection():
    """
    Build a SQLAlchemy engine for the Postgres database described by the
    RDS_USER, RDS_PW, IP and RDS_DB environment variables (port 5432), with
    the search path set to the nba_source schema.

    Args:
        None
    Returns:
        The SQLAlchemy engine on success. If SQLAlchemy raises while building
        the engine, the exception object is returned instead of being raised,
        so callers should check the type of the result.

    Notes:
        create_engine() is lazy: it does not open a connection, so the
        "Successful" message only means the engine object was built.
        A missing environment variable makes os.environ.get() return None and
        the string concatenation raise TypeError, which is not caught here.
    """
    try:
        db_url = (
            'postgresql+psycopg2://'
            + os.environ.get('RDS_USER') + ':' + os.environ.get('RDS_PW')
            + '@' + os.environ.get('IP') + ':' + '5432'
            + '/' + os.environ.get('RDS_DB')
        )
        connection = create_engine(
            db_url,
            connect_args={'options': '-csearch_path=nba_source'},  # defining schema to connect to
            echo=False,
        )
        logging.info('SQL Connection Successful')
        print('SQL Connection Successful')
        return connection
    except exc.SQLAlchemyError as e:
        # logging uses %-style lazy formatting: without the %s placeholder the
        # extra argument makes the record fail to format and the error is lost.
        logging.error('SQL Connection Failed, Error: %s', e)
        print('SQL Connection Failed, Error:', e)
        return e

In [11]:
# Build the database handle once for the cells below. sql_connection() returns
# the exception object (rather than raising) when SQLAlchemy fails, so `conn`
# is not guaranteed to be an engine.
conn = sql_connection()

SQL Connection Successful


In [1]:
def _slate_date(header, today):
    """Translate the first table's column header into the date of that slate."""
    label = str(header).strip()
    if label.lower() == "today":
        return today
    # Otherwise the header is expected to look like "Tue Nov 9th" (format assumed
    # from the earlier parsing attempt - TODO confirm against the live page).
    # strptime has no directive for ordinal suffixes, so drop one if present.
    if label[-2:] in ("st", "nd", "rd", "th"):
        label = label[:-2]
    # Raises ValueError for anything unrecognised; get_odds() treats that as a failure.
    return datetime.strptime(f"{today.year} {label}", "%Y %a %b %d").date()


def _prepare_odds_table(table, slate_date, day_offset=0):
    """Name the first column "Today", split off the tip-off time and stamp date/datetime1."""
    # rename() returns a new frame, so the scraped table itself is never modified.
    games = table.rename(columns={table.columns[0]: "Today"})
    games["Today"] = (
        games["Today"]
        .str.replace("LA Clippers", "LAC Clippers", regex=False)
        # the page text runs the time straight into the team name; add a space after AM/PM
        .str.replace("AM", "AM ", regex=False)
        .str.replace("PM", "PM ", regex=False)
    )
    games["Time"] = games["Today"].str.split().str[0]
    games["datetime1"] = (
        pd.to_datetime(slate_date.strftime("%Y-%m-%d") + " " + games["Time"])
        # NOTE(review): fixed 5-hour shift, presumably UTC -> US Eastern standard
        # time; daylight saving is not accounted for - confirm.
        - timedelta(hours=5)
        + timedelta(days=day_offset)
    )
    if day_offset == 0:
        # The first table keeps the slate date as-is ...
        games["date"] = slate_date
    else:
        # ... later tables take their date from the shifted timestamp.
        games["date"] = games["datetime1"].dt.date
    return games


def get_odds():
    """
    Web Scrape function w/ pandas read_html that grabs current day's nba odds
    Args:
        None
    Returns:
        Pandas DataFrame of NBA moneyline + spread odds for upcoming games for that day
        (columns: team, spread, total, moneyline, date, datetime1), restricted to the
        earliest date found. Returns an empty list if a ValueError is raised while
        scraping or parsing (pd.read_html raises one when the page has no tables).
    """
    try:
        url = "https://sportsbook.draftkings.com/leagues/basketball/88670846?category=game-lines&subcategory=game"
        tables = pd.read_html(url)

        # The first table's header names the slate ("Today" or a calendar label).
        slate_date = _slate_date(tables[0].columns[0], datetime.now().date())

        # Only the first two tables are used; the second is treated as the day
        # after the first. A page with a single table is handled as well.
        frames = [
            _prepare_odds_table(table, slate_date, day_offset=offset)
            for offset, table in enumerate(tables[:2])
        ]
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        data = pd.concat(frames).reset_index(drop=True)

        # Drop the trailing 4-character price (e.g. "-110") from spread and total,
        # then the leading "O " / "U " marker from total.
        data["SPREAD"] = (
            data["SPREAD"]
            .str[:-4]
            .str.replace("pk", "-1", regex=False)
            # literal replace: "+" on its own is not a valid regular expression
            .str.replace("+", "", regex=False)
        )
        data["TOTAL"] = data["TOTAL"].str[:-4].str[2:]
        # Second whitespace-separated token is the team abbreviation.
        data["Today"] = data["Today"].str.split().str[1]

        data.columns = data.columns.str.lower()
        data = data[["today", "spread", "total", "moneyline", "date", "datetime1"]]
        data = data.rename(columns={"today": "team"})
        data = data[data["date"] == data["date"].min()]  # only grab games from upcoming day
        logging.info(f"Odds Function Successful, retrieving {len(data)} rows")
        print(f"Odds Function Successful, retrieving {len(data)} rows")
        return data
    except ValueError:
        logging.error("Odds Function Failed for Today's Games")
        print("Odds Function Failed for Today's Games")
        return []

In [4]:
# Scrape the odds. get_odds() hands back an empty list instead of a DataFrame
# when it catches a ValueError, so `df` is not guaranteed to be a DataFrame.
df = get_odds()

Odds Function Successful, retrieving 6 rows


In [9]:
# Keep only the rows whose team abbreviation is LAC.
df2 = df[df["team"] == "LAC"]

In [12]:
# Append the filtered rows to the aws_odds_source table through `conn`,
# without writing the DataFrame index as a column.
df2.to_sql(
    "aws_odds_source",
    con=conn,
    if_exists='append',
    index=False,
)

In [14]:
# Fetch the HTML tables from the DraftKings NBA game-lines page.
# Note: this rebinds `df`, which earlier held the get_odds() result, to the
# list of tables returned by pd.read_html.
url = "https://sportsbook.draftkings.com/leagues/basketball/88670846?category=game-lines&subcategory=game"
df = pd.read_html(url)

In [15]:
# Work on a renamed copy of the second scraped table so the frame held in `df`
# is left untouched. (Assigning into `columns.values` edits the Index's backing
# array in place, which bypasses pandas' immutable-Index contract.)
data2 = df[1].rename(columns={df[1].columns[0]: "Today"})
data3 = data2.reset_index()  # keeps the previous index as an "index" column
# The page text runs the time straight into the team name; add a space after "PM".
data3['Today'] = data3['Today'].str.replace("PM", "PM ", regex=False)

In [28]:
# Prototype of the first-table parsing: fetch the page, work out the slate
# date from the first column header, and build a timestamp per row.
url = "https://sportsbook.draftkings.com/leagues/basketball/88670846?category=game-lines&subcategory=game"
df = pd.read_html(url)
data1 = df[0].copy()

# The header is either "Today" or a calendar label such as "Tue Nov 9th"
# (format assumed from the earlier parsing attempt - TODO confirm).
slate_label = str(data1.columns[0])
if slate_label == "Today":
    date_try = datetime.now().date()
else:
    # strptime has no directive for ordinal suffixes, so drop one if present.
    day_text = slate_label[:-2] if slate_label[-2:] in ("st", "nd", "rd", "th") else slate_label
    date_try = datetime.strptime(f"{datetime.now().year} {day_text}", "%Y %a %b %d").date()
data1["date"] = date_try

# Rename through the pandas API instead of writing into columns.values.
data1 = data1.rename(columns={data1.columns[0]: "Today"})
# The page text runs the time straight into the team name; add a space after AM/PM.
data1["Today"] = (
    data1["Today"]
    .str.replace("AM", "AM ", regex=False)
    .str.replace("PM", "PM ", regex=False)
)
data1["Time"] = data1["Today"].str.split().str[0]
data1["datetime1"] = (
    pd.to_datetime(date_try.strftime("%Y-%m-%d") + " " + data1["Time"])
    # NOTE(review): fixed 5-hour shift, presumably UTC -> US Eastern standard time - confirm.
    - timedelta(hours=5)
)

In [27]:
# Take the slate date from the first row of data1's 'date' column.
date_try = data1['date'].iloc[0]

In [33]:
# Stand-alone run of the odds pipeline (same steps as get_odds(), without the
# error handling): fetch the page, normalise the first two tables, merge them
# and keep the earliest date.
url = "https://sportsbook.draftkings.com/leagues/basketball/88670846?category=game-lines&subcategory=game"
df = pd.read_html(url)

# --- first table -----------------------------------------------------------
data1 = df[0].copy()
# The header is either "Today" or a calendar label such as "Tue Nov 9th"
# (format assumed from the earlier parsing attempt - TODO confirm).
slate_label = str(data1.columns[0])
if slate_label == "Today":
    date_try = datetime.now().date()
else:
    # strptime has no directive for ordinal suffixes, so drop one if present.
    day_text = slate_label[:-2] if slate_label[-2:] in ("st", "nd", "rd", "th") else slate_label
    date_try = datetime.strptime(f"{datetime.now().year} {day_text}", "%Y %a %b %d").date()
data1["date"] = date_try
# Rename through the pandas API instead of writing into columns.values.
data1 = data1.rename(columns={data1.columns[0]: "Today"})
data1["Today"] = (
    data1["Today"]
    # same rewrite get_odds() applies, so the Clippers row yields "LAC" rather than "LA"
    .str.replace("LA Clippers", "LAC Clippers", regex=False)
    # the page text runs the time straight into the team name; add a space after AM/PM
    .str.replace("AM", "AM ", regex=False)
    .str.replace("PM", "PM ", regex=False)
)
data1["Time"] = data1["Today"].str.split().str[0]
data1["datetime1"] = (
    pd.to_datetime(date_try.strftime("%Y-%m-%d") + " " + data1["Time"])
    # NOTE(review): fixed 5-hour shift, presumably UTC -> US Eastern standard time - confirm.
    - timedelta(hours=5)
)

# --- second table (treated as the day after the first) -----------------------
data2 = df[1].copy()
data2 = data2.rename(columns={data2.columns[0]: "Today"})
data2["Today"] = (
    data2["Today"]
    .str.replace("LA Clippers", "LAC Clippers", regex=False)
    .str.replace("AM", "AM ", regex=False)
    .str.replace("PM", "PM ", regex=False)
)
data2["Time"] = data2["Today"].str.split().str[0]
data2["datetime1"] = (
    pd.to_datetime(date_try.strftime("%Y-%m-%d") + " " + data2["Time"])
    - timedelta(hours=5)
    + timedelta(days=1)
)
data2["date"] = data2["datetime1"].dt.date

# --- merge and tidy ----------------------------------------------------------
# DataFrame.append was removed in pandas 2.0; concat is the replacement.
data = pd.concat([data1, data2]).reset_index(drop=True)
# Drop the trailing 4-character price (e.g. "-110") from spread and total,
# then the leading "O " / "U " marker from total.
data["SPREAD"] = (
    data["SPREAD"]
    .str[:-4]
    .str.replace("pk", "-1", regex=False)
    # literal replace: "+" on its own is not a valid regular expression
    .str.replace("+", "", regex=False)
)
data["TOTAL"] = data["TOTAL"].str[:-4].str[2:]
# Second whitespace-separated token is the team abbreviation.
data["Today"] = data["Today"].str.split().str[1]
data.columns = data.columns.str.lower()
data = data[["today", "spread", "total", "moneyline", "date", "datetime1"]]
data = data.rename(columns={"today": "team"})
data = data[data["date"] == data["date"].min()]  # only grab games from upcoming day