In [1]:
import os
import sys
from datetime import datetime, timezone, timedelta
from urllib.request import urlopen
import logging
from bs4 import BeautifulSoup
from sqlalchemy import exc, create_engine
import pymysql
import numpy as np
import pandas as pd
import boto3
from botocore.exceptions import ClientError

logging.basicConfig(filename='example.log', level=logging.DEBUG, format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
logging.info('Starting Logging Function')

today = datetime.now().date()
yesterday = today - timedelta(1)
day = (datetime.now() - timedelta(1)).day
month = (datetime.now() - timedelta(1)).month
year = (datetime.now() - timedelta(1)).year
season_type = 'Regular Season'


In [303]:
def sql_connection():
    try:
        connection = create_engine('mysql+pymysql://' + os.environ.get('RDS_USER') + ':' + os.environ.get('RDS_PW') + '@' + os.environ.get('IP') + ':' + '3306' + '/' + os.environ.get('RDS_DB'),
                     echo = False)
        logging.info('SQL Connection Successful')
        print('SQL Connection Successful')
        return(connection)
    except exc.SQLAlchemyError as e:
        logging.info('SQL Connection Failed, Error:', e)
        print('SQL Connection Failed, Error:', e)
        return(e)


In [2]:
def get_player_stats():
    try:
        year = 2021
        url = "https://www.basketball-reference.com/leagues/NBA_{}_per_game.html".format(year)
        html = urlopen(url)
        soup = BeautifulSoup(html)

        headers = [th.getText() for th in soup.findAll('tr', limit=2)[0].findAll('th')]
        headers = headers[1:]

        rows = soup.findAll('tr')[1:]
        player_stats = [[td.getText() for td in rows[i].findAll('td')]
            for i in range(len(rows))]

        stats = pd.DataFrame(player_stats, columns = headers)
        stats['PTS'] = pd.to_numeric(stats['PTS'])
        logging.info(f'General Stats Function Successful, retrieving {len(stats)} updated rows')
        print(f'General Stats Function Successful, retrieving {len(stats)} updated rows')
        return(stats)
    except ValueError:
        logging.info("General Stats Function Failed for Today's Games")
        print("General Stats Function Failed for Today's Games")
        df = []
        return(df)

In [305]:
conn = sql_connection()

SQL Connection Successful


In [3]:
def get_boxscores(month = month, day = day, year = year):
    url = "https://www.basketball-reference.com/friv/dailyleaders.fcgi?month={}&day={}&year={}&type=all".format(month, day, year)
    html = urlopen(url)
    soup = BeautifulSoup(html)

    try: 
        headers = [th.getText() for th in soup.findAll('tr', limit=2)[0].findAll('th')]
        headers = headers[1:]
        headers[1] = 'Team'
        headers[2] = "Location"
        headers[3] = 'Opponent'
        headers[4] = "Outcome"
        headers[6] = "FGM"
        headers[8] = "FGPercent"
        headers[9] = "threePFGMade"
        headers[10] = "threePAttempted"
        headers[11] = "threePointPercent"
        headers[14] = "FTPercent"
        headers[15] = "OREB"
        headers[16] = "DREB"
        headers[24] = 'PlusMinus'

        rows = soup.findAll('tr')[1:]
        player_stats = [[td.getText() for td in rows[i].findAll('td')]
            for i in range(len(rows))]

        df = pd.DataFrame(player_stats, columns = headers)
        df[['FGM', 'FGA', 'FGPercent', 'threePFGMade', 'threePAttempted', 'threePointPercent', 'OREB', 'DREB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PlusMinus', 'GmSc']] = df[['FGM', 'FGA', 'FGPercent', 'threePFGMade', 'threePAttempted', 'threePointPercent','OREB', 'DREB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PlusMinus', 'GmSc']].apply(pd.to_numeric)
        df['Date'] = yesterday
        df['Type'] = season_type
        df['Season'] = 2022
        df['Location'] = df['Location'].apply(lambda x: 'A' if x == '@' else 'H')
        df['Team'] = df['Team'].str.replace("PHO", "PHX")
        df['Team'] = df['Team'].str.replace("CHO", "CHA")
        df['Team'] = df['Team'].str.replace("BRK", "BKN")
        df['Opponent'] = df['Opponent'].str.replace("PHO", "PHX")
        df['Opponent'] = df['Opponent'].str.replace("CHO", "CHA")
        df['Opponent'] = df['Opponent'].str.replace("BRK", "BKN")
        logging.info(f'Box Score Function Successful, retrieving {len(df)} rows for {yesterday}')
        print(f'Box Score Function Successful, retrieving {len(df)} rows for {yesterday}')
        return(df)
    except IndexError:
        logging.info(f"Box Score Function Failed, no data available for {yesterday}")
        print(f"Box Score Function Failed, no data available for {yesterday}")
        df = []
        return(df)

In [369]:
df1 = get_boxscores(month = 6, day = 1, year = 2021)

Box Score Function Successful, retrieving 72 rows for 2021-09-04


In [3]:
df = get_boxscores()

Box Score Function Failed, no data available for 2021-09-03


In [222]:
yesterday_hometeams = df.query('Location == "H"')[['Team']].drop_duplicates().dropna()
yesterday_hometeams['Team'] = yesterday_hometeams['Team'].str.replace("PHX", "PHO")
yesterday_hometeams['Team'] = yesterday_hometeams['Team'].str.replace("CHA", "CHO")
yesterday_hometeams['Team'] = yesterday_hometeams['Team'].str.replace("BKN", "BRK")

In [129]:
# work on this tmmw 2020 12 23 has like 12 games so try that
url = "https://www.basketball-reference.com/boxscores/pbp/202012220BRK.html"
df = pd.read_html(url)[0]
df = df.droplevel(0, axis = 'columns')
df = df.rename(columns={df.columns[1]: 'Away', df.columns[2]: 'AwayScore', df.columns[4]: 'HomeScore', df.columns[5]: 'Home'})
df

Unnamed: 0,Time,Away,AwayScore,Score,HomeScore,Home
0,12:00.0,Jump ball: J. Wiseman vs. D. Jordan (J. Harris...,Jump ball: J. Wiseman vs. D. Jordan (J. Harris...,Jump ball: J. Wiseman vs. D. Jordan (J. Harris...,Jump ball: J. Wiseman vs. D. Jordan (J. Harris...,Jump ball: J. Wiseman vs. D. Jordan (J. Harris...
1,11:50.0,,,0-0,,Turnover by D. Jordan (bad pass)
2,11:38.0,Shooting foul by K. Irving (drawn by S. Curry),,0-0,,
3,11:38.0,S. Curry makes free throw 1 of 2,+1,1-0,,
4,11:38.0,S. Curry makes free throw 2 of 2,+1,2-0,,
...,...,...,...,...,...,...
517,0:30.0,,,96-125,,Turnover by B. Brown (bad pass)
518,0:10.0,J. Wiseman misses 2-pt jump shot from 9 ft,,96-125,,
519,0:08.0,Offensive rebound by D. Lee,,96-125,,
520,0:07.0,M. Mulder makes 3-pt jump shot from 26 ft (ass...,+3,99-125,,


In [11]:
newdate = yesterday.strftime("%Y%m%d")
pracdate = '20201223'

In [523]:
def get_pbp_data(df):
    if (len(df) > 0):
        yesterday_hometeams = df.query('Location == "H"')[['Team']].drop_duplicates().dropna()
        yesterday_hometeams['Team'] = yesterday_hometeams['Team'].str.replace("PHX", "PHO")
        yesterday_hometeams['Team'] = yesterday_hometeams['Team'].str.replace("CHA", "CHO")
        yesterday_hometeams['Team'] = yesterday_hometeams['Team'].str.replace("BKN", "BRK")

        away_teams = df.query('Location == "A"')[['Team', 'Opponent']].drop_duplicates().dropna()
        away_teams = away_teams.rename(columns = {away_teams.columns[0]: 'AwayTeam', away_teams.columns[1]: 'HomeTeam'})
    else:
        yesterday_hometeams = []

    if (len(yesterday_hometeams) > 0):
        try:
            pracdate = '20210601' # use this for url format 1 for prac.
            # newdate = yesterday.strftime("%Y%m%d")
            pbp_list = pd.DataFrame()
            for i in yesterday_hometeams['Team']:
                url = "https://www.basketball-reference.com/boxscores/pbp/{}0{}.html".format(pracdate, i)
                df = pd.read_html(url)[0]
                df.columns = df.columns.map(''.join)
                df = df.rename(columns={df.columns[0]: 'Time', df.columns[1]: 'descriptionPlayVisitor', df.columns[2]: 'AwayScore', df.columns[3]: 'Score', df.columns[4]: 'HomeScore', df.columns[5]: 'descriptionPlayHome'})
                conditions = [
                    (df['HomeScore'].str.contains('Jump ball:', na = False) & df['Time'].str.contains('12:00.0')),
                    (df['HomeScore'].str.contains('Start of 2nd quarter', na = False)),
                    (df['HomeScore'].str.contains('Start of 3rd quarter', na = False)),
                    (df['HomeScore'].str.contains('Start of 4th quarter', na = False)),
                    (df['HomeScore'].str.contains('Start of 1st overtime', na = False)),
                    (df['HomeScore'].str.contains('Start of 2nd overtime', na = False)),
                    (df['HomeScore'].str.contains('Start of 3rd overtime', na = False)),
                    (df['HomeScore'].str.contains('Start of 4th overtime', na = False))]
                values = ['1st Quarter', '2nd Quarter', '3rd Quarter', '4th Quarter', '1st OT', '2nd OT', '3rd OT', '4th OT']
                df['Quarter'] = np.select(conditions, values, default = None)
                df['Quarter'] = df['Quarter'].fillna(method = 'ffill')
                df = df.query('Time != "Time" & Time != "2nd Q" & Time != "3rd Q" & Time != "4th Q" & Time != "1st OT" & Time != "2nd OT" & Time != "3rd OT" & Time != "4th OT"')
                df['HomeTeam'] = i
                df['HomeTeam'] = df['HomeTeam'].str.replace("PHO", "PHX")
                df['HomeTeam'] = df['HomeTeam'].str.replace("CHO", "CHA")
                df['HomeTeam'] = df['HomeTeam'].str.replace("BRK", "BKN")
                df = df.merge(away_teams)
                df[['scoreAway', 'scoreHome']] = df['Score'].str.split('-', expand = True)
                df['scoreAway'] = pd.to_numeric(df['scoreAway'], errors = 'coerce')
                df['scoreAway'] = df['scoreAway'].fillna(method = 'ffill')
                df['scoreAway'] = df['scoreAway'].fillna(0)
                df['scoreHome'] = pd.to_numeric(df['scoreHome'], errors = 'coerce')
                df['scoreHome'] = df['scoreHome'].fillna(method = 'ffill')
                df['scoreHome'] = df['scoreHome'].fillna(0)
                df['marginScore'] = df['scoreHome'] - df['scoreAway']
                df['Date'] = yesterday
                df = df.rename(columns = {df.columns[0]: 'timeQuarter', df.columns[6]: 'numberPeriod'})
                pbp_list = pbp_list.append(df)
                df = pd.DataFrame()
            return(pbp_list)
        except ValueError:
            logging.info("PBP Function Failed for Yesterday's Games")
            print("PBP Function Failed for Yesterday's Games")
            df = []
            return(df)
    else:
        df = []
        logging.info("PBP Function No Data Yesterday")
        print("PBP Function No Data Yesterday")
        return(df)


In [65]:
away_teams = df1.query('Location == "A"')[['Team', 'Opponent']].drop_duplicates().dropna()
away_teams = away_teams.rename(columns = {away_teams.columns[0]: 'AwayTeam', away_teams.columns[1]: 'HomeTeam'})

In [469]:
del df

In [525]:
pbp_data = get_pbp_data(df1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['HomeTeam'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['HomeTeam'] = df['HomeTeam'].str.replace("PHO", "PHX")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['HomeTeam'] = df['HomeTeam'].str.replace("CHO", "CHA")
A value is trying to be set on a copy of a slice from a DataFrame.
Try

In [519]:
pbp_data.to_clipboard()

In [527]:

pbp_data.to_clipboard()

In [263]:
url = "https://www.basketball-reference.com/boxscores/pbp/202012230MEM.html"
df = pd.read_html(url)[0]
df.columns = df.columns.map(''.join)
df = df.rename(columns={df.columns[0]: 'Time', df.columns[1]: 'Away', df.columns[2]: 'AwayScore', df.columns[3]: 'Score', df.columns[4]: 'HomeScore', df.columns[5]: 'Home'})
df = df.query('Time != "Time" & Time != "2nd Q" & Time != "3rd Q" & Time != "4th Q"')
df['Time'] = df['Time'].astype(str).str[:-3]
df.to_clipboard()


In [513]:
pbp_data['marginScore'] = pbp_data['scoreHome'] - pbp_data['scoreAway']

In [511]:
pbp_data

Unnamed: 0,Time,descriptionPlayVisitor,AwayScore,Score,HomeScore,descriptionPlayHome,Quarter,HomeTeam,AwayTeam,scoreAway,scoreHome,Date,marginScore
0,12:00.0,Jump ball: T. Thompson vs. K. Durant (M. Smart...,Jump ball: T. Thompson vs. K. Durant (M. Smart...,Jump ball: T. Thompson vs. K. Durant (M. Smart...,Jump ball: T. Thompson vs. K. Durant (M. Smart...,Jump ball: T. Thompson vs. K. Durant (M. Smart...,1st Quarter,BKN,BOS,0.0,0.0,2021-09-04,0.0
1,11:43.0,M. Smart misses 3-pt jump shot from 26 ft,,0-0,,,1st Quarter,BKN,BOS,0.0,0.0,2021-09-04,0.0
2,11:40.0,,,0-0,,Defensive rebound by K. Irving,1st Quarter,BKN,BOS,0.0,0.0,2021-09-04,0.0
3,11:28.0,,,0-0,,K. Irving misses 3-pt jump shot from 25 ft (bl...,1st Quarter,BKN,BOS,0.0,0.0,2021-09-04,0.0
4,11:24.0,Defensive rebound by T. Thompson,,0-0,,,1st Quarter,BKN,BOS,0.0,0.0,2021-09-04,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
465,0:18.0,,,85-115,,J. Carter misses free throw 2 of 2,4th Quarter,PHX,LAL,85.0,115.0,2021-09-04,-30.0
466,0:15.0,Defensive rebound by T. Horton-Tucker,,85-115,,,4th Quarter,PHX,LAL,85.0,115.0,2021-09-04,-30.0
467,0:05.0,T. Horton-Tucker misses 3-pt jump shot from 34 ft,,85-115,,,4th Quarter,PHX,LAL,85.0,115.0,2021-09-04,-30.0
468,0:03.0,,,85-115,,Defensive rebound by J. Carter,4th Quarter,PHX,LAL,85.0,115.0,2021-09-04,-30.0


In [301]:
pbp_data2.head(15)
pbp_data2['Time'][0]

'12:00'

In [419]:
pbp_data.to_sql(con = conn, name = 'pbp_prac', index = False, if_exists = 'replace')

In [265]:
df.index.nlevels
df.columns.nlevels
df['Time'].describe

<bound method NDFrame.describe of 0      12:0
1      11:4
2      11:4
3      11:4
4      11:4
       ... 
474     0:2
475     0:2
476     0:2
477     0:0
478     0:0
Name: Time, Length: 473, dtype: object>

In [136]:
df2 = df['Team'] # df2 returns a Series with only the team column
df2 = df[['Team']] # df2 returns a dataframe with only the team column
df2['Team'] = df2['Team'].str.replace("PHO", "PHX") # df2 returns the whole df while fixing the team col
# https://www.basketball-reference.com/boxscores/202107200MIL.html
# https://www.basketball-reference.com/boxscores/pbp/202107200MIL.html

In [190]:
def get_injuries():
    try:
        url = "https://www.basketball-reference.com/friv/injuries.fcgi"
        df = pd.read_html(url)[0]
        df = df.rename(columns = {"Update": "Date"})
        df1 = df['Description'].str.split(pat = ' - ', expand = True)
        df2 = df1[0].str.split(pat = '\\(', expand = True)
        df3 = df2[1].str.rstrip(')')
        df_final = df[['Player', 'Team' ,'Date']]
        df_final['Status'] = df2[0]
        df_final['Injury'] = df3[1]
        df_final['Description'] = df1[1]
        logging.info(f'Injury Function Successful, retrieving {len(df_final)} rows')
        print(f'Injury Function Successful, retrieving {len(df_final)} rows')
        return(df_final)
    except ValueError:
        logging.info("Injury Function Failed for Today's Games")
        print("Injury Function Failed for Today's Games")
        df = []
        return(df)

In [198]:
df = get_injuries()

Injury Function Successful, retrieving 17 rows


In [6]:
def get_transactions():
    url = "https://www.basketball-reference.com/leagues/NBA_2021_transactions.html"
    html = urlopen(url)
    soup = BeautifulSoup(html)
    trs = soup.findAll('li')[71:] # theres a bunch of garbage in the first 71 rows - no matter what 
    rows = []
    mylist = []
    for tr in trs:
        date = tr.find('span')
        if date is not None: # needed bc span can be null (multi <p> elements per span)
            date = date.text
        data = tr.findAll('p')
        for p in data:
            mylist.append(p.text)
        data3 = [date] + [mylist]
        rows.append(data3)
        mylist = []

    transactions = pd.DataFrame(rows)
    transactions.columns = ['Date', 'Transaction']
    transactions = transactions.explode('Transaction')
    transactions['Date'] = pd.to_datetime(transactions['Date'])
    transactions = transactions.query('Date != "NaN"')
    transactions
    logging.info(f'Transactions Function Successful, retrieving {len(transactions)} rows')
    print(f'Transactions Function Successful, retrieving {len(transactions)} rows')
    return(transactions)

In [8]:
url = "https://www.basketball-reference.com/leagues/NBA_2022_transactions.html"
html = urlopen(url)
soup = BeautifulSoup(html)
trs = soup.findAll('li')[50:] # theres a bunch of garbage in the first 71 rows - no matter what 
rows = []
mylist = []
for tr in trs:
        date = tr.find('span')
        if date is not None: # needed bc span can be null (multi <p> elements per span)
            date = date.text
        data = tr.findAll('p')
        for p in data:
            mylist.append(p.text)
        data3 = [date] + [mylist]
        rows.append(data3)
        mylist = []
transactions = pd.DataFrame(rows)
transactions.columns = ['Date', 'Transaction']
transactions = transactions.explode('Transaction')
transactions['Date'] = pd.to_datetime(transactions['Date'])
transactions = transactions.query('Date != "NaN"')
transactions

Unnamed: 0,Date,Transaction
18,2021-07-29,The Los Angeles Clippers traded cash and a 202...
19,2021-07-30,The Memphis Grizzlies waived Jontay Porter.
19,2021-07-30,The Oklahoma City Thunder traded cash and a 20...
19,2021-07-30,The Indiana Pacers traded Georgios Kalaitzakis...
19,2021-07-30,The Charlotte Hornets traded a 2022 1st round ...
...,...,...
46,2021-08-28,The Chicago Bulls signed Lauri Markkanen to a ...
46,2021-08-28,"In a 3-team trade, the Chicago Bulls traded La..."
47,2021-08-30,The Philadelphia 76ers signed Grant Riller to ...
48,2021-08-31,The Los Angeles Lakers signed Rajon Rondo.


In [3]:
def schedule_scraper(month):
    try:
        global schedule_df
        url = "https://www.basketball-reference.com/leagues/NBA_2022_games-{}.html".format(month)
        html = urlopen(url)
        soup = BeautifulSoup(html)

        headers = [th.getText() for th in soup.findAll('tr')[0].findAll('th')]
        headers[6] = 'boxScoreLink'
        headers[7] = 'isOT'
        headers = headers[1:]

        rows = soup.findAll('tr')[1:]
        date_info = [[th.getText() for th in rows[i].findAll('th')]
                for i in range(len(rows))]

        game_info = [[td.getText() for td in rows[i].findAll('td')]
                for i in range(len(rows))]
        date_info = [i[0] for i in date_info]

        schedule = pd.DataFrame(game_info, columns = headers)
        schedule['Date'] = date_info

        schedule_df = schedule_df.append(schedule)
        logging.info(f'Schedule Function Completed for {month}, retrieving {len(schedule_df)} rows')
        print(f'Schedule Function Completed for {month}, retrieving {len(schedule_df)} rows')
    except ValueError:
        logging.info("Schedule Scraper Function Failed")
        print("Schedule Scraper Function Failed")
        df = []
        return(df)

month_list = ['october', 'november', 'december', 'january', 'february', 'march', 'april']
schedule_df = pd.DataFrame()
for month in month_list:
    schedule_scraper(month)

Schedule Function Completed for october, retrieving 93 rows
Schedule Function Completed for november, retrieving 318 rows
Schedule Function Completed for december, retrieving 538 rows
Schedule Function Completed for january, retrieving 765 rows
Schedule Function Completed for february, retrieving 925 rows
Schedule Function Completed for march, retrieving 1150 rows
Schedule Function Completed for april, retrieving 1230 rows


In [5]:
conn = sql_connection()
schedule_df.to_sql(con = conn, name = 'aws_schedule_table', index = False, if_exists = 'append')

SQL Connection Successful


In [39]:
def get_advanced_stats():
    try:
        url = "https://www.basketball-reference.com/leagues/NBA_2021.html"
        df = pd.read_html(url)
        df = pd.DataFrame(df[10])
        df.drop(columns = df.columns[0], 
            axis=1, 
            inplace=True)

        df.columns = ['Team', 'Age', 'W', 'L', 'PW', 'PL', 'MOV', 'SOS', 'SRS', 'ORTG', 'DRTG', 'NRTG', 'Pace', 'FTr', '3PAr', 'TS%', 'bby1', 'eFG%', 'TOV%', 'ORB%', 'FT/FGA', 'bby2', 'eFG%_opp', 'TOV%_opp', 'DRB%_opp', 'FT/FGA_opp', 'bby3', 'Arena', 'Attendance', 'Att/Game']
        df.drop(['bby1', 'bby2', 'bby3'], axis = 1, inplace = True)
        df = df.query('Team != "League Average"')
        logging.info(f'Advanced Stats Function Successful, retrieving updated data for 30 Teams')
        print(f'Advanced Stats Function Successful, retrieving updated data for 30 Teams')
        return(df)
    except ValueError:
        logging.info("Advanced Stats Function Failed for Today's Games")
        print("Advanced Stats Function Failed for Today's Games")
        df = []
        return(df)

In [2]:
def get_odds():
    try:
        url = "https://sportsbook.draftkings.com/leagues/basketball/88673861?category=game-lines&subcategory=game"
        df = pd.read_html(url)
        data1 = df[0]
        data2 = df[1]
        data2 = data2.rename(columns = {"Tomorrow": "Today"})
        data = data1.append(data2)
        data
        data['SPREAD'] = data['SPREAD'].str[:-4]
        data['TOTAL'] = data['TOTAL'].str[:-4]
        data['TOTAL'] = data['TOTAL'].str[2:]
        data.reset_index(drop = True)
        data

        data['Today'] = data['Today'].str.replace("AM|PM", " ")
        data['Today'] = data['Today'].str.split().str[1:2]
        data['Today'] = pd.DataFrame([str(line).strip('[').strip(']').replace("'","") for line in data['Today']])
        data = data.rename(columns = {"Today": "team", "SPREAD": "spread", "TOTAL": "total_pts", "MONEYLINE": "moneyline"})
        logging.info(f'Odds Function Successful, retrieving {len(data)} rows')
        print(f'Odds Function Successful, retrieving {len(data)} rows')
        return(data)
    except ValueError:
        logging.info("Odds Function Failed for Today's Games")
        print("Odds Function Failed for Today's Games")
        data = []
        return(data)

In [3]:
odds = get_odds()

Odds Function Failed for Today's Games


In [10]:
conn = sql_connection()
stats = get_player_stats()
boxscores = get_boxscores(month = month, day = yesterday, year = year)
injury_data = get_injuries()
transactions = get_transactions()
adv_stats = get_advanced_stats()
odds = get_odds()

SQL Connection Successful
General Stats Function Successful, retrieving 731 updated rows
Box Score Function Successful, retrieving 17 rows for 2021-08-25
Injury Function Successful, retrieving 17 rows
Transactions Function Successful, retrieving 823 rows
Advanced Stats Function Successful, retrieving updated data for 30 Teams
Odds Function Failed for Today's Games


In [312]:
def write_to_sql(data, table_type):
    data_name = [ k for k,v in globals().items() if v is data][0]
    if len(data) == 0:
        print(data_name + " Failed, not writing to SQL")
        logging.info(data_name + " Failed, not writing to SQL")
    else:
        # ^ this disgusting monstrosity is to get the name of the -fucking- dataframe lmfao
        data.to_sql(con = conn, name = ("aws_" + data_name + "_table"), index = False, if_exists = table_type)
        print("Writing aws_" + data_name + "_table to SQL")
        logging.info("Writing " + data_name + " table to SQL")

In [314]:
conn = sql_connection()
write_to_sql(pbp_list, "append")

SQL Connection Successful
Writing pbp_list table to SQL


In [2]:
logs = pd.read_csv('example.log', sep=r'\\t', engine='python', header = None)
logs = logs.rename(columns = {0 : "errors"})
logs = logs.query("errors.str.contains('Failed')", engine = "python")

In [62]:
len(logs)

4

In [63]:
def send_email_function():
    try:
        if len(logs) > 0:
            print(logs)
            # Send email
        elif len(logs) == 0:
            print('No Errors!')
            ## DONT SEND EMAIL
    except ValueError:
        print('oof')

In [98]:
send_email_function()

                                               errors
24  08/25/2021 09:05:26 PM Odds Function Failed fo...


In [None]:
write_to_sql(stats, "replace")
write_to_sql(boxscores, "append")
write_to_sql(injury_data, "append")
write_to_sql(transactions, "replace")
write_to_sql(adv_stats, "replace")
write_to_sql(odds, "append")

In [12]:
stats = get_player_stats()

General Stats Function Successful, retrieving 731 updated rows


In [64]:
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

def sendEmail():
    email = os.environ.get("USER_EMAIL") # the email where you sent the email
    password = os.environ.get("USER_PW")
    send_to_email = os.environ.get("USER_EMAIL") # for whom
    message = '''\
<h3>sup hoe here are the errors.</h3>
                   {}'''.format(logs.to_html())

    msg = MIMEMultipart()
    msg["From"] = email
    msg["To"] = send_to_email
    msg["Subject"] = str(len(logs)) +" Alert Fails for " + str(today) + ' Python NBA Web Scrape'
    msg.attach(MIMEText(message, 'html'))

    server = smtplib.SMTP("smtp.gmail.com", 587)
    server.starttls()
    server.login(email, password)
    text = msg.as_string()
    server.sendmail(email, send_to_email, text)
    server.quit()

In [65]:
sendEmail()

In [5]:
def send_aws_email():
    sender = os.environ.get("USER_EMAIL")
    recipient = os.environ.get("USER_EMAIL")
    aws_region = 'us-east-1'
    subject = str(len(logs)) +" Alert Fails for " + str(today) + ' Python NBA Web Scrape'
    body_html = message = '''\
<h3>sup hoe here are the errors.</h3>
                   {}'''.format(logs.to_html())

    charset = "UTF-8"
    client = boto3.client('ses',region_name=aws_region)
    try:
    #Provide the contents of the email.
        response = client.send_email(
            Destination={
                'ToAddresses': [
                    recipient,
                ],
            },
            Message={
                'Body': {
                    'Html': {
                        'Charset': charset,
                        'Data': body_html,
                    },
                    'Text': {
                        'Charset': charset,
                        'Data': body_html,
                    },
                },
                'Subject': {
                    'Charset': charset,
                    'Data': subject,
                },
            },
            Source = sender
            # If you are not using a configuration set, comment or delete the
            # following line
            # ConfigurationSetName=CONFIGURATION_SET,
    )
# Display an error if something goes wrong.	
    except ClientError as e:
        print(e.response['Error']['Message'])
    else:
        print("Email sent! Message ID:"),
        print(response['MessageId'])

In [6]:
send_aws_email()

Email sent! Message ID:
0100017bb2099fb1-62c47da2-d260-4268-8a6e-b150c0e6b787-000000


In [51]:
def send_email_function():
    try:
        if len(logs) > 0:
            print('Sending Email')
            sendEmail()
        elif len(logs) == 0:
            print('No Errors!')
            ## DONT SEND EMAIL
    except ValueError:
        print('oof')

In [57]:

send_email_function()

No Errors!
