In [1]:
from bs4 import BeautifulSoup
import requests 
import pandas as pd
import lxml
import psycopg2
import argparse

In [2]:
from pgConnect import PgConnection
from requestLimiter import RequestLimiter
from config import Config
from typing import Dict

In [3]:
def get_ith_table(soup, i, **kwargs):
    # Get and return table
    tables = soup.find_all('table', **kwargs)
    if len(tables) > 0:
        table = tables[i]
        return table
    else:
        print("No table found on this HTML page!")
        

In [5]:
config : Config = Config('config.ini')
# reader 
read_constants : Dict[str, str] = config.parse_section('reader')
BASE : str = read_constants['base']
NAME : str = BASE[BASE.find('.') + 1:]

# requestLimiter
rl_constants : Dict[str, str] = config.parse_section('requestLimiter')
load_loc = rl_constants['load_location']
LOAD_FILE : str = f'{load_loc}{NAME}.p'
INTERVAL : int = int(rl_constants['interval'])
LIMIT : int = int(rl_constants['limit'])

# ======
# 2. Parse args
# ======
YEAR : int = 2023

rl : RequestLimiter = RequestLimiter(BASE, 
                    interval = INTERVAL, 
                    limit = LIMIT, 
                    load = LOAD_FILE)
bases = {'summary_base' :BASE + f'/leagues/NBA_{YEAR}.html',
            'schedule_base' : BASE + '/leagues/NBA_%s_games-%s.html'}

Successfully loaded previous Rate Limiter info for https://www.basketball-reference.com
Initialized with 0 of 20 entries filled

Saving RequestLimiter status to disk...


In [6]:
def learn_teams(link : str, rl : RequestLimiter) -> Dict[str, str]:
    tm_dict = {}
    data = rl.get(requests.get, link)
    if not data:
        print("Couldn't get information in learn_teams() function!")
        return 
    data = data.text
    soup = BeautifulSoup(data, 'html.parser')
    
    table = get_ith_table(soup, 4, class_ = 'stats_table')
    if table:
        rows = table.findChildren(['tr'])
        for row in rows:
            for a in row.find_all('a'):
                tm_dict[a.text] = BASE + a.get('href')
    else:
        print("Previously hit rate limit on website!")
    return tm_dict


In [59]:
def get_team_info(team : str, link : str, rl : RequestLimiter):
    data =rl.get(requests.get, link)
    if not data:
        print(f"Unable to retrieve team info for {team}!")
        return
    soup = BeautifulSoup(data.text, 'html.parser')
    arena = get_arena(soup)
    roster = read_ith_table(soup, 0, id = 'roster')
    roster_table = get_ith_table(soup, 0, id = 'roster')
    print(roster_table)
    return arena, roster, roster_table

        
def get_arena(soup):
    # Find arena
    a = soup.find_all('div', id = 'meta')[0]
    p = a.find_all('p')[-1]
    arena = p.contents[2].strip()
    return arena

  
def read_ith_table(soup, i, **kwargs):
    table = get_ith_table(soup, i, **kwargs)
    if table:
        return pd.read_html(str(table), flavor='html5lib')[0]    


In [60]:
def make_team_tuple(tm, stadium):
    tm_tup = (YEAR, tm, stadium)
    return tm_tup

ValueError: 0 columns passed, passed data had 8 columns

In [144]:
def process_player_table(player_table):
    table = player_table
    thead = player_table.find('thead')
    data = []
    column_names = [th.text.strip() for th in thead.find_all('th')]
    had_link = set()
    for row in table.find_all(['tr']):
        row_data = []
        link_data = []
        for num, td in enumerate(row.find_all(['td','th'])):
            not_link = None
            if ''.join(td.text.strip()):
                not_link = ''.join(td.text.strip())
            row_data.append(not_link)

            if td.find('a'):
                link = td.a['href']
                link_data.append(link)
                had_link.add(num)
        set_cols = True
        data.append(row_data + link_data)

    had_link = list(had_link)
    had_link.sort()
    for val in had_link:
       column_names.append(column_names[val] + '_link')

    df = pd.DataFrame(data[1:], columns= column_names)
    return df

process_player_table(dat)

Unnamed: 0,No.,Player,Pos,Ht,Wt,Birth Date,Unnamed: 7,Exp,College,Player_link,College_link
0,9.0,Derrick White,SG,6-4,190,"July 2, 1994",us,5,"Colorado-Colorado Springs, Colorado",/players/w/whitede01.html,/friv/colleges.fcgi?college=coloradocs
1,30.0,Sam Hauser,PF,6-8,215,"December 8, 1997",us,1,"Marquette, Virginia",/players/h/hausesa01.html,/friv/colleges.fcgi?college=marquette
2,12.0,Grant Williams,PF,6-6,236,"November 30, 1998",us,3,Tennessee,/players/w/willigr01.html,/friv/colleges.fcgi?college=tennessee
3,0.0,Jayson Tatum,PF,6-8,210,"March 3, 1998",us,5,Duke,/players/t/tatumja01.html,/friv/colleges.fcgi?college=duke
4,7.0,Jaylen Brown,SF,6-6,223,"October 24, 1996",us,6,California,/players/b/brownja02.html,/friv/colleges.fcgi?college=california
5,36.0,Marcus Smart,PG,6-3,220,"March 6, 1994",us,8,Oklahoma State,/players/s/smartma01.html,/friv/colleges.fcgi?college=okstate
6,13.0,Malcolm Brogdon,PG,6-5,229,"December 11, 1992",us,6,Virginia,/players/b/brogdma01.html,/friv/colleges.fcgi?college=virginia
7,40.0,Luke Kornet,C,7-2,250,"July 15, 1995",us,5,Vanderbilt,/players/k/kornelu01.html,/friv/colleges.fcgi?college=vandy
8,42.0,Al Horford,C,6-9,240,"June 3, 1986",do,15,Florida,/players/h/horfoal01.html,/friv/colleges.fcgi?college=florida
9,11.0,Payton Pritchard,PG,6-1,195,"January 28, 1998",us,2,Oregon,/players/p/pritcpa01.html,/friv/colleges.fcgi?college=oregon


In [65]:
guy = None
def load_teams(year : int, bases : Dict[str, str], rl : RequestLimiter):
    team_links : Dict[str, str] = learn_teams(bases['summary_base'], rl)
    tl = dict((k, team_links[k]) for k in ['Boston Celtics'])
    for tm, link in tl.items():
        stadium, player_df, player_table = get_team_info(tm, tl[tm], rl)
        guy = player_table
        return player_table
        # process_player_table(player_table)
        # tm_tup = make_team_tuple(tm, stadium)
        # print(tm_tup)
    # return


In [66]:
guy

In [71]:
dat = load_teams(year = YEAR, bases = bases, rl = rl)

Successfully processed append to queue...
Size of current queue... 3
Saving RequestLimiter status to disk...
Successfully processed append to queue...
Size of current queue... 4
Saving RequestLimiter status to disk...
<table class="sortable stats_table" data-cols-to-freeze=",2" id="roster">
<caption>Roster Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr>
<th aria-label="No." class="poptip sort_default_asc center" data-stat="number" data-tip="Uniform Number" scope="col">No.</th>
<th aria-label="Player" class="poptip sort_default_asc center" data-stat="player" scope="col">Player</th>
<th aria-label="Pos" class="poptip sort_default_asc center" data-stat="pos" data-tip="Position" scope="col">Pos</th>
<th aria-label="Ht" class="poptip sort_default_asc center" data-stat="height" data-tip="Height" scope="col">Ht</th>
<th aria-label="Wt" class="poptip sort_default_asc center" data-stat="weight" data-tip="Weight" scope="col">Wt</th>
<th ar

In [72]:
dat

<table class="sortable stats_table" data-cols-to-freeze=",2" id="roster">
<caption>Roster Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr>
<th aria-label="No." class="poptip sort_default_asc center" data-stat="number" data-tip="Uniform Number" scope="col">No.</th>
<th aria-label="Player" class="poptip sort_default_asc center" data-stat="player" scope="col">Player</th>
<th aria-label="Pos" class="poptip sort_default_asc center" data-stat="pos" data-tip="Position" scope="col">Pos</th>
<th aria-label="Ht" class="poptip sort_default_asc center" data-stat="height" data-tip="Height" scope="col">Ht</th>
<th aria-label="Wt" class="poptip sort_default_asc center" data-stat="weight" data-tip="Weight" scope="col">Wt</th>
<th aria-label="Birth Date" class="poptip sort_default_asc center" data-stat="birth_date" scope="col">Birth Date</th>
<th aria-label=" " class="poptip center" data-stat="birth_country" scope="col"> </th>
<th aria-label="Exp"

In [14]:
dat[0]

'TD Garden'

In [24]:
dat[1].head()

Unnamed: 0,No.,Player,Pos,Ht,Wt,Birth Date,Unnamed: 6,Exp,College
0,9.0,Derrick White,SG,6-4,190,"July 2, 1994",us,5,"Colorado-Colorado Springs, Colorado"
1,30.0,Sam Hauser,PF,6-8,215,"December 8, 1997",us,1,"Marquette, Virginia"
2,12.0,Grant Williams,PF,6-6,236,"November 30, 1998",us,3,Tennessee
3,0.0,Jayson Tatum,PF,6-8,210,"March 3, 1998",us,5,Duke
4,7.0,Jaylen Brown,SF,6-6,223,"October 24, 1996",us,6,California


In [None]:
https://www.basketball-reference.com/players/w/whitede01.html

In [21]:
pg_conn = PgConnection(config)
conn = pg_conn.getConn()
cur = conn.cursor()

Connecting to the PostgreSQL database...


In [23]:
# conn
cur

<cursor object at 0x11f0666d0; closed: 0>

In [7]:
conn = psycopg2.connect(
    database="postgres",
    user="postgres",
    password="REBELStennis123",
    host="dfs-db.ce9d6aqnvhtq.us-east-1.rds.amazonaws.com",
    port='5432'
)

In [5]:
def connect():
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # read connection parameters
        params = config()

        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params)
		
        # create a cursor
        cur = conn.cursor()
        
	# execute a statement
        print('PostgreSQL database version:')
        cur.execute('SELECT version()')

        # display the PostgreSQL database server version
        db_version = cur.fetchone()
        print(db_version)
       
	# close the communication with the PostgreSQL
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
            print('Database connection closed.')


In [6]:
connect()

Connecting to the PostgreSQL database...


NameError: name 'psycopg2' is not defined

In [2]:
years = [i for i in range(2015, 2024)]
a

In [3]:
link = 'https://www.basketball-reference.com/leagues/NBA_2022_games.html'
playoffs_link = 'https://www.basketball-reference.com/playoffs/NBA_2022.html'

In [9]:
data = requests.get(link).text
soup = BeautifulSoup(data, 'html.parser')

In [16]:
tables = soup.find_all('table', class_ = 'stats_table')
table = tables[0]
df = pd.read_html(str(table), flavor='html5lib')[0]
print(df.head())

                Date Start (ET)        Visitor/Neutral  PTS  \
0  Tue, Oct 19, 2021      7:30p          Brooklyn Nets  104   
1  Tue, Oct 19, 2021     10:00p  Golden State Warriors  121   
2  Wed, Oct 20, 2021      7:00p         Indiana Pacers  122   
3  Wed, Oct 20, 2021      7:00p          Chicago Bulls   94   
4  Wed, Oct 20, 2021      7:30p         Boston Celtics  134   

         Home/Neutral  PTS.1 Unnamed: 6 Unnamed: 7  Attend.  \
0     Milwaukee Bucks    127  Box Score        NaN    17341   
1  Los Angeles Lakers    114  Box Score        NaN    18997   
2   Charlotte Hornets    123  Box Score        NaN    15521   
3     Detroit Pistons     88  Box Score        NaN    20088   
4     New York Knicks    138  Box Score        2OT    19812   

                        Arena  Notes  
0                Fiserv Forum    NaN  
1            Crypto.com Arena    NaN  
2             Spectrum Center    NaN  
3        Little Caesars Arena    NaN  
4  Madison Square Garden (IV)    NaN  


In [None]:
https://www.basketball-reference.com/leagues/NBA_2021_games-october.html

In [9]:
game_data = Boxscore('201806080CLE')

IndexError: list index out of range

In [7]:
teams = Teams()

The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?


In [5]:

teams = Teams('2021')
for team in teams:
    print(team.name)  # Prints the team's name
    print(team.blocks)  # Prints the team's total blocked shots

The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
