In [2]:
from bs4 import BeautifulSoup
import urllib.request

In [24]:
class BoxScoreEntry:
    """One player's stat line built from the cells of a box score table row.

    Every stat attribute stores ``columns[i].contents[0]`` exactly as found
    (the first child of the cell); no numeric conversion is performed here.

    Args:
        team: Team identifier, stored as ``self.team``.
        name: Player name, stored as ``self.name``.
        columns: Indexable sequence of cell objects exposing ``.contents``.
            Indices 0 through 18 are read, except 3, 6, 9 and 12.

    Raises:
        IndexError: If ``columns`` is shorter than 19 items, or a cell that is
            read has empty ``contents``.
    """

    # (attribute name, index into ``columns``) for every stat that is read.
    # Indices 3, 6, 9 and 12 are deliberately not read
    # (presumably derived percentage/total columns -- TODO confirm against the page).
    _STAT_COLUMNS = (
        ("minutes", 0),
        ("field_goals_made", 1),
        ("field_goals_attempted", 2),
        ("threes_made", 4),
        ("threes_attempted", 5),
        ("free_throws_made", 7),
        ("free_throws_attempted", 8),
        ("offensive_rebounds", 10),
        ("defensive_rebounds", 11),
        ("assists", 13),
        ("steals", 14),
        ("blocks", 15),
        ("turnovers", 16),
        ("personal_fouls", 17),
        ("points", 18),
    )

    def __init__(self, team, name, columns):
        self.team = team
        self.name = name

        for attribute, index in self._STAT_COLUMNS:
            setattr(self, attribute, columns[index].contents[0])

        # Backward-compatible alias: the attribute was originally published
        # under this misspelled name, so keep it for existing readers.
        self.field_goals_attemped = self.field_goals_attempted

In [31]:
# Base URL of the site being scraped. It has no trailing slash: the request
# URLs in this notebook append paths that begin with "/" (e.g. "/boxscores/...").
ref_url = "https://www.basketball-reference.com"

def get_boxscore_links(year, month, day):
    """Collect the box score links listed on the site's page for one date.

    Requests ``{ref_url}/boxscores/?month=<month>&day=<day>&year=<year>`` and
    gathers the ``href`` of every anchor whose target starts with
    ``/boxscores/<year>``.

    Args:
        year: Year placed in the query string and used as the link prefix.
        month: Month placed in the query string.
        day: Day placed in the query string.

    Returns:
        list: The matching ``href`` values, de-duplicated, in the order they
        first appear on the page.
    """
    # Derive the prefix from the requested year; it used to be hard-coded to
    # "/boxscores/2018", which silently returned nothing for any other year.
    link_prefix = f"/boxscores/{year}"

    with urllib.request.urlopen(f"{ref_url}/boxscores/?month={month}&day={day}&year={year}") as response:
        html = response.read()

    soup = BeautifulSoup(html, 'html.parser')

    links = []

    # href=True keeps only anchors that actually carry an href attribute;
    # calling .startswith on a missing (None) href would raise AttributeError.
    for anchor in soup.find_all('a', href=True):
        href = anchor["href"]

        if href.startswith(link_prefix) and href not in links:
            links.append(href)

    return links

def get_boxscore_details(soup):
    """Print each player's name and points from the basic box score tables.

    Walks every ``<table>`` in ``soup`` whose ``id`` starts with ``box_`` and
    does not end with ``_advanced``. For each such table it prints the table
    id and the team name derived from it, then builds a ``BoxScoreEntry`` per
    player row and prints the entry's name and points.

    Args:
        soup: Parsed page supporting ``find_all`` (a BeautifulSoup document).

    Returns:
        None. Results are only printed.
    """
    # BoxScoreEntry indexes columns[0] through columns[18].
    required_columns = 19

    def skip_box_score_row(r):
        """Return True for header, separator and totals rows (matched by text)."""
        row_text = r.get_text()
        return "Basic Box" in row_text \
                or "Advanced Box" in row_text \
                or "Starters" in row_text \
                or "Reserves" in row_text \
                or "Team Totals" in row_text

    for table in soup.find_all('table'):

        # box score tables start with ids like box_mil_basic, box_mil_advanced, etc
        if "id" not in table.attrs:
            continue

        table_id = table["id"]

        if not table_id.startswith("box_"):
            continue

        if table_id.endswith("_advanced"):
            continue

        print(table_id)

        team_name = table_id.replace("box_", "").replace("_basic", "")

        print(team_name)

        for r in table.find_all('tr'):

            # skip rows that are separators
            if skip_box_score_row(r):
                continue

            header_cell = r.find("th")
            name_anchor = header_cell.find("a") if header_cell is not None else None

            # Rows without a linked player name cannot be turned into an entry.
            if name_anchor is None or not name_anchor.contents:
                continue

            columns = r.find_all("td")

            # Rows with fewer stat cells than BoxScoreEntry reads would raise
            # IndexError and abort the whole run (presumably rows for players
            # who did not play -- TODO confirm); skip them instead.
            if len(columns) < required_columns:
                continue

            name = name_anchor.contents[0]

            entry = BoxScoreEntry(team_name, name, columns)

            print(entry.name,entry.points)

In [32]:
# NOTE(review): in the visible notebook, `soup` is not assigned by any cell above
# this one; it is only assigned in the cell marked In [5] further down, so that
# cell has to be executed before this call (a fresh top-to-bottom run fails here).
get_boxscore_details(soup)

box_mil_basic
mil
Khris Middleton 17
Giannis Antetokounmpo 24
Eric Bledsoe 26
Malcolm Brogdon 20
Brook Lopez 2
Ersan Ilyasova 5
Pat Connaughton 15
Tony Snell 6
Donte DiVincenzo 4
John Henson 4
Thon Maker 8
Sterling Brown 3
Matthew Dellavedova 0
box_gsw_basic
gsw
Klay Thompson 24
Kevin Durant 17
Stephen Curry 10
Damian Jones 12
Jordan Bell 4
Kevon Looney 8
Alfonzo McKinnie 7
Quinn Cook 15
Andre Iguodala 6
Jonas Jerebko 0
Damion Lee 8


In [4]:
# Box score links listed for 8 November 2018.
links = get_boxscore_links(year=2018, month=11, day=8)

In [5]:
# Download a single box score page; only the read needs the open response.
with urllib.request.urlopen(f"{ref_url}/boxscores/201811080GSW.html") as response:
    html = response.read()

# Parse the downloaded bytes into the `soup` used by get_boxscore_details.
soup = BeautifulSoup(html, 'html.parser')