In [3]:
import requests
import re
import pandas as pd
from lxml import html

## Helper Functions

In [4]:
def grab_player_key(player_url):
    """Return the player key from a Basketball Reference player URL.

    The key is the path segment that sits directly in front of the
    ``.html`` extension, e.g. ``"antetgi01"`` for
    ``"https://www.basketball-reference.com/players/a/antetgi01.html"``.

    Args:
        player_url: URL (or path) of the player page, as a string.

    Returns:
        The player key as a string.

    Raises:
        ValueError: if the URL contains no ``/<key>.html`` segment.
    """
    # The dot is escaped so only a literal ".html" qualifies; an unescaped
    # "." matches any character and can latch onto text such as "xhtml"
    # in the host name, returning the wrong segment.
    match = re.search(r'(?<=/)[^/]+(?=\.html)', player_url)
    if match is None:
        # Fail with a clear message instead of AttributeError on None.group.
        raise ValueError(
            "Could not extract a player key from URL: {!r}".format(player_url)
        )
    return match.group(0)

## Per Game Scraping - To Pandas DataFrame

In [5]:
import pandas as pd

def create_per_game_dataframe(player_url):
    """Create a dataframe with the Per Game stats of a player.

    Downloads ``player_url``, locates the element with id ``per_game`` and
    turns its rows into a pandas DataFrame with one column per table header
    plus a trailing ``PlayerKey`` column holding the key extracted from the
    URL by ``grab_player_key``.

    Cell handling:
      * The Season cell is replaced by ``int(first four characters) + 1``
        (so a season string starting with ``2013`` becomes ``2014``).
      * Every other cell is converted to ``int`` when it parses as one and
        is otherwise kept as the original text.
      * Rows whose Age cell is empty are skipped (aggregate rows such as
        career totals).
      * Rows whose cell count differs from the header row are skipped,
        because they cannot be aligned to the columns.

    Args:
        player_url: URL of the player page.

    Returns:
        A pandas DataFrame with one row per retained table row.

    Raises:
        requests.HTTPError: if the page request returns an error status.
        RuntimeError: if the table, or its Age or Season header, is missing.
    """
    # Read html, find appropriate table. A timeout keeps the notebook from
    # hanging forever, and the status check stops us parsing an error page.
    page = requests.get(player_url, timeout=30)
    page.raise_for_status()
    tree = html.fromstring(page.content)
    per_game_table_rows = tree.xpath('//*[@id="per_game"]/*/tr')
    if not per_game_table_rows:
        raise RuntimeError("Per Game table not found")

    # Each element of per_game_cols holds the column name and the list of
    # values collected for that column.
    per_game_cols = []

    age_column_index = -1
    season_index = -1

    # Get column headers, remembering where Age and Season live.
    for column_index, header in enumerate(per_game_table_rows[0]):
        name = header.text_content()
        if name.upper() == "AGE":
            age_column_index = column_index
        elif name.upper() == "SEASON":
            season_index = column_index
        per_game_cols.append((name, []))

    header_count = len(per_game_cols)

    # Add another column for the player key; it is always the last one.
    per_game_cols.append(("PlayerKey", []))

    if age_column_index == -1:
        raise RuntimeError("Age column index not found")
    if season_index == -1:
        raise RuntimeError("Season column index not found")

    player_key = grab_player_key(player_url)
    for row_element in per_game_table_rows[1:]:
        cells = [cell.text_content() for cell in row_element]

        # A row that does not line up with the header would leave the
        # columns with unequal lengths, so it is skipped.
        if len(cells) != header_count:
            continue

        # An empty Age cell marks an aggregate (Career) row, which we skip.
        if cells[age_column_index] == "":
            continue

        for column_index, data in enumerate(cells):
            if column_index == season_index:
                # Format the season as its last year.
                data = int(data[0:4]) + 1
            else:
                # Convert integer-looking values; leave everything else as
                # text. Only ValueError is expected from int() on a string.
                try:
                    data = int(data)
                except ValueError:
                    pass
            per_game_cols[column_index][1].append(data)

        # Add the player key to the dedicated last column.
        per_game_cols[-1][1].append(player_key)

    columns_by_title = {title: column for (title, column) in per_game_cols}
    return pd.DataFrame(columns_by_title)


### Per Game Scraping - Example

In [2]:
# Example usage: scrape the Per Game table for a single player page.
# Run this cell only after the cells that define grab_player_key and
# create_per_game_dataframe; on a fresh kernel it otherwise raises NameError.
url = "https://www.basketball-reference.com/players/a/antetgi01.html"
# Bare last expression so the notebook displays the value returned by the call.
create_per_game_dataframe(url)

NameError: name 'create_per_game_dataframe' is not defined