### Code to pull a player's per-game stats

In [1]:
import requests
import re
import pandas as pd
from lxml import html

## Helper Functions

In [2]:
def grab_player_key(player_url):
    """Return the player key from the bball reference player url.

    The key is the path segment that immediately precedes a literal
    ``.html`` in ``player_url`` (for example ``"antetgi01"`` in
    ``".../players/a/antetgi01.html"``).

    Args:
        player_url: URL (or path) string of the player page.

    Returns:
        The matched key as a string.

    Raises:
        ValueError: if no ``/<key>.html`` segment is present in ``player_url``.
    """
    # The dot is escaped so that only a real ".html" extension matches; an
    # unescaped "." would accept any character (e.g. "abchtml" -> "ab").
    match = re.search(r'(?<=/)[^/]+(?=\.html)', player_url)
    if match is None:
        raise ValueError("No player key found in URL: {!r}".format(player_url))
    return match.group(0)

## Per Game Scraping - To Pandas Dataframe

In [5]:
import pandas as pd

def create_per_game_dataframe(player_url, timeout=30):
    """Create a dataframe with the Per Game stats of a player.

    Downloads ``player_url``, selects the rows of the element whose id is
    ``per_game`` and turns them into a DataFrame with one column per table
    header plus a trailing ``PlayerKey`` column.

    Args:
        player_url: URL of the player page. It is also passed to
            ``grab_player_key`` to obtain the value of the ``PlayerKey`` column.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A pandas DataFrame with one row per table row that has a non-empty
        Age cell. The Season cell is replaced by the integer made of its
        first four characters plus one (so "2013-14" becomes 2014). Every
        other cell is converted to ``int`` when possible and otherwise kept
        as text. Cells missing from a row that is shorter than the header
        are filled with ``None``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        RuntimeError: if the table, its Age header or its Season header
            cannot be found.
        ValueError: if the Season cell of a kept row does not start with
            four digits.
    """
    # Read html, find appropriate table
    page = requests.get(player_url, timeout=timeout)
    page.raise_for_status()
    tree = html.fromstring(page.content)
    per_game_table_rows = tree.xpath('//*[@id="per_game"]/*/tr')
    if not per_game_table_rows:
        raise RuntimeError("Per game table not found")

    # Get Column Headers (the first row of the table)
    column_names = [header.text_content() for header in per_game_table_rows[0]]

    age_column_index = -1
    season_index = -1
    for column_index, name in enumerate(column_names):
        if name.upper() == "AGE":
            age_column_index = column_index
        elif name.upper() == "SEASON":
            season_index = column_index

    if age_column_index == -1:
        raise RuntimeError("Age column index not found")
    if season_index == -1:
        raise RuntimeError("Season column index not found")
    print("Age Index: ", age_column_index)
    print("Season Index: ", season_index)

    player_key = grab_player_key(player_url)

    # One list of values per header column; the player keys are collected
    # separately so they can never end up in a stat column.
    column_values = [[] for _ in column_names]
    player_keys = []
    required_cells = max(age_column_index, season_index) + 1

    for row_element in per_game_table_rows[1:]:
        cells = [cell.text_content() for cell in row_element]

        # Rows too short to hold both the Season and Age cells cannot be
        # interpreted, so they are skipped.
        if len(cells) < required_cells:
            continue

        # An empty Age cell marks an aggregate (e.g. Career) row: skip it.
        if cells[age_column_index] == "":
            continue

        for column_index, values in enumerate(column_values):
            if column_index >= len(cells):
                # Short row: pad so every column keeps the same length.
                values.append(None)
                continue

            data = cells[column_index]
            if column_index == season_index:
                # Format the season to its last year, e.g. "2013-14" -> 2014.
                data = int(data[0:4]) + 1
            else:
                # Convert integer-looking text to int; leave the rest as text.
                try:
                    data = int(data)
                except ValueError:
                    pass
            values.append(data)

        # Add the player key to each kept row.
        player_keys.append(player_key)

    per_game_cols = dict(zip(column_names, column_values))
    per_game_cols["PlayerKey"] = player_keys
    return pd.DataFrame(per_game_cols)


### Per Game Scraping - Example

In [6]:
# Example: build the per-game DataFrame for a single player page.
# The call is left as a bare final expression; in a notebook cell this is
# what causes the returned DataFrame to be displayed.
url = "https://www.basketball-reference.com/players/a/antetgi01.html"
create_per_game_dataframe(url)

Age Index:  1
Season Index:  0


Unnamed: 0,Season,Age,Tm,Lg,Pos,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,PlayerKey
0,2014,19,MIL,NBA,SF,77,23,24.6,2.2,5.4,0.414,0.5,1.5,0.347,1.7,3.9,0.44,0.463,1.8,2.6,0.683,1.0,3.4,4.4,1.9,0.8,0.8,1.6,2.2,6.8,antetgi01
1,2015,20,MIL,NBA,SG,81,71,31.4,4.7,9.6,0.491,0.1,0.5,0.159,4.6,9.1,0.511,0.496,3.2,4.3,0.741,1.2,5.5,6.7,2.6,0.9,1.0,2.1,3.1,12.7,antetgi01
2,2016,21,MIL,NBA,PG,80,79,35.3,6.4,12.7,0.506,0.4,1.4,0.257,6.1,11.3,0.537,0.52,3.7,5.1,0.724,1.4,6.2,7.7,4.3,1.2,1.4,2.6,3.2,16.9,antetgi01
3,2017,22,MIL,NBA,SF,80,80,35.6,8.2,15.7,0.521,0.6,2.3,0.272,7.6,13.5,0.563,0.541,5.9,7.7,0.77,1.8,7.0,8.8,5.4,1.6,1.9,2.9,3.1,22.9,antetgi01
4,2018,23,MIL,NBA,PF,75,75,36.7,9.9,18.7,0.529,0.6,1.9,0.307,9.3,16.8,0.554,0.545,6.5,8.5,0.76,2.1,8.0,10.0,4.8,1.5,1.4,3.0,3.1,26.9,antetgi01
5,2019,24,MIL,NBA,PF,72,72,32.8,10.0,17.3,0.578,0.7,2.8,0.256,9.3,14.5,0.641,0.599,6.9,9.5,0.729,2.2,10.3,12.5,5.9,1.3,1.5,3.7,3.2,27.7,antetgi01
6,2020,25,MIL,NBA,PF,63,63,30.4,10.9,19.7,0.553,1.4,4.7,0.304,9.5,15.0,0.631,0.589,6.3,10.0,0.633,2.2,11.4,13.6,5.6,1.0,1.0,3.7,3.1,29.5,antetgi01
