In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd

In [2]:
# Goal: given the URLs of NBA players' pages, scrape each player's per-game stats.
# get_per_game_stats takes one player URL and returns a DataFrame of that player's per-game stats.
def get_per_game_stats(link, timeout=30):
    """Scrape the per-game stats table of one player page into a DataFrame.

    Downloads ``link``, finds the ``<table>`` with class ``stats_table`` and
    id ``per_game``, and reads every ``<tr class="full_table">`` row in it.
    For each row, the text of its ``<th>`` becomes the ``Season`` value and the
    text of each ``<td>`` becomes one column value. Column names are the
    ``data-stat`` attributes of the cells in the first such row. All values
    are kept as the raw text found in the page (strings).

    Args:
        link: URL of the player page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A DataFrame with one row per ``full_table`` row, in page order. The
        columns are ``Season``, ``Player`` (the stripped text of the page's
        first ``<h1>``), followed by the ``data-stat`` columns.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page has no per-game table, or the table has no
            ``full_table`` rows.
    """
    # A timeout keeps one unresponsive request from hanging the whole run.
    res = requests.get(link, timeout=timeout)
    # Fail here on an HTTP error rather than trying to parse an error page,
    # which would only blow up later with an unhelpful AttributeError.
    res.raise_for_status()
    soup = BeautifulSoup(res.content, 'html.parser')

    per_game_stats = soup.find('table', class_='stats_table', id='per_game')
    if per_game_stats is None:
        raise ValueError('No per-game stats table found at %s' % link)

    # find_all() returns a list of Tag objects, which is how Beautiful Soup
    # represents an HTML element.
    per_game_stats_rows = per_game_stats.find_all('tr', class_='full_table')
    if not per_game_stats_rows:
        raise ValueError('Per-game stats table at %s has no rows' % link)

    # Column names come from the first row's cells.
    per_game_stats_labels_list = [
        cell['data-stat'] for cell in per_game_stats_rows[0].find_all('td')
    ]
    per_game_stats_list = [
        [cell.text for cell in row.find_all('td')]
        for row in per_game_stats_rows
    ]

    # Keep seasons in the same order as the rows they were read from.
    # De-duplicating or re-sorting them would detach the labels from their
    # stats (or make the lengths disagree when a label repeats).
    seasons_list = [row.find('th').text for row in per_game_stats_rows]

    player_name = soup.find('h1').text.strip()

    df = pd.DataFrame(per_game_stats_list, columns=per_game_stats_labels_list)
    df.insert(0, 'Season', seasons_list)
    df.insert(1, 'Player', player_name)
    return df


In [3]:
#create function to grab stats from multiple URLs
def get_per_game_stats_multiple_links(links, ignore_index=False):
    """Scrape several player pages and stack the results into one DataFrame.

    Calls ``get_per_game_stats`` once per URL, in the order given, and
    concatenates the resulting frames row-wise.

    Args:
        links: Iterable of player page URLs.
        ignore_index: Passed to ``pd.concat``. With the default ``False`` each
            player's rows keep their own index labels, so labels repeat across
            players; pass ``True`` to get a fresh, unique index.

    Returns:
        A DataFrame holding the rows of every scraped page, or an empty
        DataFrame when ``links`` yields no URLs.
    """
    df_list = [get_per_game_stats(link) for link in links]
    if not df_list:
        # pd.concat raises ValueError when given nothing to concatenate.
        return pd.DataFrame()
    return pd.concat(df_list, ignore_index=ignore_index)

In [9]:
# Player pages to scrape, one URL per player.
player_links = [
    'https://www.basketball-reference.com/players/j/jamesle01.html',
    'https://www.basketball-reference.com/players/b/bryanko01.html',
    'https://www.basketball-reference.com/players/d/duranke01.html',
    'https://www.basketball-reference.com/players/c/curryst01.html',
    'https://www.basketball-reference.com/players/h/hardeja01.html',
    'https://www.basketball-reference.com/players/w/westbru01.html',
    'https://www.basketball-reference.com/players/a/antetgi01.html',
    'https://www.basketball-reference.com/players/l/lillada01.html',
    'https://www.basketball-reference.com/players/g/goberru01.html',
    'https://www.basketball-reference.com/players/g/georgpa01.html',
    'https://www.basketball-reference.com/players/d/davisan02.html',
    'https://www.basketball-reference.com/players/l/lowryky01.html',
    'https://www.basketball-reference.com/players/g/griffbl01.html',
    'https://www.basketball-reference.com/players/p/paulch01.html',
    'https://www.basketball-reference.com/players/b/bookede01.html',
]

# Scrape every page, stack the per-player frames, and save the combined table
# to CSV without writing the index column.
all_players_stats = get_per_game_stats_multiple_links(player_links)
all_players_stats.to_csv('basketball_reference.csv', index=False)