In [1]:
#Import libraries 
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

#Player page URLs for the Hall of Fame classes of the 1990s, listed from the 1999 class back to the 1990 class.
#The trailing comment on each entry gives the player's name and induction year.
player_urls = [
    "https://www.basketball-reference.com/players/m/mchalke01.html", #Kevin McHale 1999
    "https://www.basketball-reference.com/players/b/birdla01.html", #Larry Bird 1998
    "https://www.basketball-reference.com/players/r/risenar01.html", #Arnie Risen 1998
    "https://www.basketball-reference.com/players/e/englial01.html", #Alex English 1997
    "https://www.basketball-reference.com/players/h/howelba01.html", #Bailey Howell 1997
    "https://www.basketball-reference.com/players/g/gervige01.html", #George Gervin 1996
    "https://www.basketball-reference.com/players/g/goodrga01.html", #Gail Goodrich 1996
    "https://www.basketball-reference.com/players/t/thompda01.html", #David Thompson 1996
    "https://www.basketball-reference.com/players/y/yardlge01.html", #George Yardley 1996
    "https://www.basketball-reference.com/players/a/abdulka01.html", #Kareem Abdul-Jabbar 1995
    "https://www.basketball-reference.com/players/m/mikkeve01.html", #Vern Mikkelsen 1995
    "https://www.basketball-reference.com/players/j/jeannbu01.html", #Buddy Jeannette 1994
    "https://www.basketball-reference.com/players/b/bellawa01.html", #Walt Bellamy 1993
    "https://www.basketball-reference.com/players/e/ervinju01.html", #Julius Erving 1993
    "https://www.basketball-reference.com/players/i/isselda01.html", #Dan Issel 1993
    "https://www.basketball-reference.com/players/m/mcguidi01.html", #Dick McGuire 1993
    "https://www.basketball-reference.com/players/m/murphca01.html", #Calvin Murphy 1993
    "https://www.basketball-reference.com/players/w/waltobi01.html", #Bill Walton 1993
    "https://www.basketball-reference.com/players/h/hawkico01.html", #Connie Hawkins 1992
    "https://www.basketball-reference.com/players/l/laniebo01.html", #Bob Lanier 1992
    "https://www.basketball-reference.com/players/a/architi01.html", #Tiny Archibald 1991
    "https://www.basketball-reference.com/players/c/cowenda01.html", #Dave Cowens 1991
    "https://www.basketball-reference.com/players/g/gallaha01.html", #Harry Gallatin 1991
    "https://www.basketball-reference.com/players/b/bingda01.html", #Dave Bing 1990
    "https://www.basketball-reference.com/players/h/hayesel01.html", #Elvin Hayes 1990
    "https://www.basketball-reference.com/players/j/johnsne01.html", #Neil Johnston 1990
    "https://www.basketball-reference.com/players/m/monroea01.html" #Earl Monroe 1990
]

#Headers sent with every request; a browser-style User-Agent replaces the default one used by requests
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}

#Initialize a list to store all players' data (one dict per successfully scraped player)
all_players_data = []

#Player position abbreviations: full position name -> abbreviation.
#get_position looks names up here and returns the name unchanged when it has no entry.
position_map = {
    'Center': 'C',
    'Power Forward': 'PF',
    'Small Forward': 'SF',
    'Shooting Guard': 'SG',
    'Point Guard': 'PG'
}

#Extract the player's primary position from the page's bio paragraphs
def get_position(soup):
    """Return the abbreviated primary position found on a player page.

    Scans the <p> elements in document order and uses the first one whose
    text contains 'Position:'. The text after that label (cut off at
    'Shoots:' when present, with '▪' removed and whitespace collapsed) is
    split on commas, treating ' and ' as a comma; only the first entry is
    kept. The entry is translated through position_map, and returned
    unchanged when the map has no key for it.

    Returns None when no paragraph mentions 'Position:'.
    """
    for paragraph in soup.find_all('p'):
        if 'Position:' not in paragraph.get_text():
            continue

        #Everything after the label, without the bullet separator
        raw = paragraph.get_text(separator=" ").split("Position:")[1]
        raw = raw.strip().replace('▪', '').strip()

        #The same paragraph may continue with the shooting hand; drop that part
        if 'Shoots:' in raw:
            raw = raw.split('Shoots:')[0].strip()

        #Collapse runs of whitespace, then treat " and " like a comma
        listed = " ".join(raw.split()).replace(' and ', ', ')
        first_listed = listed.split(',')[0].strip()
        return position_map.get(first_listed, first_listed)

    return None



#Function to safely extract MVP count
def get_mvp_count(soup):
    """Sum the MVP awards shown in the page's award badges.

    Considers every <li class="poptip"> whose text contains 'MVP' but none
    of 'Finals', 'AS' or 'MBWA NBA'. A badge written as 'Nx ...' adds N;
    any other matching badge adds 1.

    Returns 0 when no badge matches.
    """
    mvp_count = 0
    mvp_elem = soup.find_all('li', {'class': 'poptip'}, string=lambda s: s and 'MVP' in s and 'Finals' not in s and 'AS' not in s and 'MBWA NBA' not in s)
    for elem in mvp_elem:
        prefix, sep, _ = elem.text.strip().partition('x')
        #Only a purely numeric prefix is a multiplier ("3x MVP"); a label that merely
        #contains the letter 'x' somewhere counts as one award instead of raising ValueError
        if sep and prefix.strip().isdecimal():
            mvp_count += int(prefix)
        else:
            mvp_count += 1
    return mvp_count

#Function to safely extract Scoring Championships count
def get_scoring_champ_count(soup):
    """Sum the scoring titles shown in the page's award badges.

    Considers every <li> whose data-tip attribute contains
    'NBA Scoring Champ'. A badge written as 'Nx ...' adds N; any other
    matching badge adds 1.

    Returns 0 when no badge matches.
    """
    scoring_champ_count = 0
    scoring_champ_elem = soup.find_all('li', {'data-tip': lambda x: x and 'NBA Scoring Champ' in x})
    for elem in scoring_champ_elem:
        prefix, sep, _ = elem.text.strip().partition('x')
        #Only a purely numeric prefix is a multiplier ("4x Scoring Champ"); a label that merely
        #contains the letter 'x' somewhere counts as one title instead of raising ValueError
        if sep and prefix.strip().isdecimal():
            scoring_champ_count += int(prefix)
        else:
            scoring_champ_count += 1
    return scoring_champ_count

#Function to safely extract NBA Championships count
def get_chips_count(soup):
    """Sum the championships shown in the page's award badges.

    Considers every <li> matched by class_='' whose text contains
    'NBA Champ', 'ABA Champ' or 'BAA Champ'. A badge written as 'Nx ...'
    adds N; any other matching badge adds 1.

    Returns 0 when no badge matches.
    """
    chips_count = 0
    chip_elem = soup.find_all('li', class_='', string=lambda s: s and ('NBA Champ' in s or 'ABA Champ' in s or 'BAA Champ' in s))
    for elem in chip_elem:
        prefix, sep, _ = elem.text.strip().partition('x')
        #Only a purely numeric prefix is a multiplier ("3x NBA Champ"); a label that merely
        #contains the letter 'x' somewhere counts as one title instead of raising ValueError
        if sep and prefix.strip().isdecimal():
            chips_count += int(prefix)
        else:
            chips_count += 1
    return chips_count

#Loop over each player URL to scrape their data

#Seconds to pause after every request, whether or not it succeeded.
#NOTE(review): chosen to stay under roughly 20 requests per minute - confirm against the site's current bot-traffic policy
REQUEST_DELAY_SECONDS = 4
#Seconds to wait for a response before abandoning the request
REQUEST_TIMEOUT_SECONDS = 30
#URLs that produced no row (network error, non-200 status, or a parsing error)
failed_urls = []


def _leading_count(label):
    """Return N for a badge label of the form 'Nx ...', otherwise 1."""
    prefix, sep, _ = label.strip().partition('x')
    #Only a purely numeric prefix is a multiplier; an 'x' elsewhere in the label is ignored
    if sep and prefix.strip().isdecimal():
        return int(prefix)
    return 1


#Safeguard function for extracting stats
def safe_find(tag, text):
    """Return the text of a career-summary stat, or 0.0 when it is absent.

    Looks in the module-level `soup` (set by the loop below) for the <span>
    whose data-tip equals `text` and returns the stripped text of the second
    <p> that follows it. `tag` is accepted for compatibility but is not used.
    """
    element = soup.find('span', {'data-tip': text})
    if element:
        return element.find_next('p').find_next('p').text.strip()
    return 0.0


#Extract All-NBA, All-ABA, or All-BAA awards
def award_count(soup, award_name, tag='a', attributes=None):
    """Return the count shown on the first `tag` element whose text contains `award_name`.

    A label written as 'Nx ...' yields N, any other matching label yields 1,
    and 0 is returned when no element matches.
    """
    elem = soup.find(tag, attributes, string=lambda s: s and award_name in s)
    if elem is None:
        return 0
    return _leading_count(elem.text)


for url in player_urls:
    try:
        #A timeout keeps one unresponsive request from hanging the whole run
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as e:
        print(f"Request failed for {url}: {e}")
        failed_urls.append(url)
        time.sleep(REQUEST_DELAY_SECONDS)
        continue

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')

        try:
            #Get player name
            player_name = soup.find('h1').find('span').text.strip()

            #Get player position
            position = get_position(soup)

            #Get player career length
            career_length_elem = soup.find('strong', string=lambda s: s and 'Career Length:' in s)
            career_length = int(career_length_elem.next_sibling.strip().split()[0])

            #Career summary stats.
            #int()/float() raise ValueError when the page shows a non-numeric placeholder such as '-';
            #the except below then reports the URL and the player is left out of the table
            #(Buddy Jeannette is entered by hand in a later cell for this reason).
            games = int(safe_find('Games', 'Games'))
            ppg = float(safe_find('Points', 'Points'))
            rpg = float(safe_find('Total Rebounds', 'Total Rebounds'))
            apg = float(safe_find('Assists', 'Assists'))

            #Extract PER
            per_elem = soup.find('span', {'data-tip': lambda x: x and 'Player Efficiency Rating' in x})
            per = float(per_elem.find_next('p').find_next('p').text.strip() if per_elem else 0.0)

            #Extract Field Goal Percentage and Free Throw Percentage
            fg_pct = float(safe_find('Field Goal Percentage', 'Field Goal Percentage'))
            ft_pct = float(safe_find('Free Throw Percentage', 'Free Throw Percentage'))

            #Extract Win Shares
            win_shares_elem = soup.find('span', {'data-tip': lambda x: x and 'Win Shares' in x})
            win_shares = float(win_shares_elem.next_sibling.find_next('p').text.strip() if win_shares_elem else 0.0)

            #Extract awards counts
            mvp_count = get_mvp_count(soup)
            scoring_champ_count = get_scoring_champ_count(soup)
            chips_count = get_chips_count(soup)

            #Extract All-Stars: the count is the 'Nx' prefix of the badge text
            all_star_elem = soup.find('li', {'class': 'all_star'})
            if all_star_elem is None:
                all_stars = 0
            else:
                all_star_link = all_star_elem.find('a')
                all_star_label = all_star_link.text if all_star_link is not None else all_star_elem.text
                all_stars = _leading_count(all_star_label)

            #Extract All-NBA, All-ABA, or All-BAA awards
            all_nba = award_count(soup, 'All-NBA', tag='li', attributes={'class':  ""})
            all_aba = award_count(soup, 'All-ABA')
            all_baa = award_count(soup, 'All-BAA')
            all_nba_total = all_nba + all_aba + all_baa

            #Extract All-Defensive selections from the first matching badge
            all_defense_elem = soup.find('li', {'class': 'poptip'}, string=lambda s: s and 'All-Defensive' in s)
            all_defense_count = _leading_count(all_defense_elem.text) if all_defense_elem is not None else 0

            #All-Rookie and ROY are recorded as 0/1 flags
            all_rookie = 1 if soup.find('li', {'data-tip': lambda s: s and 'All-Rookie' in s}) else 0
            roy = 1 if soup.find('li', {'class': 'poptip'}, string=lambda s: s and 'ROY' in s and 'MBWA NBA' not in s) else 0

            #Extract Defensive Player of the Year awards, summed over every matching badge
            dpoy_elems = soup.find_all('li', class_='poptip', string=lambda s: s and 'Def. POY' in s)
            dpoy_count = sum(_leading_count(elem.text) for elem in dpoy_elems)

            #All players are in the Hall of Fame
            hof = 1

            #Store the data
            player_data = {
                'Name': player_name,
                'Position': position,
                'Games': games,
                'Career Length': career_length,
                'PPG': ppg,
                'RPG': rpg,
                'APG': apg,
                'PER': per,
                'FG%': fg_pct,
                'FT%': ft_pct,
                'Win Shares': win_shares,
                'All-Stars': all_stars,
                'All-NBA': all_nba_total,
                'All-Defense': all_defense_count,
                'All-Rookie Team': all_rookie,
                'MVPs': mvp_count,
                'Chips': chips_count,
                'ROY': roy,
                'DPOYs': dpoy_count,
                'Scoring Champ': scoring_champ_count,
                'HOF': hof
            }
            all_players_data.append(player_data)

        except Exception as e:
            print(f"Error scraping data for {url}: {e}")
            failed_urls.append(url)
    else:
        #Report pages that were refused or missing instead of dropping them silently
        print(f"Skipping {url}: HTTP status {response.status_code}")
        failed_urls.append(url)

    #Be polite with requests, avoid overwhelming the server (also after a failed response)
    time.sleep(REQUEST_DELAY_SECONDS)

#Create a DataFrame from the collected data
df_1990s = pd.DataFrame(all_players_data)

Error scraping data for https://www.basketball-reference.com/players/j/jeannbu01.html: could not convert string to float: '-'


In [2]:
#Lift the column limit so wide DataFrames are displayed without truncation
pd.options.display.max_columns = None

In [3]:
#Display the scraped players table to inspect the result
df_1990s

Unnamed: 0,Name,Position,Games,Career Length,PPG,RPG,APG,PER,FG%,FT%,Win Shares,All-Stars,All-NBA,All-Defense,All-Rookie Team,MVPs,Chips,ROY,DPOYs,Scoring Champ,HOF
0,Kevin McHale,PF,971,13,17.9,7.3,1.7,20.0,55.4,79.8,113.0,7,1,6,1,0,3,0,0,0,1
1,Larry Bird,SF,897,13,24.3,10.0,6.3,23.5,49.6,88.6,145.8,12,10,3,1,3,3,1,0,0,1
2,Arnie Risen,C,637,10,12.0,9.7,1.7,16.7,38.1,69.9,56.0,4,1,0,0,0,2,0,0,0,1
3,Alex English,SF,1193,15,21.5,5.5,3.6,19.9,50.7,83.2,100.7,8,3,0,0,0,0,0,0,1,1
4,Bailey Howell,PF,950,12,18.7,9.9,2.0,19.1,48.0,76.2,114.8,6,1,0,0,0,2,0,0,0,1
5,George Gervin,SG,1060,14,25.1,5.3,2.6,21.4,50.4,84.1,116.3,12,9,0,1,0,0,0,0,4,1
6,Gail Goodrich,SG,1031,14,18.6,3.2,4.7,16.7,45.6,80.7,76.3,5,1,0,0,0,1,0,0,0,1
7,David Thompson,SG,592,9,22.7,4.1,3.3,19.9,50.5,78.1,63.3,5,3,0,1,0,0,1,0,0,1
8,George Yardley,SF,472,7,19.2,8.9,1.7,20.6,42.2,78.0,58.5,6,2,0,0,0,0,0,0,1,1
9,Kareem Abdul-Jabbar,C,1560,20,24.6,11.2,3.6,24.6,55.9,72.1,273.4,19,15,11,1,6,6,1,0,2,1


In [6]:
#List the column names of the scraped table
df_1990s.columns

Index(['Name', 'Position', 'Games', 'Career Length', 'PPG', 'RPG', 'APG',
       'PER', 'FG%', 'FT%', 'Win Shares', 'All-Stars', 'All-NBA',
       'All-Defense', 'All-Rookie Team', 'MVPs', 'Chips', 'ROY', 'DPOYs',
       'Scoring Champ', 'HOF'],
      dtype='object')

In [13]:
#Buddy Jeannette's page failed to scrape (see the error printed by the scraping cell),
#so his row is entered by hand using the same columns as the scraped table
jeannette_row = {
    'Name': 'Buddy Jeannette',
    'Position': 'SG',
    'Games': 139,
    'Career Length': 3,
    'PPG': 7.2,
    'RPG': 0.0,
    'APG': 2.1,
    'PER': 0.0,
    'FG%': 34.1,
    'FT%': 78.1,
    'Win Shares': 15.7,
    'All-Stars': 0,
    'All-NBA': 1,
    'All-Defense': 0,
    'All-Rookie Team': 0,
    'MVPs': 0,
    'Chips': 0,
    'ROY': 0,
    'DPOYs': 0,
    'Scoring Champ': 0,
    'HOF': 1,
}

#One-row frame holding the manual entry
buddy_jeannettee = pd.DataFrame([jeannette_row])

#Append the manual row to the scraped players and renumber the index
df = pd.concat([df_1990s, buddy_jeannettee], ignore_index=True)

#Show the combined table
df

Unnamed: 0,Name,Position,Games,Career Length,PPG,RPG,APG,PER,FG%,FT%,Win Shares,All-Stars,All-NBA,All-Defense,All-Rookie Team,MVPs,Chips,ROY,DPOYs,Scoring Champ,HOF
0,Kevin McHale,PF,971,13,17.9,7.3,1.7,20.0,55.4,79.8,113.0,7,1,6,1,0,3,0,0,0,1
1,Larry Bird,SF,897,13,24.3,10.0,6.3,23.5,49.6,88.6,145.8,12,10,3,1,3,3,1,0,0,1
2,Arnie Risen,C,637,10,12.0,9.7,1.7,16.7,38.1,69.9,56.0,4,1,0,0,0,2,0,0,0,1
3,Alex English,SF,1193,15,21.5,5.5,3.6,19.9,50.7,83.2,100.7,8,3,0,0,0,0,0,0,1,1
4,Bailey Howell,PF,950,12,18.7,9.9,2.0,19.1,48.0,76.2,114.8,6,1,0,0,0,2,0,0,0,1
5,George Gervin,SG,1060,14,25.1,5.3,2.6,21.4,50.4,84.1,116.3,12,9,0,1,0,0,0,0,4,1
6,Gail Goodrich,SG,1031,14,18.6,3.2,4.7,16.7,45.6,80.7,76.3,5,1,0,0,0,1,0,0,0,1
7,David Thompson,SG,592,9,22.7,4.1,3.3,19.9,50.5,78.1,63.3,5,3,0,1,0,0,1,0,0,1
8,George Yardley,SF,472,7,19.2,8.9,1.7,20.6,42.2,78.0,58.5,6,2,0,0,0,0,0,0,1,1
9,Kareem Abdul-Jabbar,C,1560,20,24.6,11.2,3.6,24.6,55.9,72.1,273.4,19,15,11,1,6,6,1,0,2,1


In [14]:
#Write the combined table to a CSV file in the working directory
output_csv = '1990s HOF Players.csv'
df.to_csv(output_csv)