In [2]:
#Import libraries 
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

#Basketball-Reference player pages for the 1980s Hall of Fame inductees.
#Each trailing comment gives the player's name and induction year.
player_urls = [
    "https://www.basketball-reference.com/players/j/joneskc01.html", #K.C. Jones 1989
    "https://www.basketball-reference.com/players/w/wilkele01.html", #Lenny Wilkens 1989
    "https://www.basketball-reference.com/players/l/lovelcl01.html", #Clyde Lovellette 1988
    "https://www.basketball-reference.com/players/u/unselwe01.html", #Wes Unseld 1988
    "https://www.basketball-reference.com/players/b/barryri01.html", #Rick Barry 1987
    "https://www.basketball-reference.com/players/f/fraziwa01.html", #Walt Frazier 1987
    "https://www.basketball-reference.com/players/h/houbrbo01.html", #Bob Houbregs 1987
    "https://www.basketball-reference.com/players/m/maravpe01.html", #Pete Maravich 1987
    "https://www.basketball-reference.com/players/w/wanzebo01.html", #Bobby Wanzer 1987
    "https://www.basketball-reference.com/players/c/cunnibi01.html", #Billy Cunningham 1986
    "https://www.basketball-reference.com/players/h/heinsto01.html", #Tom Heinsohn 1986
    "https://www.basketball-reference.com/players/c/cervial01.html", #Al Cervi 1985
    "https://www.basketball-reference.com/players/t/thurmna01.html", #Nate Thurmond 1985
    "https://www.basketball-reference.com/players/h/havlijo01.html", #John Havlicek 1984
    "https://www.basketball-reference.com/players/j/jonessa01.html", #Sam Jones 1984
    "https://www.basketball-reference.com/players/b/bradlbi01.html", #Bill Bradley 1983
    "https://www.basketball-reference.com/players/d/debusda01.html", #Dave DeBusschere 1983
    "https://www.basketball-reference.com/players/t/twymaja01.html", #Jack Twyman 1983
    "https://www.basketball-reference.com/players/g/greerha01.html", #Hal Greer 1982
    "https://www.basketball-reference.com/players/m/martisl01.html", #Slater Martin 1982
    "https://www.basketball-reference.com/players/r/ramsefr01.html", #Frank Ramsey 1982
    "https://www.basketball-reference.com/players/r/reedwi01.html", #Willis Reed 1982
    "https://www.basketball-reference.com/players/l/lucasje01.html", #Jerry Lucas 1980
    "https://www.basketball-reference.com/players/r/roberos01.html" #Oscar Robertson 1980
]

#HTTP headers sent with every page request (a desktop-browser User-Agent string)
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/58.0.3029.110 Safari/537.36"
    ),
}

#Accumulator for the scraped rows: one dict per player
all_players_data = list()

#Lookup from a full position name to its abbreviation
position_map = {
    "Center": "C",
    "Power Forward": "PF",
    "Small Forward": "SF",
    "Shooting Guard": "SG",
    "Point Guard": "PG",
}

#Pull the player's primary position out of the page's "Position:" paragraph
def get_position(soup):
    """Return the first position listed after 'Position:' on the page.

    Every <p> element is checked in document order and the first one whose
    text contains 'Position:' is used. From the text after 'Position:' the
    '▪' character is removed, anything from 'Shoots:' onward is dropped,
    whitespace runs are collapsed and ' and ' is treated like a comma. The
    first comma-separated entry is then abbreviated through `position_map`;
    a name that is not a key of `position_map` is returned unchanged.

    Returns None when no paragraph contains 'Position:'.
    """
    for paragraph in soup.find_all('p'):
        if 'Position:' not in paragraph.get_text():
            continue

        #Keep only what follows the "Position:" label
        raw = paragraph.get_text(separator=" ").split("Position:")[1].strip()
        raw = raw.replace('▪', '').strip()
        if 'Shoots:' in raw:
            raw = raw.split('Shoots:')[0].strip()

        #Collapse whitespace and make "A and B" read as "A, B"
        listed = " ".join(raw.split()).replace(' and ', ', ')
        primary = listed.split(',')[0].strip()
        return position_map.get(primary, primary)

    return None



#Count the MVP awards listed on the player's page
def get_mvp_count(soup):
    """Return the total number of MVP awards found on the page.

    Looks at <li class="poptip"> items whose string contains 'MVP' but none
    of 'Finals', 'AS' or 'MBWA NBA'. An item whose text contains 'x'
    contributes the integer before the first 'x' (e.g. '3x MVP' -> 3); any
    other matching item contributes 1.
    """
    excluded_markers = ('Finals', 'AS', 'MBWA NBA')

    def _is_plain_mvp(label):
        #Same truthiness as: label and 'MVP' in label and no excluded marker in label
        if not label or 'MVP' not in label:
            return False
        return not any(marker in label for marker in excluded_markers)

    total = 0
    for item in soup.find_all('li', {'class': 'poptip'}, string=_is_plain_mvp):
        label = item.text.strip()
        #"Nx MVP" carries an explicit count; anything else is a single award
        total += int(label.split('x')[0]) if 'x' in label else 1
    return total

#Count the scoring titles listed on the player's page
def get_scoring_champ_count(soup):
    """Return the total number of scoring championships found on the page.

    Looks at <li> items whose 'data-tip' attribute contains
    'NBA Scoring Champ'. An item whose text contains 'x' contributes the
    integer before the first 'x'; any other matching item contributes 1.
    """
    def _mentions_scoring_title(tip):
        return tip and 'NBA Scoring Champ' in tip

    entries = soup.find_all('li', {'data-tip': _mentions_scoring_title})
    labels = (entry.text.strip() for entry in entries)
    #"Nx ..." carries an explicit count; anything else is a single title
    return sum(int(label.split('x')[0]) if 'x' in label else 1 for label in labels)

#Count the league championships listed on the player's page
def get_chips_count(soup):
    """Return the total number of NBA/ABA/BAA championships found on the page.

    Looks at <li> items (searched with class_='') whose string contains
    'NBA Champ', 'ABA Champ' or 'BAA Champ'. An item whose text contains 'x'
    contributes the integer before the first 'x'; any other matching item
    contributes 1.
    """
    league_titles = ('NBA Champ', 'ABA Champ', 'BAA Champ')

    def _is_league_title(label):
        return bool(label) and any(title in label for title in league_titles)

    total = 0
    for item in soup.find_all('li', class_='', string=_is_league_title):
        #partition gives the text before the first 'x' and tells us whether an 'x' was present
        count_part, marker, _rest = item.text.strip().partition('x')
        total += int(count_part) if marker else 1
    return total

#Seconds to wait after every request. Sports Reference's published bot policy
#blocks clients that exceed 20 requests per minute, so stay above 3 seconds
#(NOTE(review): confirm the current limit before lowering this).
REQUEST_DELAY_SECONDS = 3.5

#Seconds before an unresponsive request is abandoned instead of hanging the cell
REQUEST_TIMEOUT_SECONDS = 30


def _leading_count(text):
    """Return N for award text shaped like 'Nx Award'; any other text counts as 1.

    Text that contains an 'x' without a leading integer (for example inside a
    word) is treated as a single award rather than raising ValueError.
    """
    prefix, marker, _rest = text.strip().partition('x')
    prefix = prefix.strip()
    if marker and prefix.isdecimal():
        return int(prefix)
    return 1


def _pullout_stat(soup, label):
    """Return the stripped text of the second <p> after the <span> whose data-tip equals `label`.

    Returns 0.0 when no such <span> exists (presumably the second <p> holds
    the career figure - confirm against the page markup).
    """
    element = soup.find('span', {'data-tip': label})
    if element:
        return element.find_next('p').find_next('p').text.strip()
    return 0.0


def _award_count(soup, award_name, tag='a', attributes=None):
    """Return the count shown on the first `tag` whose string contains `award_name`, or 0 if none."""
    elem = soup.find(tag, attributes, string=lambda s: s and award_name in s)
    return _leading_count(elem.text) if elem else 0


def _all_star_count(soup):
    """Return the All-Star count from the <li class="all_star"> item, or 0 if the item is absent."""
    item = soup.find('li', {'class': 'all_star'})
    if item is None:
        return 0
    link = item.find('a')
    #Fall back to the item's own text when it holds no link
    return _leading_count((link if link is not None else item).get_text())


def _scrape_player(soup):
    """Build one player's row (a dict keyed by column name) from a parsed player page.

    Lookups for elements the page is expected to contain are not guarded, so a
    missing element or a non-numeric stat raises (AttributeError, ValueError,
    ...); the caller reports the error and skips the player.
    """
    #Get player name
    player_name = soup.find('h1').find('span').text.strip()

    #Get player position
    position = get_position(soup)

    #Get player career length: the first token of the text following the "Career Length:" label
    career_length_elem = soup.find('strong', string=lambda s: s and 'Career Length:' in s)
    career_length = int(career_length_elem.next_sibling.strip().split()[0])

    #Headline stats
    games = int(_pullout_stat(soup, 'Games'))
    ppg = float(_pullout_stat(soup, 'Points'))
    rpg = float(_pullout_stat(soup, 'Total Rebounds'))
    apg = float(_pullout_stat(soup, 'Assists'))

    #Extract PER (its data-tip is matched by substring rather than exact value)
    per_elem = soup.find('span', {'data-tip': lambda x: x and 'Player Efficiency Rating' in x})
    per = float(per_elem.find_next('p').find_next('p').text.strip() if per_elem else 0.0)

    #Extract Field Goal Percentage and Free Throw Percentage
    fg_pct = float(_pullout_stat(soup, 'Field Goal Percentage'))
    ft_pct = float(_pullout_stat(soup, 'Free Throw Percentage'))

    #Extract Win Shares (navigation kept exactly as before: next sibling, then the next <p>)
    win_shares_elem = soup.find('span', {'data-tip': lambda x: x and 'Win Shares' in x})
    win_shares = float(win_shares_elem.next_sibling.find_next('p').text.strip() if win_shares_elem else 0.0)

    #Extract awards counts
    mvp_count = get_mvp_count(soup)
    scoring_champ_count = get_scoring_champ_count(soup)
    chips_count = get_chips_count(soup)

    #Extract All-Stars
    all_stars = _all_star_count(soup)

    #Extract All-NBA, All-ABA, or All-BAA awards and combine them
    all_nba = _award_count(soup, 'All-NBA', tag='li', attributes={'class':  ""})
    all_aba = _award_count(soup, 'All-ABA')
    all_baa = _award_count(soup, 'All-BAA')
    all_nba_total = all_nba + all_aba + all_baa

    #All-Defense: count shown on the first matching item, 0 when there is none
    all_defense_elem = soup.find('li', {'class': 'poptip'}, string=lambda s: s and 'All-Defensive' in s)
    all_defense_count = _leading_count(all_defense_elem.get_text()) if all_defense_elem else 0

    #Flags (0/1) for All-Rookie team and Rookie of the Year
    all_rookie = 1 if soup.find('li', {'data-tip': lambda s: s and 'All-Rookie' in s}) else 0
    roy = 1 if soup.find('li', {'class': 'poptip'}, string=lambda s: s and 'ROY' in s and 'MBWA NBA' not in s) else 0

    #Defensive Player of the Year: summed over every matching item
    dpoy_count = sum(
        _leading_count(elem.text)
        for elem in soup.find_all('li', class_='poptip', string=lambda s: s and 'Def. POY' in s)
    )

    #All players are in the Hall of Fame
    hof = 1

    return {
        'Name': player_name,
        'Position': position,
        'Games': games,
        'Career Length': career_length,
        'PPG': ppg,
        'RPG': rpg,
        'APG': apg,
        'PER': per,
        'FG%': fg_pct,
        'FT%': ft_pct,
        'Win Shares': win_shares,
        'All-Stars': all_stars,
        'All-NBA': all_nba_total,
        'All-Defense': all_defense_count,
        'All-Rookie Team': all_rookie,
        'MVPs': mvp_count,
        'Chips': chips_count,
        'ROY': roy,
        'DPOYs': dpoy_count,
        'Scoring Champ': scoring_champ_count,
        'HOF': hof
    }


#Loop over each player URL to scrape their data
for url in player_urls:
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as e:
        #A network failure for one player must not abort the remaining players
        print(f"Request failed for {url}: {e}")
        response = None

    if response is not None:
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            try:
                all_players_data.append(_scrape_player(soup))
            except Exception as e:
                print(f"Error scraping data for {url}: {e}")
        else:
            #Report skipped pages (e.g. 429 rate limiting) so missing rows are not silent
            print(f"Skipping {url}: HTTP status {response.status_code}")

    #Be polite with requests: pause after every attempt, including failed ones
    time.sleep(REQUEST_DELAY_SECONDS)

print(f"Scraped {len(all_players_data)} of {len(player_urls)} players")

#Create a DataFrame from the collected data
df_1980s = pd.DataFrame(all_players_data)

In [3]:
#Show every column when a DataFrame is displayed (None removes the column limit)
pd.set_option('display.max_columns', None)

In [4]:
#Display the scraped table: one row per player whose page was scraped successfully
df_1980s

Unnamed: 0,Name,Position,Games,Career Length,PPG,RPG,APG,PER,FG%,FT%,Win Shares,All-Stars,All-NBA,All-Defense,All-Rookie Team,MVPs,Chips,ROY,DPOYs,Scoring Champ,HOF
0,K.C. Jones,PG,676,9,7.4,3.5,4.3,10.4,38.7,64.7,38.6,0,0,0,0,0,8,0,0,0,1
1,Lenny Wilkens,PG,1077,15,16.5,4.7,6.7,16.8,43.2,77.4,95.5,9,0,0,0,0,0,0,0,0,1
2,Clyde Lovellette,C,704,11,17.0,9.5,1.6,21.7,44.3,75.7,70.6,4,1,0,0,0,3,0,0,0,1
3,Wes Unseld,C,984,13,10.8,14.0,3.9,16.0,50.9,63.3,110.1,5,1,0,1,1,1,1,0,0,1
4,Rick Barry,SF,1020,14,24.8,6.7,4.9,21.0,45.6,89.3,128.9,12,10,0,1,0,1,1,0,1,1
5,Walt Frazier,PG,825,13,18.9,5.9,6.1,19.1,49.0,78.6,113.5,7,6,7,1,0,2,0,0,0,1
6,Bob Houbregs,C,281,5,9.3,5.5,1.8,15.6,40.4,72.1,16.5,0,0,0,0,0,0,0,0,0,1
7,Pete Maravich,SG,658,10,24.2,4.2,5.4,18.4,44.1,82.0,46.7,5,4,0,1,0,0,0,0,1,1
8,Bobby Wanzer,SG,568,9,12.2,4.5,3.2,17.3,39.3,80.2,63.9,5,3,0,0,0,1,0,0,0,1
9,Billy Cunningham,SF,770,11,21.2,10.4,4.3,20.0,45.2,73.0,78.6,5,5,0,1,1,1,0,0,0,1


In [14]:
#Save the table to CSV. index=False is not passed, so the DataFrame index is
#written as an unnamed first column (it reads back as "Unnamed: 0").
df_1980s.to_csv('1980s HOF players.csv')