In [1]:
# import dependencies
import pandas as pd
import math

In [2]:
# page to scrape
url = 'https://www.basketball-reference.com/awards/all_star_by_player.html'

# pd.read_html returns one DataFrame per <table> found on the page; take the
# first one and discard the columns that are not used later in the notebook
table = (
    pd.read_html(url)[0]
    .drop(columns=['Rk', 'Tot', 'ABA'])
)

In [3]:
# rename the scraped columns: 'player_name' is the key used for the merge with
# id_map, 'made_asg' is the column that is later turned into a 0/1 flag
table = table.rename(
    columns={
        'Player': 'player_name',
        'NBA': 'made_asg',
    }
)

In [4]:
# load the id lookup table from disk
id_map = pd.read_csv('data/id_map.csv')

# left join on the player's name: every id_map row is kept, and rows with no
# match in the scraped table get NaN in made_asg
# NOTE(review): the join key is the name alone -- if the scraped table ever
# lists the same name more than once, id_map rows would be duplicated; confirm
merged = id_map.merge(table, how='left', on='player_name')

In [5]:
# turn made_asg into a flag for whether the player made at least one all-star game:
# any value >= 1 becomes 1 ...
is_all_star = merged['made_asg'] >= 1
merged.loc[is_all_star, 'made_asg'] = 1

# ... and missing values (NaN compares False above, so they are untouched) become 0
merged['made_asg'] = merged['made_asg'].fillna(value=0)

In [6]:
# replace every remaining missing value (e.g. a blank cbb_id) with an empty string
merged = merged.fillna(value='')

# show the first five rows
merged.head(5)

Unnamed: 0,player_name,pro_id,cbb_id,made_asg
0,Kenyon Martin,martike01,kenyon-martin-1,1.0
1,Stromile Swift,swiftst01,stromile-swift-1,0.0
2,Darius Miles,milesda01,,0.0
3,Marcus Fizer,fizerma01,marcus-fizer-1,0.0
4,Mike Miller,millemi01,mike-miller-1,0.0


In [7]:
# write the merged table to csv
# index=False: do not write the row index. By default to_csv emits it as an
# extra column with an empty header, which comes back as 'Unnamed: 0' when the
# file is read again with pd.read_csv -- the preview of `merged` already shows
# one such column, and writing the index again would add another.
merged.to_csv('data/id_map_asg.csv', index=False)