# EDA Time
******
## We finally pulled all of the data for our 11 GOAT Contenders
## Now it's time to do a proper cleaning and find out what the data is telling us

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

### Kareem Data Cleaning
**The major issue with Kareem's stats was that stats like Steals, Blocks, Offensive Rebounds, and Defensive Rebounds were not an official tallied statistic until the 1973-1974 season.  Also, Turnovers were not tallied until the 1977-1978 season.  Much of his data will be skewed in these 5 categories.  Luckily, there was no 3-Point line until the 1979-1980 season, so we can leave that as 0 in all categories.  With so many columns full of inconclusive data, my only option would be to fill in with either false data from averages.  So, I will leave him off of my comparison and use other players that could be considered all-time greats.
**

# Dilemma
## So with the gaps in Kareem's data, I'm going to have to do a readjustment.  Who can take Cap's place

# Let's add more players

In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import numpy as np
from selenium import webdriver

### Allen Iverson
#### Per Game

In [3]:
driver = webdriver.Chrome()
driver.get('https://www.basketball-reference.com/players/i/iversal01.html')
soup = BeautifulSoup(driver.page_source, 'lxml')
driver.quit()
#table = soup.find_all('table')[14] #advanced stats
table = soup.find_all('table')[8] #per_game stats

In [4]:
table

<table class="row_summable sortable stats_table now_sortable" data-cols-to-freeze="1" id="per_game"><caption>Per Game Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr>
<th aria-label="If listed as single number, the year the season ended.★ - Indicates All-Star for league.Only on regular season tables." class=" poptip sort_default_asc center" data-stat="season" data-tip="If listed as single number, the year the season ended.&lt;br&gt;★ - Indicates All-Star for league.&lt;br&gt;Only on regular season tables." scope="col">Season</th>
<th aria-label="Age of Player at the start of February 1st of that season." class=" poptip sort_default_asc center" data-stat="age" data-tip="Age of Player at the start of February 1st of that season." scope="col">Age</th>
<th aria-label="Team" class=" poptip sort_default_asc cente

In [9]:
reebok = []

table = soup.find('table', {'id': 'per_game'})
for row in table.find_all('tr')[1:]:
        crossover = {}
        crossover['Age'] = row.find('td', {'data-stat': 'age'}).text
        crossover['Games'] = row.find('td', {'data-stat': 'g'}).text
        crossover['Minutes Per Game'] = row.find('td', {'data-stat': 'mp_per_g'}).text
        crossover['FG Per Game'] = row.find('td', {'data-stat': 'fg_per_g'}).text
        crossover['FG Attempts Per Game'] = row.find('td', {'data-stat': 'fga_per_g'}).text
        crossover['FG %'] = row.find('td', {'data-stat': 'fg_pct'}).text
        crossover['3PT Per Game'] = row.find('td', {'data-stat': 'fg3_per_g'}).text
        crossover['3PT Attempts Per Game'] = row.find('td', {'data-stat': 'fg3a_per_g'}).text
        crossover['3PT %'] = row.find('td', {'data-stat': 'fg3_pct'}).text
        crossover['2PT Per Game'] = row.find('td', {'data-stat': 'fg2_per_g'}).text
        crossover['2PT Attempts Per Game'] = row.find('td', {'data-stat': 'fg2a_per_g'}).text
        crossover['2PT %'] = row.find('td', {'data-stat': 'fg2_pct'}).text
        crossover['Effective FG%'] = row.find('td', {'data-stat': 'efg_pct'}).text
        crossover['FT Per Game'] = row.find('td', {'data-stat': 'ft_per_g'}).text
        crossover['FT Attempts Per Game'] = row.find('td', {'data-stat': 'fta_per_g'}).text
        crossover['FT %'] = row.find('td', {'data-stat': 'ft_pct'}).text
        crossover['Offensive Reb Per Game'] = row.find('td', {'data-stat': 'orb_per_g'}).text
        crossover['Defensive Reb Per Game'] = row.find('td', {'data-stat': 'drb_per_g'}).text
        crossover['Total Reb Per Game'] = row.find('td', {'data-stat': 'trb_per_g'}).text
        crossover['Assists Per Game'] = row.find('td', {'data-stat': 'ast_per_g'}).text
        crossover['Steals Per Game'] = row.find('td', {'data-stat': 'stl_per_g'}).text
        crossover['Blocks Per Game'] = row.find('td', {'data-stat': 'blk_per_g'}).text
        crossover['Turnovers Per Game'] = row.find('td', {'data-stat': 'tov_per_g'}).text
        crossover['Fouls Per Game'] = row.find('td', {'data-stat': 'pf_per_g'}).text
        crossover['Points Per Game'] = row.find('td', {'data-stat': 'pts_per_g'}).text
        reebok.append(crossover)

In [10]:
per_game = ['Age', 'Games', 'Minutes Per Game', 'FG Per Game', 'FG Attempts Per Game', 'FG %', '3PT Per Game', '3PT Attempts Per Game', '3PT %', '2PT Per Game', '2PT Attempts Per Game', '2PT %', 'Effective FG%', 'FT Per Game', 'FT Attempts Per Game', 'FT %', 'Offensive Reb Per Game', 'Defensive Reb Per Game', 'Total Reb Per Game', 'Assists Per Game', 'Steals Per Game', 'Blocks Per Game', 'Turnovers Per Game', 'Fouls Per Game', 'Points Per Game']

In [11]:
iverson = pd.DataFrame(reebok, columns=per_game)
iverson

Unnamed: 0,Age,Games,Minutes Per Game,FG Per Game,FG Attempts Per Game,FG %,3PT Per Game,3PT Attempts Per Game,3PT %,2PT Per Game,...,FT %,Offensive Reb Per Game,Defensive Reb Per Game,Total Reb Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Fouls Per Game,Points Per Game
0,21.0,76.0,40.1,8.2,19.8,0.416,2.0,6.0,0.341,6.2,...,0.702,1.5,2.6,4.1,7.5,2.1,0.3,4.4,3.1,23.5
1,22.0,80.0,39.4,8.1,17.6,0.461,0.9,2.9,0.298,7.2,...,0.729,1.1,2.6,3.7,6.2,2.2,0.3,3.1,2.5,22.0
2,23.0,48.0,41.5,9.1,22.0,0.412,1.2,4.1,0.291,7.9,...,0.751,1.4,3.5,4.9,4.6,2.3,0.1,3.5,2.0,26.8
3,24.0,70.0,40.8,10.4,24.8,0.421,1.3,3.7,0.341,9.1,...,0.713,1.0,2.8,3.8,4.7,2.1,0.1,3.3,2.3,28.4
4,25.0,71.0,42.0,10.7,25.5,0.42,1.4,4.3,0.32,9.4,...,0.814,0.7,3.1,3.8,4.6,2.5,0.3,3.3,2.1,31.1
5,26.0,60.0,43.7,11.1,27.8,0.398,1.3,4.5,0.291,9.8,...,0.812,0.7,3.8,4.5,5.5,2.8,0.2,4.0,1.7,31.4
6,27.0,82.0,42.5,9.8,23.7,0.414,1.0,3.7,0.277,8.8,...,0.774,0.8,3.4,4.2,5.5,2.7,0.2,3.5,1.8,27.6
7,28.0,48.0,42.5,9.1,23.4,0.387,1.2,4.1,0.286,7.9,...,0.745,0.7,3.0,3.7,6.8,2.4,0.1,4.4,1.8,26.4
8,29.0,75.0,42.3,10.3,24.2,0.424,1.4,4.5,0.308,8.9,...,0.835,0.7,3.3,4.0,7.9,2.4,0.1,4.6,1.9,30.7
9,30.0,72.0,43.1,11.3,25.3,0.447,1.0,3.1,0.323,10.3,...,0.814,0.6,2.6,3.2,7.4,1.9,0.1,3.4,1.7,33.0


In [12]:
iverson = iverson.drop(iverson.index[[11, 12, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25]])
iverson

Unnamed: 0,Age,Games,Minutes Per Game,FG Per Game,FG Attempts Per Game,FG %,3PT Per Game,3PT Attempts Per Game,3PT %,2PT Per Game,...,FT %,Offensive Reb Per Game,Defensive Reb Per Game,Total Reb Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Fouls Per Game,Points Per Game
0,21,76,40.1,8.2,19.8,0.416,2.0,6.0,0.341,6.2,...,0.702,1.5,2.6,4.1,7.5,2.1,0.3,4.4,3.1,23.5
1,22,80,39.4,8.1,17.6,0.461,0.9,2.9,0.298,7.2,...,0.729,1.1,2.6,3.7,6.2,2.2,0.3,3.1,2.5,22.0
2,23,48,41.5,9.1,22.0,0.412,1.2,4.1,0.291,7.9,...,0.751,1.4,3.5,4.9,4.6,2.3,0.1,3.5,2.0,26.8
3,24,70,40.8,10.4,24.8,0.421,1.3,3.7,0.341,9.1,...,0.713,1.0,2.8,3.8,4.7,2.1,0.1,3.3,2.3,28.4
4,25,71,42.0,10.7,25.5,0.42,1.4,4.3,0.32,9.4,...,0.814,0.7,3.1,3.8,4.6,2.5,0.3,3.3,2.1,31.1
5,26,60,43.7,11.1,27.8,0.398,1.3,4.5,0.291,9.8,...,0.812,0.7,3.8,4.5,5.5,2.8,0.2,4.0,1.7,31.4
6,27,82,42.5,9.8,23.7,0.414,1.0,3.7,0.277,8.8,...,0.774,0.8,3.4,4.2,5.5,2.7,0.2,3.5,1.8,27.6
7,28,48,42.5,9.1,23.4,0.387,1.2,4.1,0.286,7.9,...,0.745,0.7,3.0,3.7,6.8,2.4,0.1,4.4,1.8,26.4
8,29,75,42.3,10.3,24.2,0.424,1.4,4.5,0.308,8.9,...,0.835,0.7,3.3,4.0,7.9,2.4,0.1,4.6,1.9,30.7
9,30,72,43.1,11.3,25.3,0.447,1.0,3.1,0.323,10.3,...,0.814,0.6,2.6,3.2,7.4,1.9,0.1,3.4,1.7,33.0


#### Advanced

In [13]:
driver = webdriver.Chrome()
driver.get('https://www.basketball-reference.com/players/i/iversal01.html')
soup = BeautifulSoup(driver.page_source, 'lxml')
driver.quit()
table = soup.find_all('table')[13] #advanced stats
#table = soup.find_all('table')[8] #per_game stats

In [14]:
table

<table class="row_summable sortable stats_table now_sortable" data-cols-to-freeze="1" id="advanced"><caption>Advanced Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr>
<th aria-label="If listed as single number, the year the season ended.★ - Indicates All-Star for league.Only on regular season tables." class=" poptip sort_default_asc center" data-stat="season" data-tip="If listed as single number, the year the season ended.&lt;br&gt;★ - Indicates All-Star for league.&lt;br&gt;Only on regular season tables." scope="col">Season</th>
<th aria-label="Age of Player at the start of February 1st of that season." class=" poptip sort_default_asc center" data-stat="age" data-tip="Age of Player at the start of February 1st of that season." scope="col">Age</th>
<th aria-label="Team" class=" poptip sort_default_asc center" dat

In [15]:
adv_reebok = []

table = soup.find('table', {'id': 'advanced'})

for row in table.find_all('tr')[1:]:
    adv_crossover = {}
    adv_crossover['Adv Age'] = row.find('td', {'data-stat':'age'}).text
    adv_crossover['PER'] = row.find('td', {'data-stat': 'per'}).text
    adv_crossover['True Shooting'] = row.find('td', {'data-stat': 'ts_pct'}).text
    adv_crossover['Total Rebounding %'] = row.find('td', {'data-stat': 'trb_pct'}).text
    adv_crossover['Assist %'] = row.find('td', {'data-stat': 'ast_pct'}).text
    adv_crossover['Steal %'] = row.find('td', {'data-stat': 'stl_pct'}).text
    adv_crossover['Block %'] = row.find('td', {'data-stat': 'blk_pct'}).text
    adv_crossover['Turnover %'] = row.find('td', {'data-stat': 'tov_pct'}).text
    adv_crossover['Usage %'] = row.find('td', {'data-stat': 'usg_pct'}).text
    adv_crossover['Win Shares'] = row.find('td', {'data-stat': 'ws'}).text
    adv_crossover['Offensive Box +/-'] = row.find('td', {'data-stat': 'obpm'}).text
    adv_crossover['Defensive Box +/-'] = row.find('td', {'data-stat': 'dbpm'}).text
    adv_crossover['Box +/-'] = row.find('td', {'data-stat': 'bpm'}).text
    adv_crossover['VORP'] = row.find('td', {'data-stat': 'vorp'}).text
    adv_reebok.append(adv_crossover)

In [16]:
advanced = ['Adv Age', 'PER', 'True Shooting', 'Total Rebounding %', 'Assist %', 'Steal %', 'Block %', 'Turnover %', 'Usage %', 'Win Shares', 'Offensive Box +/-', 'Defensive Box +/-', 'Box +/-', 'VORP']

In [17]:
adv_iverson = pd.DataFrame(adv_reebok, columns=advanced)
adv_iverson

Unnamed: 0,Adv Age,PER,True Shooting,Total Rebounding %,Assist %,Steal %,Block %,Turnover %,Usage %,Win Shares,Offensive Box +/-,Defensive Box +/-,Box +/-,VORP
0,21.0,18.0,0.513,5.6,33.6,2.6,0.6,16.2,28.9,4.1,3.4,-2.0,1.5,2.7
1,22.0,20.4,0.535,5.4,30.6,3.0,0.6,12.9,26.9,9.0,4.5,-0.7,3.8,4.6
2,23.0,22.2,0.508,6.9,24.2,3.0,0.3,11.7,32.6,7.2,4.9,-0.3,4.6,3.3
3,24.0,20.0,0.496,5.2,23.0,2.6,0.1,10.3,34.4,6.9,3.4,-0.8,2.6,3.3
4,25.0,24.0,0.518,5.2,23.0,3.2,0.5,10.0,35.9,11.8,5.0,-0.1,4.8,5.1
5,26.0,21.9,0.489,5.8,27.6,3.5,0.4,11.0,37.8,6.9,3.7,0.2,3.8,3.9
6,27.0,21.2,0.5,5.8,25.1,3.4,0.3,11.2,32.9,9.2,3.1,-0.3,2.8,4.2
7,28.0,19.3,0.478,5.1,34.4,3.1,0.2,13.6,35.3,2.8,3.1,-0.6,2.5,2.3
8,29.0,23.2,0.532,5.3,37.6,2.9,0.2,13.7,35.0,9.0,4.8,-0.5,4.3,5.0
9,30.0,25.9,0.543,4.4,34.9,2.3,0.2,10.2,35.8,10.6,6.0,-2.5,3.5,4.3


In [18]:
adv_iverson = adv_iverson.drop(adv_iverson.index[[11, 12, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25]])
adv_iverson

Unnamed: 0,Adv Age,PER,True Shooting,Total Rebounding %,Assist %,Steal %,Block %,Turnover %,Usage %,Win Shares,Offensive Box +/-,Defensive Box +/-,Box +/-,VORP
0,21,18.0,0.513,5.6,33.6,2.6,0.6,16.2,28.9,4.1,3.4,-2.0,1.5,2.7
1,22,20.4,0.535,5.4,30.6,3.0,0.6,12.9,26.9,9.0,4.5,-0.7,3.8,4.6
2,23,22.2,0.508,6.9,24.2,3.0,0.3,11.7,32.6,7.2,4.9,-0.3,4.6,3.3
3,24,20.0,0.496,5.2,23.0,2.6,0.1,10.3,34.4,6.9,3.4,-0.8,2.6,3.3
4,25,24.0,0.518,5.2,23.0,3.2,0.5,10.0,35.9,11.8,5.0,-0.1,4.8,5.1
5,26,21.9,0.489,5.8,27.6,3.5,0.4,11.0,37.8,6.9,3.7,0.2,3.8,3.9
6,27,21.2,0.5,5.8,25.1,3.4,0.3,11.2,32.9,9.2,3.1,-0.3,2.8,4.2
7,28,19.3,0.478,5.1,34.4,3.1,0.2,13.6,35.3,2.8,3.1,-0.6,2.5,2.3
8,29,23.2,0.532,5.3,37.6,2.9,0.2,13.7,35.0,9.0,4.8,-0.5,4.3,5.0
9,30,25.9,0.543,4.4,34.9,2.3,0.2,10.2,35.8,10.6,6.0,-2.5,3.5,4.3


### Dwyane Wade
#### Per Game

In [25]:
driver = webdriver.Chrome()
driver.get('https://www.basketball-reference.com/players/w/wadedw01.html')
soup = BeautifulSoup(driver.page_source, 'lxml')
driver.quit()
#table = soup.find_all('table')[14] #advanced stats
table = soup.find_all('table')[9] #per_game stats

In [26]:
table

<table class="row_summable sortable stats_table now_sortable" data-cols-to-freeze="1" id="per_game"><caption>Per Game Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr>
<th aria-label="If listed as single number, the year the season ended.★ - Indicates All-Star for league.Only on regular season tables." class=" poptip sort_default_asc center" data-stat="season" data-tip="If listed as single number, the year the season ended.&lt;br&gt;★ - Indicates All-Star for league.&lt;br&gt;Only on regular season tables." scope="col">Season</th>
<th aria-label="Age of Player at the start of February 1st of that season." class=" poptip sort_default_asc center" data-stat="age" data-tip="Age of Player at the start of February 1st of that season." scope="col">Age</th>
<th aria-label="Team" class=" poptip sort_default_asc cente

In [27]:
flash = []

table = soup.find('table', {'id': 'per_game'})
for row in table.find_all('tr')[1:21]:
        heat = {}
        heat['Age'] = row.find('td', {'data-stat': 'age'}).text
        heat['Games'] = row.find('td', {'data-stat': 'g'}).text
        heat['Minutes Per Game'] = row.find('td', {'data-stat': 'mp_per_g'}).text
        heat['FG Per Game'] = row.find('td', {'data-stat': 'fg_per_g'}).text
        heat['FG Attempts Per Game'] = row.find('td', {'data-stat': 'fga_per_g'}).text
        heat['FG %'] = row.find('td', {'data-stat': 'fg_pct'}).text
        heat['3PT Per Game'] = row.find('td', {'data-stat': 'fg3_per_g'}).text
        heat['3PT Attempts Per Game'] = row.find('td', {'data-stat': 'fg3a_per_g'}).text
        heat['3PT %'] = row.find('td', {'data-stat': 'fg3_pct'}).text
        heat['2PT Per Game'] = row.find('td', {'data-stat': 'fg2_per_g'}).text
        heat['2PT Attempts Per Game'] = row.find('td', {'data-stat': 'fg2a_per_g'}).text
        heat['2PT %'] = row.find('td', {'data-stat': 'fg2_pct'}).text
        heat['Effective FG%'] = row.find('td', {'data-stat': 'efg_pct'}).text
        heat['FT Per Game'] = row.find('td', {'data-stat': 'ft_per_g'}).text
        heat['FT Attempts Per Game'] = row.find('td', {'data-stat': 'fta_per_g'}).text
        heat['FT %'] = row.find('td', {'data-stat': 'ft_pct'}).text
        heat['Offensive Reb Per Game'] = row.find('td', {'data-stat': 'orb_per_g'}).text
        heat['Defensive Reb Per Game'] = row.find('td', {'data-stat': 'drb_per_g'}).text
        heat['Total Reb Per Game'] = row.find('td', {'data-stat': 'trb_per_g'}).text
        heat['Assists Per Game'] = row.find('td', {'data-stat': 'ast_per_g'}).text
        heat['Steals Per Game'] = row.find('td', {'data-stat': 'stl_per_g'}).text
        heat['Blocks Per Game'] = row.find('td', {'data-stat': 'blk_per_g'}).text
        heat['Turnovers Per Game'] = row.find('td', {'data-stat': 'tov_per_g'}).text
        heat['Fouls Per Game'] = row.find('td', {'data-stat': 'pf_per_g'}).text
        heat['Points Per Game'] = row.find('td', {'data-stat': 'pts_per_g'}).text
        flash.append(heat)

In [28]:
dwade = pd.DataFrame(flash, columns=per_game)
dwade

Unnamed: 0,Age,Games,Minutes Per Game,FG Per Game,FG Attempts Per Game,FG %,3PT Per Game,3PT Attempts Per Game,3PT %,2PT Per Game,...,FT %,Offensive Reb Per Game,Defensive Reb Per Game,Total Reb Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Fouls Per Game,Points Per Game
0,22.0,61.0,34.9,6.1,13.1,0.465,0.3,0.9,0.302,5.8,...,0.747,1.4,2.7,4.0,4.5,1.4,0.6,3.2,2.3,16.2
1,23.0,77.0,38.6,8.2,17.1,0.478,0.2,0.6,0.289,8.0,...,0.762,1.4,3.7,5.2,6.8,1.6,1.1,4.2,3.0,24.1
2,24.0,75.0,38.6,9.3,18.8,0.495,0.2,1.0,0.171,9.1,...,0.783,1.4,4.3,5.7,6.7,1.9,0.8,3.6,2.9,27.2
3,25.0,51.0,37.9,9.3,18.9,0.491,0.4,1.5,0.266,8.8,...,0.807,1.0,3.7,4.7,7.5,2.1,1.2,4.2,2.3,27.4
4,26.0,51.0,38.3,8.6,18.4,0.469,0.4,1.5,0.286,8.2,...,0.758,0.9,3.3,4.2,6.9,1.7,0.7,4.4,2.7,24.6
5,27.0,79.0,38.6,10.8,22.0,0.491,1.1,3.5,0.317,9.7,...,0.765,1.1,3.9,5.0,7.5,2.2,1.3,3.4,2.3,30.2
6,28.0,77.0,36.3,9.3,19.6,0.476,0.9,3.2,0.3,8.4,...,0.761,1.4,3.5,4.8,6.5,1.8,1.1,3.3,2.4,26.6
7,29.0,76.0,37.1,9.1,18.2,0.5,0.8,2.7,0.306,8.3,...,0.758,1.6,4.8,6.4,4.6,1.5,1.1,3.1,2.6,25.5
8,30.0,49.0,33.2,8.5,17.1,0.497,0.3,1.1,0.268,8.2,...,0.791,1.5,3.3,4.8,4.6,1.7,1.3,2.6,2.2,22.1
9,31.0,69.0,34.7,8.2,15.8,0.521,0.2,1.0,0.258,8.0,...,0.725,1.2,3.7,5.0,5.1,1.9,0.8,2.8,2.0,21.2


In [29]:
dwade = dwade.drop(dwade.index[[15, 16, 17, 18, 19]])
dwade

Unnamed: 0,Age,Games,Minutes Per Game,FG Per Game,FG Attempts Per Game,FG %,3PT Per Game,3PT Attempts Per Game,3PT %,2PT Per Game,...,FT %,Offensive Reb Per Game,Defensive Reb Per Game,Total Reb Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Fouls Per Game,Points Per Game
0,22,61,34.9,6.1,13.1,0.465,0.3,0.9,0.302,5.8,...,0.747,1.4,2.7,4.0,4.5,1.4,0.6,3.2,2.3,16.2
1,23,77,38.6,8.2,17.1,0.478,0.2,0.6,0.289,8.0,...,0.762,1.4,3.7,5.2,6.8,1.6,1.1,4.2,3.0,24.1
2,24,75,38.6,9.3,18.8,0.495,0.2,1.0,0.171,9.1,...,0.783,1.4,4.3,5.7,6.7,1.9,0.8,3.6,2.9,27.2
3,25,51,37.9,9.3,18.9,0.491,0.4,1.5,0.266,8.8,...,0.807,1.0,3.7,4.7,7.5,2.1,1.2,4.2,2.3,27.4
4,26,51,38.3,8.6,18.4,0.469,0.4,1.5,0.286,8.2,...,0.758,0.9,3.3,4.2,6.9,1.7,0.7,4.4,2.7,24.6
5,27,79,38.6,10.8,22.0,0.491,1.1,3.5,0.317,9.7,...,0.765,1.1,3.9,5.0,7.5,2.2,1.3,3.4,2.3,30.2
6,28,77,36.3,9.3,19.6,0.476,0.9,3.2,0.3,8.4,...,0.761,1.4,3.5,4.8,6.5,1.8,1.1,3.3,2.4,26.6
7,29,76,37.1,9.1,18.2,0.5,0.8,2.7,0.306,8.3,...,0.758,1.6,4.8,6.4,4.6,1.5,1.1,3.1,2.6,25.5
8,30,49,33.2,8.5,17.1,0.497,0.3,1.1,0.268,8.2,...,0.791,1.5,3.3,4.8,4.6,1.7,1.3,2.6,2.2,22.1
9,31,69,34.7,8.2,15.8,0.521,0.2,1.0,0.258,8.0,...,0.725,1.2,3.7,5.0,5.1,1.9,0.8,2.8,2.0,21.2


#### Advanced

In [30]:
driver = webdriver.Chrome()
driver.get('https://www.basketball-reference.com/players/w/wadedw01.html')
soup = BeautifulSoup(driver.page_source, 'lxml')
driver.quit()
table = soup.find_all('table')[14] #advanced stats
#table = soup.find_all('table')[9] #per_game stats

In [31]:
table

<table class="row_summable sortable stats_table now_sortable" data-cols-to-freeze="1" id="advanced"><caption>Advanced Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr>
<th aria-label="If listed as single number, the year the season ended.★ - Indicates All-Star for league.Only on regular season tables." class=" poptip sort_default_asc center" data-stat="season" data-tip="If listed as single number, the year the season ended.&lt;br&gt;★ - Indicates All-Star for league.&lt;br&gt;Only on regular season tables." scope="col">Season</th>
<th aria-label="Age of Player at the start of February 1st of that season." class=" poptip sort_default_asc center" data-stat="age" data-tip="Age of Player at the start of February 1st of that season." scope="col">Age</th>
<th aria-label="Team" class=" poptip sort_default_asc center" dat

In [32]:
adv_flash = []

table = soup.find('table', {'id': 'advanced'})

for row in table.find_all('tr')[1:]:
    adv_heat = {}
    adv_heat['Adv Age'] = row.find('td', {'data-stat':'age'}).text
    adv_heat['PER'] = row.find('td', {'data-stat': 'per'}).text
    adv_heat['True Shooting'] = row.find('td', {'data-stat': 'ts_pct'}).text
    adv_heat['Total Rebounding %'] = row.find('td', {'data-stat': 'trb_pct'}).text
    adv_heat['Assist %'] = row.find('td', {'data-stat': 'ast_pct'}).text
    adv_heat['Steal %'] = row.find('td', {'data-stat': 'stl_pct'}).text
    adv_heat['Block %'] = row.find('td', {'data-stat': 'blk_pct'}).text
    adv_heat['Turnover %'] = row.find('td', {'data-stat': 'tov_pct'}).text
    adv_heat['Usage %'] = row.find('td', {'data-stat': 'usg_pct'}).text
    adv_heat['Win Shares'] = row.find('td', {'data-stat': 'ws'}).text
    adv_heat['Offensive Box +/-'] = row.find('td', {'data-stat': 'obpm'}).text
    adv_heat['Defensive Box +/-'] = row.find('td', {'data-stat': 'dbpm'}).text
    adv_heat['Box +/-'] = row.find('td', {'data-stat': 'bpm'}).text
    adv_heat['VORP'] = row.find('td', {'data-stat': 'vorp'}).text
    adv_flash.append(adv_heat)

In [33]:
adv_dwade = pd.DataFrame(adv_flash, columns=advanced)
adv_dwade.head

<bound method NDFrame.head of    Adv Age   PER True Shooting Total Rebounding % Assist % Steal % Block %  \
0       22  17.6          .530                6.8     25.1     2.2     1.3   
1       23  23.1          .561                7.8     31.0     2.2     2.0   
2       24  27.6          .577                8.7     33.0     2.6     1.5   
3       25  28.9          .583                7.3     40.5     3.0     2.5   
4       26  21.5          .549                6.6     37.5     2.4     1.5   
5       27  30.4          .574                7.8     40.3     3.0     2.8   
6       28  28.0          .562                7.8     36.4     2.7     2.4   
7       29  25.6          .581               10.2     23.5     2.1     2.4   
8       30  26.3          .559                8.7     27.3     2.7     3.1   
9       31  24.0          .571                8.9     26.6     2.8     1.9   
10      32  22.0          .588                8.5     25.5     2.3     1.4   
11      33  21.4          .534    

In [34]:
adv_dwade

Unnamed: 0,Adv Age,PER,True Shooting,Total Rebounding %,Assist %,Steal %,Block %,Turnover %,Usage %,Win Shares,Offensive Box +/-,Defensive Box +/-,Box +/-,VORP
0,22.0,17.6,0.53,6.8,25.1,2.2,1.3,17.3,25.0,4.6,0.9,0.4,1.3,1.8
1,23.0,23.1,0.561,7.8,31.0,2.2,2.0,16.3,30.9,11.0,3.1,1.4,4.4,4.8
2,24.0,27.6,0.577,8.7,33.0,2.6,1.5,13.2,32.5,14.4,5.9,1.6,7.5,7.0
3,25.0,28.9,0.583,7.3,40.5,3.0,2.5,15.3,34.7,8.8,6.5,1.9,8.4,5.1
4,26.0,21.5,0.549,6.6,37.5,2.4,1.5,16.4,33.1,3.3,3.0,0.0,3.0,2.5
5,27.0,30.4,0.574,7.8,40.3,3.0,2.8,11.6,36.2,14.7,8.7,2.0,10.7,9.7
6,28.0,28.0,0.562,7.8,36.4,2.7,2.4,12.2,34.9,13.0,7.4,2.0,9.4,8.0
7,29.0,25.6,0.581,10.2,23.5,2.1,2.4,12.4,31.6,12.8,4.4,1.5,5.9,5.7
8,30.0,26.3,0.559,8.7,27.3,2.7,3.1,11.8,31.3,7.7,4.5,1.8,6.3,3.4
9,31.0,24.0,0.571,8.9,26.6,2.8,1.9,13.2,29.5,9.6,2.8,1.1,3.9,3.5


In [35]:
adv_dwade = adv_dwade.drop(adv_dwade.index[[15, 16, 17, 18, 19, 20, 21]])
adv_dwade

Unnamed: 0,Adv Age,PER,True Shooting,Total Rebounding %,Assist %,Steal %,Block %,Turnover %,Usage %,Win Shares,Offensive Box +/-,Defensive Box +/-,Box +/-,VORP
0,22,17.6,0.53,6.8,25.1,2.2,1.3,17.3,25.0,4.6,0.9,0.4,1.3,1.8
1,23,23.1,0.561,7.8,31.0,2.2,2.0,16.3,30.9,11.0,3.1,1.4,4.4,4.8
2,24,27.6,0.577,8.7,33.0,2.6,1.5,13.2,32.5,14.4,5.9,1.6,7.5,7.0
3,25,28.9,0.583,7.3,40.5,3.0,2.5,15.3,34.7,8.8,6.5,1.9,8.4,5.1
4,26,21.5,0.549,6.6,37.5,2.4,1.5,16.4,33.1,3.3,3.0,0.0,3.0,2.5
5,27,30.4,0.574,7.8,40.3,3.0,2.8,11.6,36.2,14.7,8.7,2.0,10.7,9.7
6,28,28.0,0.562,7.8,36.4,2.7,2.4,12.2,34.9,13.0,7.4,2.0,9.4,8.0
7,29,25.6,0.581,10.2,23.5,2.1,2.4,12.4,31.6,12.8,4.4,1.5,5.9,5.7
8,30,26.3,0.559,8.7,27.3,2.7,3.1,11.8,31.3,7.7,4.5,1.8,6.3,3.4
9,31,24.0,0.571,8.9,26.6,2.8,1.9,13.2,29.5,9.6,2.8,1.1,3.9,3.5


### Dirk Nowitzki
#### Per Game

In [36]:
driver = webdriver.Chrome()
driver.get('https://www.basketball-reference.com/players/n/nowitdi01.html')
soup = BeautifulSoup(driver.page_source, 'lxml')
driver.quit()
#table = soup.find_all('table')[14] #advanced stats
table = soup.find_all('table')[9] #per_game stats

In [37]:
table

<table class="row_summable sortable stats_table now_sortable" data-cols-to-freeze="1" id="per_game"><caption>Per Game Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr>
<th aria-label="If listed as single number, the year the season ended.★ - Indicates All-Star for league.Only on regular season tables." class=" poptip sort_default_asc center" data-stat="season" data-tip="If listed as single number, the year the season ended.&lt;br&gt;★ - Indicates All-Star for league.&lt;br&gt;Only on regular season tables." scope="col">Season</th>
<th aria-label="Age of Player at the start of February 1st of that season." class=" poptip sort_default_asc center" data-stat="age" data-tip="Age of Player at the start of February 1st of that season." scope="col">Age</th>
<th aria-label="Team" class=" poptip sort_default_asc cente

In [38]:
dallas = []

table = soup.find('table', {'id': 'per_game'})
for row in table.find_all('tr')[1:21]:
        mavs = {}
        mavs['Age'] = row.find('td', {'data-stat': 'age'}).text
        mavs['Games'] = row.find('td', {'data-stat': 'g'}).text
        mavs['Minutes Per Game'] = row.find('td', {'data-stat': 'mp_per_g'}).text
        mavs['FG Per Game'] = row.find('td', {'data-stat': 'fg_per_g'}).text
        mavs['FG Attempts Per Game'] = row.find('td', {'data-stat': 'fga_per_g'}).text
        mavs['FG %'] = row.find('td', {'data-stat': 'fg_pct'}).text
        mavs['3PT Per Game'] = row.find('td', {'data-stat': 'fg3_per_g'}).text
        mavs['3PT Attempts Per Game'] = row.find('td', {'data-stat': 'fg3a_per_g'}).text
        mavs['3PT %'] = row.find('td', {'data-stat': 'fg3_pct'}).text
        mavs['2PT Per Game'] = row.find('td', {'data-stat': 'fg2_per_g'}).text
        mavs['2PT Attempts Per Game'] = row.find('td', {'data-stat': 'fg2a_per_g'}).text
        mavs['2PT %'] = row.find('td', {'data-stat': 'fg2_pct'}).text
        mavs['Effective FG%'] = row.find('td', {'data-stat': 'efg_pct'}).text
        mavs['FT Per Game'] = row.find('td', {'data-stat': 'ft_per_g'}).text
        mavs['FT Attempts Per Game'] = row.find('td', {'data-stat': 'fta_per_g'}).text
        mavs['FT %'] = row.find('td', {'data-stat': 'ft_pct'}).text
        mavs['Offensive Reb Per Game'] = row.find('td', {'data-stat': 'orb_per_g'}).text
        mavs['Defensive Reb Per Game'] = row.find('td', {'data-stat': 'drb_per_g'}).text
        mavs['Total Reb Per Game'] = row.find('td', {'data-stat': 'trb_per_g'}).text
        mavs['Assists Per Game'] = row.find('td', {'data-stat': 'ast_per_g'}).text
        mavs['Steals Per Game'] = row.find('td', {'data-stat': 'stl_per_g'}).text
        mavs['Blocks Per Game'] = row.find('td', {'data-stat': 'blk_per_g'}).text
        mavs['Turnovers Per Game'] = row.find('td', {'data-stat': 'tov_per_g'}).text
        mavs['Fouls Per Game'] = row.find('td', {'data-stat': 'pf_per_g'}).text
        mavs['Points Per Game'] = row.find('td', {'data-stat': 'pts_per_g'}).text
        dallas.append(mavs)

In [39]:
dirk = pd.DataFrame(dallas, columns=per_game)
dirk

Unnamed: 0,Age,Games,Minutes Per Game,FG Per Game,FG Attempts Per Game,FG %,3PT Per Game,3PT Attempts Per Game,3PT %,2PT Per Game,...,FT %,Offensive Reb Per Game,Defensive Reb Per Game,Total Reb Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Fouls Per Game,Points Per Game
0,20,47,20.4,2.9,7.1,0.405,0.3,1.4,0.206,2.6,...,0.773,0.9,2.6,3.4,1.0,0.6,0.6,1.6,2.2,8.2
1,21,82,35.8,6.3,13.6,0.461,1.4,3.7,0.379,4.9,...,0.83,1.2,5.2,6.5,2.5,0.8,0.8,1.7,3.1,17.5
2,22,82,38.1,7.2,15.2,0.474,1.8,4.8,0.387,5.4,...,0.838,1.5,7.7,9.2,2.1,1.0,1.2,1.9,3.0,21.8
3,23,76,38.0,7.9,16.6,0.477,1.8,4.6,0.397,6.1,...,0.853,1.6,8.4,9.9,2.4,1.1,1.0,1.9,2.9,23.4
4,24,80,39.0,8.6,18.6,0.463,1.9,4.9,0.379,6.8,...,0.881,1.0,8.9,9.9,3.0,1.4,1.0,1.9,2.6,25.1
5,25,77,37.9,7.9,17.0,0.462,1.3,3.8,0.341,6.6,...,0.877,1.2,7.5,8.7,2.7,1.2,1.4,1.8,2.8,21.8
6,26,78,38.7,8.5,18.5,0.459,1.2,2.9,0.399,7.3,...,0.869,1.2,8.5,9.7,3.1,1.2,1.5,2.3,2.8,26.1
7,27,81,38.1,9.3,19.3,0.48,1.4,3.3,0.406,7.9,...,0.901,1.4,7.6,9.0,2.8,0.7,1.0,1.9,2.0,26.6
8,28,78,36.2,8.6,17.2,0.502,0.9,2.2,0.416,7.7,...,0.904,1.6,7.3,8.9,3.4,0.7,0.8,2.1,2.2,24.6
9,29,77,36.0,8.2,17.1,0.479,1.0,2.9,0.359,7.2,...,0.879,1.2,7.3,8.6,3.5,0.7,0.9,2.1,2.6,23.6


#### Advanced

In [40]:
adv_dallas = []

table = soup.find('table', {'id': 'advanced'})

for row in table.find_all('tr')[1:]:
    adv_mavs = {}
    adv_mavs['Adv Age'] = row.find('td', {'data-stat':'age'}).text
    adv_mavs['PER'] = row.find('td', {'data-stat': 'per'}).text
    adv_mavs['True Shooting'] = row.find('td', {'data-stat': 'ts_pct'}).text
    adv_mavs['Total Rebounding %'] = row.find('td', {'data-stat': 'trb_pct'}).text
    adv_mavs['Assist %'] = row.find('td', {'data-stat': 'ast_pct'}).text
    adv_mavs['Steal %'] = row.find('td', {'data-stat': 'stl_pct'}).text
    adv_mavs['Block %'] = row.find('td', {'data-stat': 'blk_pct'}).text
    adv_mavs['Turnover %'] = row.find('td', {'data-stat': 'tov_pct'}).text
    adv_mavs['Usage %'] = row.find('td', {'data-stat': 'usg_pct'}).text
    adv_mavs['Win Shares'] = row.find('td', {'data-stat': 'ws'}).text
    adv_mavs['Offensive Box +/-'] = row.find('td', {'data-stat': 'obpm'}).text
    adv_mavs['Defensive Box +/-'] = row.find('td', {'data-stat': 'dbpm'}).text
    adv_mavs['Box +/-'] = row.find('td', {'data-stat': 'bpm'}).text
    adv_mavs['VORP'] = row.find('td', {'data-stat': 'vorp'}).text
    adv_dallas.append(adv_mavs)

In [41]:
adv_dirk = pd.DataFrame(adv_dallas, columns=advanced)
adv_dirk

Unnamed: 0,Adv Age,PER,True Shooting,Total Rebounding %,Assist %,Steal %,Block %,Turnover %,Usage %,Win Shares,Offensive Box +/-,Defensive Box +/-,Box +/-,VORP
0,20.0,12.8,0.491,9.5,8.4,1.6,2.0,15.7,22.3,0.8,-2.0,-0.9,-2.9,-0.2
1,21.0,17.5,0.564,9.7,10.9,1.1,1.6,10.0,21.2,8.1,2.3,-0.6,1.7,2.7
2,22.0,22.8,0.601,13.5,9.4,1.3,2.3,9.5,23.8,14.6,4.3,0.5,4.7,5.3
3,23.0,24.1,0.599,14.5,10.8,1.5,1.9,8.9,25.5,13.4,4.6,0.1,4.7,4.8
4,24.0,25.6,0.581,14.0,13.3,1.8,2.0,8.1,27.4,16.1,4.6,1.0,5.7,6.1
5,25.0,22.5,0.561,12.5,11.3,1.6,2.6,8.3,24.5,11.5,3.5,0.0,3.4,4.0
6,26.0,26.1,0.578,14.0,14.3,1.7,2.9,9.1,28.7,15.6,4.0,1.7,5.7,5.9
7,27.0,28.1,0.589,14.2,14.7,1.0,2.1,7.9,30.0,17.7,5.8,-0.1,5.7,6.0
8,28.0,27.6,0.605,14.8,17.8,1.0,1.7,9.5,28.9,16.3,5.4,1.3,6.7,6.2
9,29.0,24.6,0.585,13.8,17.8,1.0,1.9,9.3,28.8,12.9,4.2,0.9,5.2,5.0


In [42]:
adv_dirk = adv_dirk.drop(adv_dirk.index[[20]])
adv_dirk

Unnamed: 0,Adv Age,PER,True Shooting,Total Rebounding %,Assist %,Steal %,Block %,Turnover %,Usage %,Win Shares,Offensive Box +/-,Defensive Box +/-,Box +/-,VORP
0,20,12.8,0.491,9.5,8.4,1.6,2.0,15.7,22.3,0.8,-2.0,-0.9,-2.9,-0.2
1,21,17.5,0.564,9.7,10.9,1.1,1.6,10.0,21.2,8.1,2.3,-0.6,1.7,2.7
2,22,22.8,0.601,13.5,9.4,1.3,2.3,9.5,23.8,14.6,4.3,0.5,4.7,5.3
3,23,24.1,0.599,14.5,10.8,1.5,1.9,8.9,25.5,13.4,4.6,0.1,4.7,4.8
4,24,25.6,0.581,14.0,13.3,1.8,2.0,8.1,27.4,16.1,4.6,1.0,5.7,6.1
5,25,22.5,0.561,12.5,11.3,1.6,2.6,8.3,24.5,11.5,3.5,0.0,3.4,4.0
6,26,26.1,0.578,14.0,14.3,1.7,2.9,9.1,28.7,15.6,4.0,1.7,5.7,5.9
7,27,28.1,0.589,14.2,14.7,1.0,2.1,7.9,30.0,17.7,5.8,-0.1,5.7,6.0
8,28,27.6,0.605,14.8,17.8,1.0,1.7,9.5,28.9,16.3,5.4,1.3,6.7,6.2
9,29,24.6,0.585,13.8,17.8,1.0,1.9,9.3,28.8,12.9,4.2,0.9,5.2,5.0


### Steph Curry
#### Per Game

In [43]:
driver = webdriver.Chrome()
driver.get('https://www.basketball-reference.com/players/c/curryst01.html')
soup = BeautifulSoup(driver.page_source, 'lxml')
driver.quit()
#table = soup.find_all('table')[14] #advanced stats
table = soup.find_all('table')[9] #per_game stats

In [44]:
table

<table class="row_summable sortable stats_table now_sortable" data-cols-to-freeze="1" id="per_game"><caption>Per Game Table</caption>
<colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
<thead>
<tr>
<th aria-label="If listed as single number, the year the season ended.★ - Indicates All-Star for league.Only on regular season tables." class=" poptip sort_default_asc center" data-stat="season" data-tip="If listed as single number, the year the season ended.&lt;br&gt;★ - Indicates All-Star for league.&lt;br&gt;Only on regular season tables." scope="col">Season</th>
<th aria-label="Age of Player at the start of February 1st of that season." class=" poptip sort_default_asc center" data-stat="age" data-tip="Age of Player at the start of February 1st of that season." scope="col">Age</th>
<th aria-label="Team" class=" poptip sort_default_asc cente

In [45]:
warriors = []

table = soup.find('table', {'id': 'per_game'})
for row in table.find_all('tr')[1:21]:
        splash = {}
        splash['Age'] = row.find('td', {'data-stat': 'age'}).text
        splash['Games'] = row.find('td', {'data-stat': 'g'}).text
        splash['Minutes Per Game'] = row.find('td', {'data-stat': 'mp_per_g'}).text
        splash['FG Per Game'] = row.find('td', {'data-stat': 'fg_per_g'}).text
        splash['FG Attempts Per Game'] = row.find('td', {'data-stat': 'fga_per_g'}).text
        splash['FG %'] = row.find('td', {'data-stat': 'fg_pct'}).text
        splash['3PT Per Game'] = row.find('td', {'data-stat': 'fg3_per_g'}).text
        splash['3PT Attempts Per Game'] = row.find('td', {'data-stat': 'fg3a_per_g'}).text
        splash['3PT %'] = row.find('td', {'data-stat': 'fg3_pct'}).text
        splash['2PT Per Game'] = row.find('td', {'data-stat': 'fg2_per_g'}).text
        splash['2PT Attempts Per Game'] = row.find('td', {'data-stat': 'fg2a_per_g'}).text
        splash['2PT %'] = row.find('td', {'data-stat': 'fg2_pct'}).text
        splash['Effective FG%'] = row.find('td', {'data-stat': 'efg_pct'}).text
        splash['FT Per Game'] = row.find('td', {'data-stat': 'ft_per_g'}).text
        splash['FT Attempts Per Game'] = row.find('td', {'data-stat': 'fta_per_g'}).text
        splash['FT %'] = row.find('td', {'data-stat': 'ft_pct'}).text
        splash['Offensive Reb Per Game'] = row.find('td', {'data-stat': 'orb_per_g'}).text
        splash['Defensive Reb Per Game'] = row.find('td', {'data-stat': 'drb_per_g'}).text
        splash['Total Reb Per Game'] = row.find('td', {'data-stat': 'trb_per_g'}).text
        splash['Assists Per Game'] = row.find('td', {'data-stat': 'ast_per_g'}).text
        splash['Steals Per Game'] = row.find('td', {'data-stat': 'stl_per_g'}).text
        splash['Blocks Per Game'] = row.find('td', {'data-stat': 'blk_per_g'}).text
        splash['Turnovers Per Game'] = row.find('td', {'data-stat': 'tov_per_g'}).text
        splash['Fouls Per Game'] = row.find('td', {'data-stat': 'pf_per_g'}).text
        splash['Points Per Game'] = row.find('td', {'data-stat': 'pts_per_g'}).text
        warriors.append(splash)

In [54]:
steph = pd.DataFrame(warriors, columns=per_game)
steph

Unnamed: 0,Age,Games,Minutes Per Game,FG Per Game,FG Attempts Per Game,FG %,3PT Per Game,3PT Attempts Per Game,3PT %,2PT Per Game,...,FT %,Offensive Reb Per Game,Defensive Reb Per Game,Total Reb Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Fouls Per Game,Points Per Game
0,21.0,80,36.2,6.6,14.3,0.462,2.1,4.8,0.437,4.5,...,0.885,0.6,3.9,4.5,5.9,1.9,0.2,3.0,3.2,17.5
1,22.0,74,33.6,6.8,14.2,0.48,2.0,4.6,0.442,4.8,...,0.934,0.7,3.2,3.9,5.8,1.5,0.3,3.1,3.1,18.6
2,23.0,26,28.2,5.6,11.4,0.49,2.1,4.7,0.455,3.5,...,0.809,0.6,2.8,3.4,5.3,1.5,0.3,2.5,2.4,14.7
3,24.0,78,38.2,8.0,17.8,0.451,3.5,7.7,0.453,4.5,...,0.9,0.8,3.3,4.0,6.9,1.6,0.2,3.1,2.5,22.9
4,25.0,78,36.5,8.4,17.7,0.471,3.3,7.9,0.424,5.0,...,0.885,0.6,3.7,4.3,8.5,1.6,0.2,3.8,2.5,24.0
5,26.0,80,32.7,8.2,16.8,0.487,3.6,8.1,0.443,4.6,...,0.914,0.7,3.6,4.3,7.7,2.0,0.2,3.1,2.0,23.8
6,27.0,79,34.2,10.2,20.2,0.504,5.1,11.2,0.454,5.1,...,0.908,0.9,4.6,5.4,6.7,2.1,0.2,3.3,2.0,30.1
7,28.0,79,33.4,8.5,18.3,0.468,4.1,10.0,0.411,4.4,...,0.898,0.8,3.7,4.5,6.6,1.8,0.2,3.0,2.3,25.3
8,29.0,51,32.0,8.4,16.9,0.495,4.2,9.8,0.423,4.2,...,0.921,0.7,4.4,5.1,6.1,1.6,0.2,3.0,2.2,26.4
9,30.0,1,37.0,11.0,20.0,0.55,5.0,9.0,0.556,6.0,...,1.0,0.0,8.0,8.0,9.0,1.0,0.0,3.0,4.0,32.0


In [55]:
steph = steph.drop(steph.index[[9, 10]])
steph

Unnamed: 0,Age,Games,Minutes Per Game,FG Per Game,FG Attempts Per Game,FG %,3PT Per Game,3PT Attempts Per Game,3PT %,2PT Per Game,...,FT %,Offensive Reb Per Game,Defensive Reb Per Game,Total Reb Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Fouls Per Game,Points Per Game
0,21,80,36.2,6.6,14.3,0.462,2.1,4.8,0.437,4.5,...,0.885,0.6,3.9,4.5,5.9,1.9,0.2,3.0,3.2,17.5
1,22,74,33.6,6.8,14.2,0.48,2.0,4.6,0.442,4.8,...,0.934,0.7,3.2,3.9,5.8,1.5,0.3,3.1,3.1,18.6
2,23,26,28.2,5.6,11.4,0.49,2.1,4.7,0.455,3.5,...,0.809,0.6,2.8,3.4,5.3,1.5,0.3,2.5,2.4,14.7
3,24,78,38.2,8.0,17.8,0.451,3.5,7.7,0.453,4.5,...,0.9,0.8,3.3,4.0,6.9,1.6,0.2,3.1,2.5,22.9
4,25,78,36.5,8.4,17.7,0.471,3.3,7.9,0.424,5.0,...,0.885,0.6,3.7,4.3,8.5,1.6,0.2,3.8,2.5,24.0
5,26,80,32.7,8.2,16.8,0.487,3.6,8.1,0.443,4.6,...,0.914,0.7,3.6,4.3,7.7,2.0,0.2,3.1,2.0,23.8
6,27,79,34.2,10.2,20.2,0.504,5.1,11.2,0.454,5.1,...,0.908,0.9,4.6,5.4,6.7,2.1,0.2,3.3,2.0,30.1
7,28,79,33.4,8.5,18.3,0.468,4.1,10.0,0.411,4.4,...,0.898,0.8,3.7,4.5,6.6,1.8,0.2,3.0,2.3,25.3
8,29,51,32.0,8.4,16.9,0.495,4.2,9.8,0.423,4.2,...,0.921,0.7,4.4,5.1,6.1,1.6,0.2,3.0,2.2,26.4


#### Advanced

In [56]:
adv_warriors = []

table = soup.find('table', {'id': 'advanced'})

for row in table.find_all('tr')[1:]:
    adv_splash = {}
    adv_splash['Adv Age'] = row.find('td', {'data-stat':'age'}).text
    adv_splash['PER'] = row.find('td', {'data-stat': 'per'}).text
    adv_splash['True Shooting'] = row.find('td', {'data-stat': 'ts_pct'}).text
    adv_splash['Total Rebounding %'] = row.find('td', {'data-stat': 'trb_pct'}).text
    adv_splash['Assist %'] = row.find('td', {'data-stat': 'ast_pct'}).text
    adv_splash['Steal %'] = row.find('td', {'data-stat': 'stl_pct'}).text
    adv_splash['Block %'] = row.find('td', {'data-stat': 'blk_pct'}).text
    adv_splash['Turnover %'] = row.find('td', {'data-stat': 'tov_pct'}).text
    adv_splash['Usage %'] = row.find('td', {'data-stat': 'usg_pct'}).text
    adv_splash['Win Shares'] = row.find('td', {'data-stat': 'ws'}).text
    adv_splash['Offensive Box +/-'] = row.find('td', {'data-stat': 'obpm'}).text
    adv_splash['Defensive Box +/-'] = row.find('td', {'data-stat': 'dbpm'}).text
    adv_splash['Box +/-'] = row.find('td', {'data-stat': 'bpm'}).text
    adv_splash['VORP'] = row.find('td', {'data-stat': 'vorp'}).text
    adv_warriors.append(adv_splash)

In [57]:
adv_steph = pd.DataFrame(adv_warriors, columns=advanced)
adv_steph

Unnamed: 0,Adv Age,PER,True Shooting,Total Rebounding %,Assist %,Steal %,Block %,Turnover %,Usage %,Win Shares,Offensive Box +/-,Defensive Box +/-,Box +/-,VORP
0,21.0,16.3,0.568,6.8,24.6,2.5,0.5,16.5,21.8,4.7,1.8,-0.7,1.0,2.2
1,22.0,19.4,0.595,6.5,28.1,2.2,0.6,16.4,24.4,6.6,3.5,-1.6,1.9,2.4
2,23.0,21.2,0.605,6.8,32.3,2.8,0.8,17.0,24.0,2.2,4.7,-1.3,3.4,1.0
3,24.0,21.3,0.589,5.8,31.1,2.1,0.3,13.7,26.4,11.2,6.3,-0.9,5.4,5.6
4,25.0,24.1,0.61,6.4,39.9,2.2,0.4,16.1,28.3,13.4,7.4,0.0,7.4,6.7
5,26.0,28.0,0.638,7.0,38.6,3.0,0.5,14.3,28.9,15.7,9.6,0.3,9.9,7.9
6,27.0,31.5,0.669,8.6,33.7,3.0,0.4,12.9,32.6,17.9,12.4,0.1,12.5,9.8
7,28.0,24.6,0.624,7.3,31.2,2.6,0.5,13.0,30.1,12.6,7.8,-0.4,7.3,6.2
8,29.0,28.2,0.675,9.0,30.3,2.4,0.4,13.3,31.0,9.1,9.9,-1.3,8.6,4.4
9,30.0,37.2,0.721,10.1,42.1,1.3,0.0,11.9,26.6,0.4,15.7,-1.9,13.8,0.1


In [58]:
adv_steph = adv_steph.drop(adv_steph.index[[9, 10]])
adv_steph

Unnamed: 0,Adv Age,PER,True Shooting,Total Rebounding %,Assist %,Steal %,Block %,Turnover %,Usage %,Win Shares,Offensive Box +/-,Defensive Box +/-,Box +/-,VORP
0,21,16.3,0.568,6.8,24.6,2.5,0.5,16.5,21.8,4.7,1.8,-0.7,1.0,2.2
1,22,19.4,0.595,6.5,28.1,2.2,0.6,16.4,24.4,6.6,3.5,-1.6,1.9,2.4
2,23,21.2,0.605,6.8,32.3,2.8,0.8,17.0,24.0,2.2,4.7,-1.3,3.4,1.0
3,24,21.3,0.589,5.8,31.1,2.1,0.3,13.7,26.4,11.2,6.3,-0.9,5.4,5.6
4,25,24.1,0.61,6.4,39.9,2.2,0.4,16.1,28.3,13.4,7.4,0.0,7.4,6.7
5,26,28.0,0.638,7.0,38.6,3.0,0.5,14.3,28.9,15.7,9.6,0.3,9.9,7.9
6,27,31.5,0.669,8.6,33.7,3.0,0.4,12.9,32.6,17.9,12.4,0.1,12.5,9.8
7,28,24.6,0.624,7.3,31.2,2.6,0.5,13.0,30.1,12.6,7.8,-0.4,7.3,6.2
8,29,28.2,0.675,9.0,30.3,2.4,0.4,13.3,31.0,9.1,9.9,-1.3,8.6,4.4


### Adjustments for Previous Player Data Frames

In [59]:
iverson_season = ['1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009']

dirk_season = ['1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']

dwade_season = ['2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']

steph_season = ['2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']

In [90]:
iverson.insert(0, "Season", iverson_season)
dirk.insert(0, "Season", dirk_season)
dwade.insert(0, "Season", dwade_season)
steph.insert(0, "Season", steph_season)

In [61]:
adv_iverson.insert(0, "Season", iverson_season)
adv_dirk.insert(0, "Season", dirk_season)
adv_dwade.insert(0, "Season", dwade_season)
adv_steph.insert(0, "Season", steph_season)

In [62]:
iverson_allstar = [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

dirk_allstar = [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0]

dwade_allstar = [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]

steph_allstar = [0, 0, 0, 0, 1, 1, 1, 1, 1]

iverson_mvp = [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]

dirk_mvp = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

dwade_mvp = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

steph_mvp = [0, 0, 0, 0, 0, 1, 1, 0, 0]

iverson_mvp_shares = [0.001, 0.00, 0.270, 0.109, 0.904, 0.023, 0.070, 0.00, 0.189, 0.001, 0.00, 0.00, 0.00, 0.00]

iverson_mvp_placing = [17, 0, 4, 7, 1, 9, 6, 0, 5, 10, 0, 0, 0, 0]

dirk_mvp_shares = [0.00, 0.00, 0.00, 0.025, 0.036, 0.003, 0.275, 0.435, 0.882, 0.004, 0.002, 0.045, 0.093, 0.003, 0.00, 0.006, 0.00, 0.00, 0.00, 0.00]

dirk_mvp_placing = [0, 0, 0, 8, 7, 10, 3, 3, 1, 11, 10, 7, 6, 12, 0, 14, 0, 0, 0, 0]

dwade_mvp_shares = [0.00, 0.034, 0.070, 0.002, 0.00, 0.562, 0.097, 0.020, 0.005, 0.004, 0.00, 0.00, 0.00, 0.00, 0.00]

dwade_mvp_placing = [0, 8, 6, 12, 0, 3, 5, 7, 10, 10, 0, 0, 0, 0, 0]

steph_mvp_shares = [0.00, 0.00, 0.00, 0.002, 0.053, 0.922, 1.000, 0.051, 0.005]

steph_mvp_placing = [0, 0, 0, 11, 6, 1, 1, 6, 10]

iverson_playoffs = [0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0]

iverson_rings = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

dirk_playoffs = [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0]

dirk_rings = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]

dwade_playoffs = [1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]

dwade_rings = [0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]

steph_playoffs = [0, 0, 0, 1, 1, 1, 1, 1, 1]

steph_rings = [0, 0, 0, 0, 0, 1, 0, 1, 1]

In [63]:
new_cols = ['Wins Over Replacement', 'All-Star Games', 'MVP Trophy', "MVP Placing", "Shares of MVP Votes", "Playoff Appearance", "NBA Championship"]

In [64]:
adv_iverson = adv_iverson.reindex(columns=adv_iverson.columns.tolist() + new_cols)
adv_dirk = adv_dirk.reindex(columns=adv_dirk.columns.tolist() + new_cols)
adv_dwade = adv_dwade.reindex(columns=adv_dwade.columns.tolist() + new_cols)
adv_steph = adv_steph.reindex(columns=adv_steph.columns.tolist() + new_cols)

In [65]:
adv_iverson["All-Star Games"] = iverson_allstar
adv_iverson["MVP Trophy"] = iverson_mvp
adv_iverson["MVP Placing"] = iverson_mvp_placing
adv_iverson["Shares of MVP Votes"] = iverson_mvp_shares
adv_iverson["Playoff Appearance"] = iverson_playoffs
adv_iverson["NBA Championship"] = iverson_rings

In [66]:
adv_dirk["All-Star Games"] = dirk_allstar
adv_dirk["MVP Trophy"] = dirk_mvp
adv_dirk["MVP Placing"] = dirk_mvp_placing
adv_dirk["Shares of MVP Votes"] = dirk_mvp_shares
adv_dirk["Playoff Appearance"] = dirk_playoffs
adv_dirk["NBA Championship"] = dirk_rings

In [67]:
adv_dwade["All-Star Games"] = dwade_allstar
adv_dwade["MVP Trophy"] = dwade_mvp
adv_dwade["MVP Placing"] = dwade_mvp_placing
adv_dwade["Shares of MVP Votes"] = dwade_mvp_shares
adv_dwade["Playoff Appearance"] = dwade_playoffs
adv_dwade["NBA Championship"] = dwade_rings

In [68]:
adv_steph["All-Star Games"] = steph_allstar
adv_steph["MVP Trophy"] = steph_mvp
adv_steph["MVP Placing"] = steph_mvp_placing
adv_steph["Shares of MVP Votes"] = steph_mvp_shares
adv_steph["Playoff Appearance"] = steph_playoffs
adv_steph["NBA Championship"] = steph_rings

In [69]:
iverson['Age'] = iverson['Age'].astype(int)
iverson['Games'] = iverson['Games'].astype(int)
iverson['Minutes Per Game'] = iverson['Minutes Per Game'].astype(float)
iverson['FG Per Game'] = iverson['FG Per Game'].astype(float)
iverson['FG Attempts Per Game'] = iverson['FG Attempts Per Game'].astype(float)
iverson['FG %'] = iverson['FG %'].astype(float)
iverson['3PT Per Game'] = iverson['3PT Per Game'].astype(float)
iverson['3PT Attempts Per Game'] = iverson['3PT Attempts Per Game'].astype(float)
iverson['3PT %'] = iverson['3PT %'].astype(float)
iverson['2PT Per Game'] = iverson['2PT Per Game'].astype(float)
iverson['2PT Attempts Per Game'] = iverson['2PT Attempts Per Game'].astype(float)
iverson['2PT %'] = iverson['2PT %'].astype(float)
iverson['Effective FG%'] = iverson['Effective FG%'].astype(float)
iverson['FT Per Game'] = iverson['FT Per Game'].astype(float)
iverson['FT Attempts Per Game'] = iverson['FT Attempts Per Game'].astype(float)
iverson['FT %'] = iverson['FT %'].astype(float)
iverson['Offensive Reb Per Game'] = iverson['Offensive Reb Per Game'].astype(float)
iverson['Defensive Reb Per Game'] = iverson['Defensive Reb Per Game'].astype(float)
iverson['Total Reb Per Game'] = iverson['Total Reb Per Game'].astype(float)
iverson['Assists Per Game'] = iverson['Assists Per Game'].astype(float)
iverson['Steals Per Game'] = iverson['Steals Per Game'].astype(float)
iverson['Blocks Per Game'] = iverson['Blocks Per Game'].astype(float)
iverson['Turnovers Per Game'] = iverson['Turnovers Per Game'].astype(float)
iverson['Fouls Per Game'] = iverson['Fouls Per Game'].astype(float)
iverson['Points Per Game'] = iverson['Points Per Game'].astype(float)

In [70]:
dirk['Age'] = dirk['Age'].astype(int)
dirk['Games'] = dirk['Games'].astype(int)
dirk['Minutes Per Game'] = dirk['Minutes Per Game'].astype(float)
dirk['FG Per Game'] = dirk['FG Per Game'].astype(float)
dirk['FG Attempts Per Game'] = dirk['FG Attempts Per Game'].astype(float)
dirk['FG %'] = dirk['FG %'].astype(float)
dirk['3PT Per Game'] = dirk['3PT Per Game'].astype(float)
dirk['3PT Attempts Per Game'] = dirk['3PT Attempts Per Game'].astype(float)
dirk['3PT %'] = dirk['3PT %'].astype(float)
dirk['2PT Per Game'] = dirk['2PT Per Game'].astype(float)
dirk['2PT Attempts Per Game'] = dirk['2PT Attempts Per Game'].astype(float)
dirk['2PT %'] = dirk['2PT %'].astype(float)
dirk['Effective FG%'] = dirk['Effective FG%'].astype(float)
dirk['FT Per Game'] = dirk['FT Per Game'].astype(float)
dirk['FT Attempts Per Game'] = dirk['FT Attempts Per Game'].astype(float)
dirk['FT %'] = dirk['FT %'].astype(float)
dirk['Offensive Reb Per Game'] = dirk['Offensive Reb Per Game'].astype(float)
dirk['Defensive Reb Per Game'] = dirk['Defensive Reb Per Game'].astype(float)
dirk['Total Reb Per Game'] = dirk['Total Reb Per Game'].astype(float)
dirk['Assists Per Game'] = dirk['Assists Per Game'].astype(float)
dirk['Steals Per Game'] = dirk['Steals Per Game'].astype(float)
dirk['Blocks Per Game'] = dirk['Blocks Per Game'].astype(float)
dirk['Turnovers Per Game'] = dirk['Turnovers Per Game'].astype(float)
dirk['Fouls Per Game'] = dirk['Fouls Per Game'].astype(float)
dirk['Points Per Game'] = dirk['Points Per Game'].astype(float)

In [71]:
dwade['Age'] = dwade['Age'].astype(int)
dwade['Games'] = dwade['Games'].astype(int)
dwade['Minutes Per Game'] = dwade['Minutes Per Game'].astype(float)
dwade['FG Per Game'] = dwade['FG Per Game'].astype(float)
dwade['FG Attempts Per Game'] = dwade['FG Attempts Per Game'].astype(float)
dwade['FG %'] = dwade['FG %'].astype(float)
dwade['3PT Per Game'] = dwade['3PT Per Game'].astype(float)
dwade['3PT Attempts Per Game'] = dwade['3PT Attempts Per Game'].astype(float)
dwade['3PT %'] = dwade['3PT %'].astype(float)
dwade['2PT Per Game'] = dwade['2PT Per Game'].astype(float)
dwade['2PT Attempts Per Game'] = dwade['2PT Attempts Per Game'].astype(float)
dwade['2PT %'] = dwade['2PT %'].astype(float)
dwade['Effective FG%'] = dwade['Effective FG%'].astype(float)
dwade['FT Per Game'] = dwade['FT Per Game'].astype(float)
dwade['FT Attempts Per Game'] = dwade['FT Attempts Per Game'].astype(float)
dwade['FT %'] = dwade['FT %'].astype(float)
dwade['Offensive Reb Per Game'] = dwade['Offensive Reb Per Game'].astype(float)
dwade['Defensive Reb Per Game'] = dwade['Defensive Reb Per Game'].astype(float)
dwade['Total Reb Per Game'] = dwade['Total Reb Per Game'].astype(float)
dwade['Assists Per Game'] = dwade['Assists Per Game'].astype(float)
dwade['Steals Per Game'] = dwade['Steals Per Game'].astype(float)
dwade['Blocks Per Game'] = dwade['Blocks Per Game'].astype(float)
dwade['Turnovers Per Game'] = dwade['Turnovers Per Game'].astype(float)
dwade['Fouls Per Game'] = dwade['Fouls Per Game'].astype(float)
dwade['Points Per Game'] = dwade['Points Per Game'].astype(float)

In [72]:
steph['Age'] = steph['Age'].astype(int)
steph['Games'] = steph['Games'].astype(int)
steph['Minutes Per Game'] = steph['Minutes Per Game'].astype(float)
steph['FG Per Game'] = steph['FG Per Game'].astype(float)
steph['FG Attempts Per Game'] = steph['FG Attempts Per Game'].astype(float)
steph['FG %'] = steph['FG %'].astype(float)
steph['3PT Per Game'] = steph['3PT Per Game'].astype(float)
steph['3PT Attempts Per Game'] = steph['3PT Attempts Per Game'].astype(float)
steph['3PT %'] = steph['3PT %'].astype(float)
steph['2PT Per Game'] = steph['2PT Per Game'].astype(float)
steph['2PT Attempts Per Game'] = steph['2PT Attempts Per Game'].astype(float)
steph['2PT %'] = steph['2PT %'].astype(float)
steph['Effective FG%'] = steph['Effective FG%'].astype(float)
steph['FT Per Game'] = steph['FT Per Game'].astype(float)
steph['FT Attempts Per Game'] = steph['FT Attempts Per Game'].astype(float)
steph['FT %'] = steph['FT %'].astype(float)
steph['Offensive Reb Per Game'] = steph['Offensive Reb Per Game'].astype(float)
steph['Defensive Reb Per Game'] = steph['Defensive Reb Per Game'].astype(float)
steph['Total Reb Per Game'] = steph['Total Reb Per Game'].astype(float)
steph['Assists Per Game'] = steph['Assists Per Game'].astype(float)
steph['Steals Per Game'] = steph['Steals Per Game'].astype(float)
steph['Blocks Per Game'] = steph['Blocks Per Game'].astype(float)
steph['Turnovers Per Game'] = steph['Turnovers Per Game'].astype(float)
steph['Fouls Per Game'] = steph['Fouls Per Game'].astype(float)
steph['Points Per Game'] = steph['Points Per Game'].astype(float)

In [73]:
adv_iverson['Adv Age'] = adv_iverson['Adv Age'].astype(int)
adv_iverson['PER'] = adv_iverson['PER'].astype(float)
adv_iverson['True Shooting'] = adv_iverson['True Shooting'].astype(float)
adv_iverson['Total Rebounding %'] = adv_iverson['Total Rebounding %'].astype(float)
adv_iverson['Assist %'] = adv_iverson['Assist %'].astype(float)
adv_iverson['Steal %'] = adv_iverson['Steal %'].astype(float)
adv_iverson['Block %'] = adv_iverson['Block %'].astype(float)
adv_iverson['Turnover %'] = adv_iverson['Turnover %'].astype(float)
adv_iverson['Usage %'] = adv_iverson['Usage %'].astype(float)
adv_iverson['Win Shares'] = adv_iverson['Win Shares'].astype(float)
adv_iverson['Offensive Box +/-'] = adv_iverson['Offensive Box +/-'].astype(float)
adv_iverson['Defensive Box +/-'] = adv_iverson['Defensive Box +/-'].astype(float)
adv_iverson['Box +/-'] = adv_iverson['Box +/-'].astype(float)
adv_iverson['VORP'] = adv_iverson['VORP'].astype(float)

adv_iverson['Wins Over Replacement'] = adv_iverson['VORP'] * 2.70

In [74]:
adv_steph['Adv Age'] = adv_steph['Adv Age'].astype(int)
adv_steph['PER'] = adv_steph['PER'].astype(float)
adv_steph['True Shooting'] = adv_steph['True Shooting'].astype(float)
adv_steph['Total Rebounding %'] = adv_steph['Total Rebounding %'].astype(float)
adv_steph['Assist %'] = adv_steph['Assist %'].astype(float)
adv_steph['Steal %'] = adv_steph['Steal %'].astype(float)
adv_steph['Block %'] = adv_steph['Block %'].astype(float)
adv_steph['Turnover %'] = adv_steph['Turnover %'].astype(float)
adv_steph['Usage %'] = adv_steph['Usage %'].astype(float)
adv_steph['Win Shares'] = adv_steph['Win Shares'].astype(float)
adv_steph['Offensive Box +/-'] = adv_steph['Offensive Box +/-'].astype(float)
adv_steph['Defensive Box +/-'] = adv_steph['Defensive Box +/-'].astype(float)
adv_steph['Box +/-'] = adv_steph['Box +/-'].astype(float)
adv_steph['VORP'] = adv_steph['VORP'].astype(float)

adv_steph['Wins Over Replacement'] = adv_steph['VORP'] * 2.70

In [75]:
adv_dirk['Adv Age'] = adv_dirk['Adv Age'].astype(int)
adv_dirk['PER'] = adv_dirk['PER'].astype(float)
adv_dirk['True Shooting'] = adv_dirk['True Shooting'].astype(float)
adv_dirk['Total Rebounding %'] = adv_dirk['Total Rebounding %'].astype(float)
adv_dirk['Assist %'] = adv_dirk['Assist %'].astype(float)
adv_dirk['Steal %'] = adv_dirk['Steal %'].astype(float)
adv_dirk['Block %'] = adv_dirk['Block %'].astype(float)
adv_dirk['Turnover %'] = adv_dirk['Turnover %'].astype(float)
adv_dirk['Usage %'] = adv_dirk['Usage %'].astype(float)
adv_dirk['Win Shares'] = adv_dirk['Win Shares'].astype(float)
adv_dirk['Offensive Box +/-'] = adv_dirk['Offensive Box +/-'].astype(float)
adv_dirk['Defensive Box +/-'] = adv_dirk['Defensive Box +/-'].astype(float)
adv_dirk['Box +/-'] = adv_dirk['Box +/-'].astype(float)
adv_dirk['VORP'] = adv_dirk['VORP'].astype(float)

adv_dirk['Wins Over Replacement'] = adv_dirk['VORP'] * 2.70

In [76]:
adv_dwade['Adv Age'] = adv_dwade['Adv Age'].astype(int)
adv_dwade['PER'] = adv_dwade['PER'].astype(float)
adv_dwade['True Shooting'] = adv_dwade['True Shooting'].astype(float)
adv_dwade['Total Rebounding %'] = adv_dwade['Total Rebounding %'].astype(float)
adv_dwade['Assist %'] = adv_dwade['Assist %'].astype(float)
adv_dwade['Steal %'] = adv_dwade['Steal %'].astype(float)
adv_dwade['Block %'] = adv_dwade['Block %'].astype(float)
adv_dwade['Turnover %'] = adv_dwade['Turnover %'].astype(float)
adv_dwade['Usage %'] = adv_dwade['Usage %'].astype(float)
adv_dwade['Win Shares'] = adv_dwade['Win Shares'].astype(float)
adv_dwade['Offensive Box +/-'] = adv_dwade['Offensive Box +/-'].astype(float)
adv_dwade['Defensive Box +/-'] = adv_dwade['Defensive Box +/-'].astype(float)
adv_dwade['Box +/-'] = adv_dwade['Box +/-'].astype(float)
adv_dwade['VORP'] = adv_dwade['VORP'].astype(float)

adv_dwade['Wins Over Replacement'] = adv_dwade['VORP'] * 2.70

In [77]:
adv_iverson.drop(['Adv Age'], axis=1, inplace=True)

In [78]:
adv_dirk.drop(['Adv Age'], axis=1, inplace=True)

In [79]:
adv_dwade.drop(['Adv Age'], axis=1, inplace=True)

In [80]:
adv_steph.drop(['Adv Age'], axis=1, inplace=True)

In [81]:
iverson = pd.merge(iverson, adv_iverson, on="Season")
iverson

Unnamed: 0,Season,Age,Games,Minutes Per Game,FG Per Game,FG Attempts Per Game,FG %,3PT Per Game,3PT Attempts Per Game,3PT %,...,Defensive Box +/-,Box +/-,VORP,Wins Over Replacement,All-Star Games,MVP Trophy,MVP Placing,Shares of MVP Votes,Playoff Appearance,NBA Championship
0,1996,21,76,40.1,8.2,19.8,0.416,2.0,6.0,0.341,...,-2.0,1.5,2.7,7.29,0,0,17,0.001,0,0
1,1997,22,80,39.4,8.1,17.6,0.461,0.9,2.9,0.298,...,-0.7,3.8,4.6,12.42,0,0,0,0.0,0,0
2,1998,23,48,41.5,9.1,22.0,0.412,1.2,4.1,0.291,...,-0.3,4.6,3.3,8.91,0,0,4,0.27,0,0
3,1999,24,70,40.8,10.4,24.8,0.421,1.3,3.7,0.341,...,-0.8,2.6,3.3,8.91,1,0,7,0.109,0,0
4,2000,25,71,42.0,10.7,25.5,0.42,1.4,4.3,0.32,...,-0.1,4.8,5.1,13.77,1,1,1,0.904,1,0
5,2001,26,60,43.7,11.1,27.8,0.398,1.3,4.5,0.291,...,0.2,3.8,3.9,10.53,1,0,9,0.023,1,0
6,2002,27,82,42.5,9.8,23.7,0.414,1.0,3.7,0.277,...,-0.3,2.8,4.2,11.34,1,0,6,0.07,1,0
7,2003,28,48,42.5,9.1,23.4,0.387,1.2,4.1,0.286,...,-0.6,2.5,2.3,6.21,1,0,0,0.0,0,0
8,2004,29,75,42.3,10.3,24.2,0.424,1.4,4.5,0.308,...,-0.5,4.3,5.0,13.5,1,0,5,0.189,1,0
9,2005,30,72,43.1,11.3,25.3,0.447,1.0,3.1,0.323,...,-2.5,3.5,4.3,11.61,1,0,10,0.001,0,0


In [82]:
iverson.set_index("Season", inplace=True)
iverson

Unnamed: 0_level_0,Age,Games,Minutes Per Game,FG Per Game,FG Attempts Per Game,FG %,3PT Per Game,3PT Attempts Per Game,3PT %,2PT Per Game,...,Defensive Box +/-,Box +/-,VORP,Wins Over Replacement,All-Star Games,MVP Trophy,MVP Placing,Shares of MVP Votes,Playoff Appearance,NBA Championship
Season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1996,21,76,40.1,8.2,19.8,0.416,2.0,6.0,0.341,6.2,...,-2.0,1.5,2.7,7.29,0,0,17,0.001,0,0
1997,22,80,39.4,8.1,17.6,0.461,0.9,2.9,0.298,7.2,...,-0.7,3.8,4.6,12.42,0,0,0,0.0,0,0
1998,23,48,41.5,9.1,22.0,0.412,1.2,4.1,0.291,7.9,...,-0.3,4.6,3.3,8.91,0,0,4,0.27,0,0
1999,24,70,40.8,10.4,24.8,0.421,1.3,3.7,0.341,9.1,...,-0.8,2.6,3.3,8.91,1,0,7,0.109,0,0
2000,25,71,42.0,10.7,25.5,0.42,1.4,4.3,0.32,9.4,...,-0.1,4.8,5.1,13.77,1,1,1,0.904,1,0
2001,26,60,43.7,11.1,27.8,0.398,1.3,4.5,0.291,9.8,...,0.2,3.8,3.9,10.53,1,0,9,0.023,1,0
2002,27,82,42.5,9.8,23.7,0.414,1.0,3.7,0.277,8.8,...,-0.3,2.8,4.2,11.34,1,0,6,0.07,1,0
2003,28,48,42.5,9.1,23.4,0.387,1.2,4.1,0.286,7.9,...,-0.6,2.5,2.3,6.21,1,0,0,0.0,0,0
2004,29,75,42.3,10.3,24.2,0.424,1.4,4.5,0.308,8.9,...,-0.5,4.3,5.0,13.5,1,0,5,0.189,1,0
2005,30,72,43.1,11.3,25.3,0.447,1.0,3.1,0.323,10.3,...,-2.5,3.5,4.3,11.61,1,0,10,0.001,0,0


In [83]:
dirk = pd.merge(dirk, adv_dirk, on="Season")

In [84]:
dirk.set_index("Season", inplace=True)
dirk

Unnamed: 0_level_0,Age,Games,Minutes Per Game,FG Per Game,FG Attempts Per Game,FG %,3PT Per Game,3PT Attempts Per Game,3PT %,2PT Per Game,...,Defensive Box +/-,Box +/-,VORP,Wins Over Replacement,All-Star Games,MVP Trophy,MVP Placing,Shares of MVP Votes,Playoff Appearance,NBA Championship
Season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1998,20,47,20.4,2.9,7.1,0.405,0.3,1.4,0.206,2.6,...,-0.9,-2.9,-0.2,-0.54,0,0,0,0.0,0,0
1999,21,82,35.8,6.3,13.6,0.461,1.4,3.7,0.379,4.9,...,-0.6,1.7,2.7,7.29,0,0,0,0.0,0,0
2000,22,82,38.1,7.2,15.2,0.474,1.8,4.8,0.387,5.4,...,0.5,4.7,5.3,14.31,0,0,0,0.0,1,0
2001,23,76,38.0,7.9,16.6,0.477,1.8,4.6,0.397,6.1,...,0.1,4.7,4.8,12.96,1,0,8,0.025,1,0
2002,24,80,39.0,8.6,18.6,0.463,1.9,4.9,0.379,6.8,...,1.0,5.7,6.1,16.47,1,0,7,0.036,1,0
2003,25,77,37.9,7.9,17.0,0.462,1.3,3.8,0.341,6.6,...,0.0,3.4,4.0,10.8,1,0,10,0.003,1,0
2004,26,78,38.7,8.5,18.5,0.459,1.2,2.9,0.399,7.3,...,1.7,5.7,5.9,15.93,1,0,3,0.275,1,0
2005,27,81,38.1,9.3,19.3,0.48,1.4,3.3,0.406,7.9,...,-0.1,5.7,6.0,16.2,1,0,3,0.435,1,0
2006,28,78,36.2,8.6,17.2,0.502,0.9,2.2,0.416,7.7,...,1.3,6.7,6.2,16.74,1,1,1,0.882,1,0
2007,29,77,36.0,8.2,17.1,0.479,1.0,2.9,0.359,7.2,...,0.9,5.2,5.0,13.5,1,0,11,0.004,1,0


In [85]:
dwade = pd.merge(dwade, adv_dwade, on="Season")

In [86]:
dwade.set_index("Season", inplace=True)
dwade

Unnamed: 0_level_0,Age,Games,Minutes Per Game,FG Per Game,FG Attempts Per Game,FG %,3PT Per Game,3PT Attempts Per Game,3PT %,2PT Per Game,...,Defensive Box +/-,Box +/-,VORP,Wins Over Replacement,All-Star Games,MVP Trophy,MVP Placing,Shares of MVP Votes,Playoff Appearance,NBA Championship
Season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2003,22,61,34.9,6.1,13.1,0.465,0.3,0.9,0.302,5.8,...,0.4,1.3,1.8,4.86,0,0,0,0.0,1,0
2004,23,77,38.6,8.2,17.1,0.478,0.2,0.6,0.289,8.0,...,1.4,4.4,4.8,12.96,1,0,8,0.034,1,0
2005,24,75,38.6,9.3,18.8,0.495,0.2,1.0,0.171,9.1,...,1.6,7.5,7.0,18.9,1,0,6,0.07,1,1
2006,25,51,37.9,9.3,18.9,0.491,0.4,1.5,0.266,8.8,...,1.9,8.4,5.1,13.77,1,0,12,0.002,1,0
2007,26,51,38.3,8.6,18.4,0.469,0.4,1.5,0.286,8.2,...,0.0,3.0,2.5,6.75,1,0,0,0.0,0,0
2008,27,79,38.6,10.8,22.0,0.491,1.1,3.5,0.317,9.7,...,2.0,10.7,9.7,26.19,1,0,3,0.562,1,0
2009,28,77,36.3,9.3,19.6,0.476,0.9,3.2,0.3,8.4,...,2.0,9.4,8.0,21.6,1,0,5,0.097,1,0
2010,29,76,37.1,9.1,18.2,0.5,0.8,2.7,0.306,8.3,...,1.5,5.9,5.7,15.39,1,0,7,0.02,1,0
2011,30,49,33.2,8.5,17.1,0.497,0.3,1.1,0.268,8.2,...,1.8,6.3,3.4,9.18,1,0,10,0.005,1,1
2012,31,69,34.7,8.2,15.8,0.521,0.2,1.0,0.258,8.0,...,1.1,3.9,3.5,9.45,1,0,10,0.004,1,1


In [87]:
steph.head(3)

Unnamed: 0,Age,Games,Minutes Per Game,FG Per Game,FG Attempts Per Game,FG %,3PT Per Game,3PT Attempts Per Game,3PT %,2PT Per Game,...,FT %,Offensive Reb Per Game,Defensive Reb Per Game,Total Reb Per Game,Assists Per Game,Steals Per Game,Blocks Per Game,Turnovers Per Game,Fouls Per Game,Points Per Game
0,21,80,36.2,6.6,14.3,0.462,2.1,4.8,0.437,4.5,...,0.885,0.6,3.9,4.5,5.9,1.9,0.2,3.0,3.2,17.5
1,22,74,33.6,6.8,14.2,0.48,2.0,4.6,0.442,4.8,...,0.934,0.7,3.2,3.9,5.8,1.5,0.3,3.1,3.1,18.6
2,23,26,28.2,5.6,11.4,0.49,2.1,4.7,0.455,3.5,...,0.809,0.6,2.8,3.4,5.3,1.5,0.3,2.5,2.4,14.7


In [88]:
adv_steph.head(3)

Unnamed: 0,Season,PER,True Shooting,Total Rebounding %,Assist %,Steal %,Block %,Turnover %,Usage %,Win Shares,...,Defensive Box +/-,Box +/-,VORP,Wins Over Replacement,All-Star Games,MVP Trophy,MVP Placing,Shares of MVP Votes,Playoff Appearance,NBA Championship
0,2009,16.3,0.568,6.8,24.6,2.5,0.5,16.5,21.8,4.7,...,-0.7,1.0,2.2,5.94,0,0,0,0.0,0,0
1,2010,19.4,0.595,6.5,28.1,2.2,0.6,16.4,24.4,6.6,...,-1.6,1.9,2.4,6.48,0,0,0,0.0,0,0
2,2011,21.2,0.605,6.8,32.3,2.8,0.8,17.0,24.0,2.2,...,-1.3,3.4,1.0,2.7,0,0,0,0.0,0,0


In [89]:
steph.columns

Index(['Age', 'Games', 'Minutes Per Game', 'FG Per Game',
       'FG Attempts Per Game', 'FG %', '3PT Per Game', '3PT Attempts Per Game',
       '3PT %', '2PT Per Game', '2PT Attempts Per Game', '2PT %',
       'Effective FG%', 'FT Per Game', 'FT Attempts Per Game', 'FT %',
       'Offensive Reb Per Game', 'Defensive Reb Per Game',
       'Total Reb Per Game', 'Assists Per Game', 'Steals Per Game',
       'Blocks Per Game', 'Turnovers Per Game', 'Fouls Per Game',
       'Points Per Game'],
      dtype='object')

In [91]:
steph = pd.merge(steph, adv_steph, on="Season")

In [92]:
steph.set_index("Season", inplace=True)
steph

Unnamed: 0_level_0,Age,Games,Minutes Per Game,FG Per Game,FG Attempts Per Game,FG %,3PT Per Game,3PT Attempts Per Game,3PT %,2PT Per Game,...,Defensive Box +/-,Box +/-,VORP,Wins Over Replacement,All-Star Games,MVP Trophy,MVP Placing,Shares of MVP Votes,Playoff Appearance,NBA Championship
Season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009,21,80,36.2,6.6,14.3,0.462,2.1,4.8,0.437,4.5,...,-0.7,1.0,2.2,5.94,0,0,0,0.0,0,0
2010,22,74,33.6,6.8,14.2,0.48,2.0,4.6,0.442,4.8,...,-1.6,1.9,2.4,6.48,0,0,0,0.0,0,0
2011,23,26,28.2,5.6,11.4,0.49,2.1,4.7,0.455,3.5,...,-1.3,3.4,1.0,2.7,0,0,0,0.0,0,0
2012,24,78,38.2,8.0,17.8,0.451,3.5,7.7,0.453,4.5,...,-0.9,5.4,5.6,15.12,0,0,11,0.002,1,0
2013,25,78,36.5,8.4,17.7,0.471,3.3,7.9,0.424,5.0,...,0.0,7.4,6.7,18.09,1,0,6,0.053,1,0
2014,26,80,32.7,8.2,16.8,0.487,3.6,8.1,0.443,4.6,...,0.3,9.9,7.9,21.33,1,1,1,0.922,1,1
2015,27,79,34.2,10.2,20.2,0.504,5.1,11.2,0.454,5.1,...,0.1,12.5,9.8,26.46,1,1,1,1.0,1,0
2016,28,79,33.4,8.5,18.3,0.468,4.1,10.0,0.411,4.4,...,-0.4,7.3,6.2,16.74,1,0,6,0.051,1,1
2017,29,51,32.0,8.4,16.9,0.495,4.2,9.8,0.423,4.2,...,-1.3,8.6,4.4,11.88,1,0,10,0.005,1,1


In [93]:
iverson.to_csv("iverson.csv")
dirk.to_csv("dirk.csv")
dwade.to_csv("dwade.csv")
steph.to_csv("steph.csv")

In [94]:
magic = pd.read_csv("./magic.csv")
bird = pd.read_csv("./bird.csv")
isiah = pd.read_csv("./isiah.csv")
mj = pd.read_csv("./mj.csv")
dream = pd.read_csv("./dream.csv")
shaq = pd.read_csv("./shaq.csv")
tim = pd.read_csv("./tim.csv")
kobe = pd.read_csv("./kobe.csv")
lebron = pd.read_csv("./lebron.csv")
kd = pd.read_csv("./kd.csv")

In [95]:
magic.set_index("Season", inplace=True)
bird.set_index("Season", inplace=True)
isiah.set_index("Season", inplace=True)
mj.set_index("Season", inplace=True)
dream.set_index("Season", inplace=True)
shaq.set_index("Season", inplace=True)
tim.set_index("Season", inplace=True)
kobe.set_index("Season", inplace=True)
lebron.set_index("Season", inplace=True)
kd.set_index("Season", inplace=True)

### Everything in the below cell was done previously and saved to .csv.  So when the file was accessed at the beginning, the cleaning was already done.

In [78]:
# magic.isnull().any().any()

# bird[["3PT %"]]

# bird = bird.fillna(0.00)

# bird

# isiah.isnull().any().any()

# mj.isnull().any().any()

# dream.isnull().sum()

# dream = dream.fillna(0.00)

# dream

# shaq.isnull().any().any()

# tim.isnull().any().any()

# iverson.isnull().any().any()

# kobe.isnull().any().any()

# dirk.isnull().any().any()

# lebron.isnull().any().any()

# dwade.isnull().any().any()

# kd.isnull().any().any()

# steph.isnull().any().any()

In [96]:
magic["identifier"] = "Magic"
bird["identifier"] = "Bird"
isiah["identifier"] = "Zeke"
mj["identifier"] = "Air Jordan"
dream["identifier"] = "Dream"
shaq["identifier"] = "Shaq"
iverson["identifier"] = "The Answer"
tim["identifier"] = "The Big Fundamental"
dirk["identifier"] = "Dirk"
kobe["identifier"] = "Black Mamba"
lebron["identifier"] = "The King"
dwade["identifier"] = "Flash"
kd["identifier"] = "Slim Reaper"
steph["identifier"] = "Chef"

In [97]:
magic["identifier2"] = 1
bird["identifier2"] = 2
isiah["identifier2"] = 3
mj["identifier2"] = 4
dream["identifier2"] = 5
shaq["identifier2"] = 6
iverson["identifier2"] = 7
tim["identifier2"] = 8
dirk["identifier2"] = 9
kobe["identifier2"] = 10
lebron["identifier2"] = 11
dwade["identifier2"] = 12
kd["identifier2"] = 13
steph["identifier2"] = 14

In [98]:
iverson.to_csv("iverson.csv")
dirk.to_csv("dirk.csv")
dwade.to_csv("dwade.csv")
steph.to_csv("steph.csv")
magic.to_csv("magic.csv")
bird.to_csv("bird.csv")
isiah.to_csv("isiah.csv")
mj.to_csv("mj.csv")
dream.to_csv("dream.csv")
shaq.to_csv("shaq.csv")
tim.to_csv("tim.csv")
kobe.to_csv("kobe.csv")
lebron.to_csv("lebron.csv")
kd.to_csv("kd.csv")