# Importing EDA Packages and Data

In [1]:
import pandas as pd
import numpy as np

In [7]:
df = pd.read_csv('data/regseason/2012_stats.csv')
df2 = pd.read_csv('data/playoffs/2012_stats.csv')

In [8]:
print(df.shape, df2.shape)

(478, 31) (204, 31)


Some basic data cleaning done to our initial frames:
- Some players were traded during the middle of a season, and as such will have 3+ rows. The first will be a player's total stats for the season (df.Tm == TOT), and the following will be stats for each of the teams the player recorded stats for, and subsequently drop them from the dataframe. 
- Dropping leftover rows that served as headers on the original document.
- Removing the '\*' denoting any player that has made the Hall of Fame (sorry y\'all).
- Removing any unnecessary instances of 'Unnamed: 0' columns.

In [None]:
# from soup_bballref import get_player_stats

# for i in range(1989, 2020):
#     get_player_stats(i)

In [34]:
# from cleaning_script import clean_season

# for folder in ['regseason', 'reg-advanced', 'reg-36', 'playoffs', 'play-advanced', 'play-36']:
#     for year in range(1989, 2020):
#         clean_season(folder=folder, year=year)

In [5]:
print(df.shape, df2.shape)

(478, 30) (204, 31)


In [90]:
df.head()

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,current_year
0,Jeff Adrien,PF,25.0,HOU,8.0,0.0,7.9,0.9,2.0,0.438,...,0.6,2.1,2.8,0.1,0.0,0.3,0.3,1.6,2.6,2012
1,Arron Afflalo,SG,26.0,DEN,62.0,62.0,33.6,5.3,11.3,0.471,...,0.6,2.5,3.2,2.4,0.6,0.2,1.4,2.2,15.2,2012
2,Blake Ahearn,PG,27.0,UTA,4.0,0.0,7.5,1.0,3.5,0.286,...,0.0,0.5,0.5,0.3,0.0,0.0,1.3,1.0,2.5,2012
3,Solomon Alabi,C,23.0,TOR,14.0,0.0,8.7,0.9,2.6,0.361,...,1.1,2.3,3.4,0.2,0.1,0.6,0.4,0.8,2.4,2012
4,Cole Aldrich,C,23.0,OKC,26.0,0.0,6.7,0.8,1.6,0.524,...,0.5,1.3,1.8,0.1,0.3,0.6,0.3,0.8,2.2,2012


# Draft Year & Height-Weight Information

I've attained a dataset that contains the each player's heigh, weight, and draft year from the 1996-97 season through the 2019-20 season, and will be merging those relevant features onto our base dataframe.

src: https://www.kaggle.com/justinas/nba-height-and-weight-analysis/notebook?select=all_seasons.csv

In [172]:
draft_df = pd.read_csv('data/raw/draft/all_seasons.csv')
draft_df = draft_df[['player_name', 'age', 'player_height', 'player_weight', 'draft_year',
'country', 'season']]

In [173]:
draft_df.loc[draft_df.player_name == 'Kobe Bryant'].head(2)

Unnamed: 0,player_name,age,player_height,player_weight,draft_year,country,season
342,Kobe Bryant,18.0,200.66,90.7184,1996,USA,1996-97
536,Kobe Bryant,19.0,200.66,95.25432,1996,USA,1997-98


In [174]:
draft_df.describe()

Unnamed: 0,age,player_height,player_weight
count,11145.0,11145.0,11145.0
mean,27.168686,200.812818,100.637868
std,4.344164,9.190973,12.576295
min,18.0,160.02,60.327736
25%,24.0,195.58,90.7184
50%,27.0,200.66,99.79024
75%,30.0,208.28,109.315672
max,44.0,231.14,163.29312


It's a bit unfortunate that this dataframe only contains player information for seasons after the 1996-97 season, but until I get my head around how to scrape that information from basketball-reference it will have to do.

# Preparing Draft Year & Height/Weight for Merge
Going to create a features to determine whether or not a player was drafted, convert the 'season' variable to an operable integer, and converting height and weight away from the metric system.

In [175]:
# Delete this line if running for the first time.
raise Exception('This cell has been ran, and the data has already been saved.')

# Creating a drafted column to identify whether or not players were drafted
draft_df['drafted'] = np.where(draft_df['draft_year'] != 'Undrafted', 1, 0)

# Replace Undrafted with NaN
draft_df['draft_year'] = draft_df['draft_year'].replace(r'Undrafted', np.nan, regex=True)

# Truncating the season column to be the latter year, eg '1996-97' into 1997. This will allow
# me to create a "season played" column based off of draft year and the season.
# This will create an issue with the 1999-00 season that will be addressed.
for i in range(0, len(draft_df)):
    draft_df.iat[i, 6] = draft_df.season[i][:2] + draft_df.season[i][-2:]
    
# Converting draft_year and season to operable integers 
draft_df['draft_year'] = pd.to_numeric(draft_df['draft_year'])
draft_df['season'] = pd.to_numeric(draft_df['season'])
    
# Adjusting the 1999-00 season to correctly show the year 2000, rather than 1900.
for i in draft_df.loc[draft_df['season'] == 1900].index:
    draft_df.iat[i, 6] = 2000
    
# Converting player_weight from kg to pounds
draft_df.player_weight = draft_df.player_weight.apply(lambda x: x * 2.205)

# Converting player_height from cm to inches
draft_df.player_height = draft_df.player_height.apply(lambda x: x / 2.54)

draft_df.to_csv('data/clean/draft/all_seasons.csv')

In [176]:
draft_df.loc[draft_df.player_name == 'Kobe Bryant'].head(2)

Unnamed: 0,player_name,age,player_height,player_weight,draft_year,country,season,drafted
342,Kobe Bryant,18.0,79.0,200.034072,1996.0,USA,1997,1
536,Kobe Bryant,19.0,79.0,210.035776,1996.0,USA,1998,1


In [177]:
# Changing column names to align with those in the frame we'll be merging onto.
draft_df['current_year'] = draft_df['season']
draft_df['Player'] = draft_df['player_name']

# Pairing the columns down to the ones of interest.
draft_df = draft_df[['Player', 'age', 'player_height', 'player_weight', 'draft_year', 'current_year', 'drafted']]
draft_df.drafted.value_counts()

1    9203
0    1942
Name: drafted, dtype: int64

In [178]:
draft_df.head(2)

Unnamed: 0,Player,age,player_height,player_weight,draft_year,current_year,drafted
0,Dennis Rodman,36.0,78.0,220.037479,1986.0,1997,1
1,Dwayne Schintzius,28.0,85.0,260.044294,1990.0,1997,1


In [168]:
# Creating a list of undrafted players to look into retrieving the year they entered the NBA.
# 648 Unique undrafted players.
undrafted = set(draft_df.loc[draft_df.drafted == 0].player_name)

# Souping Undrafted Players' First Year

This did not work, as the wiki only contained which draft year players missed, and not the year they played their first game (the year their careers began).

In [46]:
# from bs4 import BeautifulSoup
# import pandas as pd
# import requests


# wiki_url = "https://en.wikipedia.org/wiki/List_of_National_Basketball_Association_undrafted_players"
# table_class = "wikitable"

# # this is the HTML from the given URL
# response = requests.get(wiki_url)

# # instantiating the Soup
# soup = BeautifulSoup(response.text, 'html.parser')


# # Putting the table we want into an object
# undrafted_table = soup.find('table', attrs={'class':table_class})

# undrafted_df = pd.read_html(str(undrafted_table))
# undrafted_df = undrafted_df[0]

In [22]:
# undrafted_df.head()

In [23]:
# undrafted_df.info()

In [24]:
# undrafted_df.isna().sum()

In [25]:
# # Dropping a row of NaN values, likely a header row.
# undrafted_df = undrafted_df.drop(undrafted_df.loc[undrafted_df.Player.isna()].index)

# undrafted_df.info()

In [57]:
#undrafted_df.to_csv('data/undrafted_first_year.csv')

In [26]:
df['current_year'] = 2012

In [27]:
df.head()

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,current_year
0,Jeff Adrien,PF,25.0,HOU,8.0,0.0,7.9,0.9,2.0,0.438,...,0.6,2.1,2.8,0.1,0.0,0.3,0.3,1.6,2.6,2012
1,Arron Afflalo,SG,26.0,DEN,62.0,62.0,33.6,5.3,11.3,0.471,...,0.6,2.5,3.2,2.4,0.6,0.2,1.4,2.2,15.2,2012
2,Blake Ahearn,PG,27.0,UTA,4.0,0.0,7.5,1.0,3.5,0.286,...,0.0,0.5,0.5,0.3,0.0,0.0,1.3,1.0,2.5,2012
3,Solomon Alabi,C,23.0,TOR,14.0,0.0,8.7,0.9,2.6,0.361,...,1.1,2.3,3.4,0.2,0.1,0.6,0.4,0.8,2.4,2012
4,Cole Aldrich,C,23.0,OKC,26.0,0.0,6.7,0.8,1.6,0.524,...,0.5,1.3,1.8,0.1,0.3,0.6,0.3,0.8,2.2,2012


In [28]:
draft_df.head(2)

Unnamed: 0,player_name,age,player_height,player_weight,draft_year,country,season,drafted
0,Dennis Rodman,36.0,78.0,220.037479,1986.0,USA,1997,1
1,Dwayne Schintzius,28.0,85.0,260.044294,1990.0,USA,1997,1


# Merging Reg and Playoff Stats Function
Creating a function to merge the regular season and playoff files for each year into a new combined .csv files, which will then be concatenated into one final data frame.
## DONE

In [87]:
def merge_reg_playoffs(year):
    """ A function to merge a year's regular season and playoff stats into one .csv file.
    
    Params:
    year- The year of stats to merge.
    """
    import pandas as pd
    from functools import reduce
    
    # Setting a list of folder names to iterate through.
    folders = ['regseason', 'reg-36', 'reg-advanced', 'playoffs', 'play-36', 'play-advanced']
    
    # Loading in the dataframes to merge and setting them to a list.
    reg = pd.read_csv(f'data/clean/{folders[0]}/{year}_stats.csv')
    reg36 = pd.read_csv(f'data/clean/{folders[1]}/{year}_stats.csv')
    reg_adv = pd.read_csv(f'data/clean/{folders[2]}/{year}_stats.csv')
    play = pd.read_csv(f'data/clean/{folders[3]}/{year}_stats.csv')
    play36 = pd.read_csv(f'data/clean/{folders[4]}/{year}_stats.csv')
    play_adv = pd.read_csv(f'data/clean/{folders[5]}/{year}_stats.csv')
    
    data_frames = [reg, reg_adv, reg36, play, play_adv, play36]
    
    # Naming each dataframe to allow proper suffixing where needed upon merger.
    for i in range(0, len(folders)):
        data_frames[i].name = folders[i]
    
    # Merging Regular Season and Playoff frames.
    df_merged = reduce(lambda left,right: pd.merge(left,right,on=['Player'],
                                            how='left',
                                            suffixes=(None, f'_'+right.name)),
                                            data_frames).fillna(0)

    # Saving the merged frame as a .csv file into the 'combined' folder.    
    pd.DataFrame.to_csv(df_merged, f'data/combined/{year}_stats.csv')

    
    return


In [90]:
#Delete this line if running for the first time.
raise Exception('This cell has been ran, and the data has already been saved.')

for i in range(1989, 2020):
    merge_reg_playoffs(i)

# Concat Merged Frames
Finally, merging the combined .csv files for each season into one .csv file of regular season and playoff stats for all of the seasons.

I will also be dropping a ton of unnecessary and/or redundant columns, and creating a feature that designates whether or not a player made the playoffs for any given season.
### DONE

In [97]:
#Delete this line if running for the first time.
raise Exception('This cell has been ran, and the data has already been saved.')

import glob
import os

# File path of the folder containing .csv files of interest
path = r'/Users/npardue/Desktop/Capstone/data/combined'   

# Joining the file paths and names into a list of files
all_files = glob.glob(os.path.join(path, "*.csv"))     

# Creating a dataframe for each file in all_files
df_from_each_file = (pd.read_csv(f) for f in all_files)

# Concatenating the dataframes found in df_from_each_file
concatenated_df   = pd.concat(df_from_each_file, ignore_index=True)

In [103]:
concatenated_df.shape

(13760, 178)

In [107]:
# Columns to drop from final dataframe
columns = 'Unnamed: 0|Unnamed: 0_reg-36|Pos_reg-36|Age_reg-36|Tm_reg-36|G_reg-36|current_year_reg-36|\
Unnamed: 0_reg-advanced|Pos_reg-advanced|Age_reg-advanced|Tm_reg-advanced|G_reg-advanced|current_year_reg-advanced|\
GS_reg-advanced|MP_reg-advanced|\
Unnamed: 0_playoffs|Pos_playoffs|Age_playoffs|Tm_playoffs|\
Unnamed: 0_play-36|Pos_play-36|Age_play-36|Tm_play-36|G_play-36|current_year_play-36|\
G_play-36|current_year_play-36|\
Unnamed: 0_play-advanced|Pos_play-advanced|Age_play-advanced|Tm_play-advanced|G_play-advanced|current_year_play-advanced|\
GS_play-advanced|MP_play-advanced|current_year_play-advanced'
            
columns = columns.split('|')

concatenated_df = concatenated_df.drop(columns=columns)

concatenated_df['made_playoffs'] = np.where(concatenated_df.current_year_playoffs != 0, 1, 0)
# Sorting the frame by year.
concatenated_df = concatenated_df.sort_values(by='current_year', ascending=True)

In [159]:
concatenated_df.shape

(13760, 147)

In [161]:
# Delete this line if running for the first time.
raise Exception('This cell has been ran, and the data has already been saved.')
concatenated_df.to_csv('data/final/final_df.csv')

# Merging Height & Weight, Draft Year, Drafted 
Now that we have a dataframe containing both regular season and playoff features, we'll go ahead and merge in the player characteristic features.

In [179]:
draft_df.head()

Unnamed: 0,Player,age,player_height,player_weight,draft_year,current_year,drafted
0,Dennis Rodman,36.0,78.0,220.037479,1986.0,1997,1
1,Dwayne Schintzius,28.0,85.0,260.044294,1990.0,1997,1
2,Earl Cureton,39.0,81.0,210.035776,1979.0,1997,1
3,Ed O'Bannon,24.0,80.0,222.03782,1995.0,1997,1
4,Ed Pinckney,34.0,81.0,240.040886,1985.0,1997,1


In [180]:
print(concatenated_df.shape, draft_df.shape)

(13760, 147) (11145, 7)


In [181]:
# Creating a column of unique values to serve as an index, as the indexes for each dataframe are
# different. It will be in the form "PlayerNameCurrentYear" 

draft_df.current_year = draft_df.current_year.astype(str)
draft_df['merge_here'] = draft_df.Player + draft_df.current_year
draft_df = draft_df.drop(columns = ['current_year', 'Player', 'age'])


concatenated_df.current_year = concatenated_df.current_year.astype(str)
concatenated_df['merge_here'] = concatenated_df.Player + concatenated_df.current_year
concatenated_df.current_year = concatenated_df.current_year.astype(int)


In [182]:
final_df = concatenated_df.merge(draft_df, on='merge_here', validate='1:m')
final_df.loc[final_df.Player == 'Kobe Bryant']

Unnamed: 0,Unnamed: 0.1,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,BLK_play-advanced,TOV_play-advanced,PF_play-advanced,PTS_play-advanced,made_playoffs,merge_here,player_height,player_weight,draft_year,drafted
86,63,Kobe Bryant,SF,18.0,LAL,71.0,6.0,15.5,2.5,5.9,...,0.5,3.8,6.2,20.0,1,Kobe Bryant1997,79.0,200.034072,1996.0,1
525,67,Kobe Bryant,SF,19.0,LAL,79.0,1.0,26.0,4.9,11.6,...,1.3,1.8,4.6,15.7,1,Kobe Bryant1998,79.0,210.035776,1996.0,1
956,61,Kobe Bryant,SG,20.0,LAL,50.0,50.0,37.9,7.2,15.6,...,1.1,3.5,2.7,18.1,1,Kobe Bryant1999,79.0,215.036627,1996.0,1
1636,69,Kobe Bryant,SG,21.0,LAL,66.0,62.0,38.2,8.4,17.9,...,1.3,2.3,3.7,19.5,1,Kobe Bryant2000,79.0,210.035776,1996.0,1
2044,63,Kobe Bryant,SG,22.0,LAL,68.0,68.0,40.9,10.3,22.2,...,0.6,2.6,2.7,24.4,1,Kobe Bryant2001,79.0,210.035776,1996.0,1
2490,76,Kobe Bryant,SG,23.0,LAL,80.0,80.0,38.3,9.4,20.0,...,0.7,2.3,2.8,21.9,1,Kobe Bryant2002,79.0,210.035776,1996.0,1
2597,66,Kobe Bryant,SG,24.0,LAL,82.0,82.0,41.5,10.6,23.5,...,0.1,2.8,2.4,26.1,1,Kobe Bryant2003,79.0,215.036627,1996.0,1
2996,89,Kobe Bryant,SG,25.0,LAL,65.0,64.0,37.6,7.9,18.1,...,0.3,2.3,2.2,19.9,1,Kobe Bryant2004,78.0,220.037479,1996.0,1
3689,86,Kobe Bryant,SG,26.0,LAL,66.0,66.0,40.7,8.7,20.1,...,0.0,0.0,0.0,0.0,0,Kobe Bryant2005,78.0,220.037479,1996.0,1
3973,82,Kobe Bryant,SG,27.0,LAL,80.0,80.0,41.0,12.2,27.2,...,0.3,3.8,2.9,22.4,1,Kobe Bryant2006,78.0,220.037479,1996.0,1


In [183]:
#Delete this line if running for the first time.
#raise Exception('This cell has been ran, and the data has already been saved.')
final_df = final_df.drop(columns= ['merge_here', 'Unnamed: 0.1'])

final_df.to_csv('data/final/final_df.csv')

In [184]:
# Still have 1700 Undrafted entries. Will address these once I figure out the scraping tool
# selenium.
final_df.loc[(final_df.drafted == 0)]

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,STL_play-advanced,BLK_play-advanced,TOV_play-advanced,PF_play-advanced,PTS_play-advanced,made_playoffs,player_height,player_weight,draft_year,drafted
0,Ruben Nembhard,SG-PG,24.0,TOT,10.0,0.0,11.3,1.6,3.7,0.432,...,0.0,0.0,0.0,0.0,0.0,0,75.0,208.035435,,0
16,Tracy Moore,SG,31.0,HOU,27.0,1.0,8.8,1.2,3.1,0.388,...,0.0,0.0,0.0,0.0,0.0,0,76.0,200.034072,,0
18,Nate Driggers,SG,23.0,BOS,15.0,0.0,8.8,0.9,2.9,0.302,...,0.0,0.0,0.0,0.0,0.0,0,77.0,215.036627,,0
50,Adrian Caldwell,PF,30.0,TOT,45.0,0.0,12.6,0.9,2.0,0.435,...,0.0,0.0,0.0,0.0,0.0,0,80.0,265.045145,,0
58,Scott Brooks,PG,31.0,NYK,38.0,0.0,6.6,0.5,1.0,0.487,...,0.0,0.0,0.0,0.0,15.4,1,71.0,165.028109,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9864,Ryan Arcidiacono,PG,24.0,CHI,81.0,32.0,24.2,2.3,5.2,0.447,...,0.0,0.0,0.0,0.0,0.0,0,75.0,200.034072,,0
9877,Rawle Alkins,SG,21.0,CHI,10.0,1.0,12.0,1.3,3.9,0.333,...,0.0,0.0,0.0,0.0,0.0,0,77.0,225.038331,,0
9879,DeVaughn Akoon-Purcell,SG,25.0,DEN,7.0,0.0,3.1,0.4,1.4,0.300,...,0.0,0.0,0.0,0.0,0.0,0,78.0,200.034072,,0
9880,Deng Adel,SF,21.0,CLE,19.0,3.0,10.2,0.6,1.9,0.306,...,0.0,0.0,0.0,0.0,0.0,0,79.0,200.034072,,0


In [185]:
final_df[~final_df.Player.isin(list(final_df.loc[final_df.current_year == 2019].Player))]

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,STL_play-advanced,BLK_play-advanced,TOV_play-advanced,PF_play-advanced,PTS_play-advanced,made_playoffs,player_height,player_weight,draft_year,drafted
0,Ruben Nembhard,SG-PG,24.0,TOT,10.0,0.0,11.3,1.6,3.7,0.432,...,0.0,0.0,0.0,0.0,0.0,0,75.0,208.035435,,0
1,Tracy Murray,SF,25.0,WSB,82.0,1.0,22.1,3.5,8.3,0.425,...,1.7,0.8,0.0,4.1,22.8,1,79.0,228.038842,1992.0,1
2,Steve Nash,PG,22.0,PHO,65.0,2.0,10.5,1.1,2.7,0.423,...,2.4,2.4,4.8,12.0,12.0,1,75.0,195.033220,1996.0,1
3,Ivano Newbill,PF,26.0,ATL,72.0,2.0,11.8,0.6,1.3,0.440,...,0.0,0.0,7.2,14.4,0.0,1,82.0,245.041738,1994.0,1
4,Elden Campbell,PF,28.0,LAL,77.0,77.0,32.7,5.7,12.2,0.469,...,0.9,1.7,1.8,3.9,13.7,1,84.0,250.042590,1990.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9394,Alec Peters,PF,22.0,PHO,20.0,0.0,11.3,1.4,3.7,0.378,...,0.0,0.0,0.0,0.0,0.0,0,81.0,235.040035,2017.0,1
9395,Kendrick Perkins,C,33.0,CLE,1.0,0.0,15.0,1.0,2.0,0.500,...,0.0,0.0,0.0,0.0,0.0,0,82.0,270.045997,2003.0,1
9398,London Perrantes,PG,23.0,CLE,14.0,0.0,4.7,0.1,0.9,0.154,...,0.0,0.0,0.0,0.0,0.0,0,74.0,190.032368,,0
9400,Adreian Payne,PF,26.0,ORL,5.0,0.0,8.6,1.4,2.0,0.700,...,0.0,0.0,0.0,0.0,0.0,0,82.0,237.040375,2014.0,1


In [186]:
final_df.shape

(9887, 149)

In [187]:
final_df.drop(final_df.loc[final_df.current_year == 2019].Player.index)

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,STL_play-advanced,BLK_play-advanced,TOV_play-advanced,PF_play-advanced,PTS_play-advanced,made_playoffs,player_height,player_weight,draft_year,drafted
0,Ruben Nembhard,SG-PG,24.0,TOT,10.0,0.0,11.3,1.6,3.7,0.432,...,0.0,0.0,0.0,0.0,0.0,0,75.0,208.035435,,0
1,Tracy Murray,SF,25.0,WSB,82.0,1.0,22.1,3.5,8.3,0.425,...,1.7,0.8,0.0,4.1,22.8,1,79.0,228.038842,1992.0,1
2,Steve Nash,PG,22.0,PHO,65.0,2.0,10.5,1.1,2.7,0.423,...,2.4,2.4,4.8,12.0,12.0,1,75.0,195.033220,1996.0,1
3,Ivano Newbill,PF,26.0,ATL,72.0,2.0,11.8,0.6,1.3,0.440,...,0.0,0.0,7.2,14.4,0.0,1,82.0,245.041738,1994.0,1
4,Elden Campbell,PF,28.0,LAL,77.0,77.0,32.7,5.7,12.2,0.469,...,0.9,1.7,1.8,3.9,13.7,1,84.0,250.042590,1990.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9403,Chandler Parsons,SF,29.0,MEM,36.0,8.0,19.2,3.0,6.5,0.462,...,0.0,0.0,0.0,0.0,0.0,0,82.0,230.039183,2011.0,1
9404,Tony Parker,PG,35.0,SAS,55.0,21.0,19.5,3.2,7.1,0.459,...,1.1,0.0,2.1,1.1,17.7,1,74.0,185.031517,2001.0,1
9405,Jabari Parker,PF,22.0,MIL,31.0,3.0,24.0,5.2,10.7,0.482,...,1.5,0.9,1.3,3.2,15.1,1,80.0,250.042590,2014.0,1
9406,Mason Plumlee,C,27.0,DEN,74.0,26.0,19.4,3.0,5.0,0.601,...,0.0,0.0,0.0,0.0,0.0,0,83.0,255.043442,2013.0,1


In [365]:
final_df.shape

(9887, 64)

In [396]:
final_df.drafted.value_counts()

1    8187
0    1700
Name: drafted, dtype: int64