### 1. Get NBA Draft Data

In [4]:
import pandas as pd
from pathlib import Path

current_dir = Path.cwd()
draft_dir = current_dir.parent / "data" / "drafts"

# Sort the matched paths so the load order does not depend on the order in
# which the filesystem happens to list them.
files = sorted(draft_dir.glob("draft_20*.csv"))
if not files:
    # Stop here with the directory in the message; otherwise the first failure
    # would be a KeyError on "SEASON" when sorting an empty frame below.
    raise FileNotFoundError(f"No draft_20*.csv files found in {draft_dir}")

# Read every matching file and concatenate once. Growing the frame inside the
# loop re-copies all previously loaded rows on each pass, and concatenating
# onto an empty DataFrame seed emits a FutureWarning in recent pandas versions.
draft_df = pd.concat([pd.read_csv(file) for file in files])

draft_df = draft_df.sort_values(by=["SEASON", "OVERALL_PICK"])
print(draft_df.shape)
draft_df.head()

(1542, 14)


Unnamed: 0,PERSON_ID,PLAYER_NAME,SEASON,ROUND_NUMBER,ROUND_PICK,OVERALL_PICK,DRAFT_TYPE,TEAM_ID,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,ORGANIZATION,ORGANIZATION_TYPE,PLAYER_PROFILE_FLAG
0,2030,Kenyon Martin,2000,1,1,1,Draft,1610612751,New Jersey,Nets,NJN,Cincinnati,College/University,1
1,2031,Stromile Swift,2000,1,2,2,Draft,1610612763,Vancouver,Grizzlies,VAN,Louisiana State,College/University,1
2,2032,Darius Miles,2000,1,3,3,Draft,1610612746,Los Angeles,Clippers,LAC,East St. Louis,High School,1
3,2033,Marcus Fizer,2000,1,4,4,Draft,1610612741,Chicago,Bulls,CHI,Iowa State,College/University,1
4,2034,Mike Miller,2000,1,5,5,Draft,1610612753,Orlando,Magic,ORL,Florida,College/University,1


### 2. Get NBA Combine Data

In [5]:
import pandas as pd
from pathlib import Path

current_dir = Path.cwd()
draft_combine_dir = current_dir.parent / "data" / "draft_combine"

# Sort the matched paths so the load order does not depend on the order in
# which the filesystem happens to list them.
files = sorted(draft_combine_dir.glob("draft_combine_20*.csv"))
if not files:
    # Stop here with the directory in the message; otherwise the first failure
    # would be a KeyError on "SEASON" when sorting an empty frame below.
    raise FileNotFoundError(
        f"No draft_combine_20*.csv files found in {draft_combine_dir}"
    )

# Read every matching file and concatenate once. Growing the frame inside the
# loop re-copies all previously loaded rows on each pass, and concatenating
# onto an empty DataFrame seed emits a FutureWarning in recent pandas versions.
draft_combine_df = pd.concat([pd.read_csv(file) for file in files])

draft_combine_df = draft_combine_df.sort_values(by=["SEASON"])
print(draft_combine_df.shape)
draft_combine_df.head()

(1795, 47)


Unnamed: 0,SEASON,PLAYER_ID,FIRST_NAME,LAST_NAME,PLAYER_NAME,POSITION,HEIGHT_WO_SHOES,HEIGHT_WO_SHOES_FT_IN,HEIGHT_W_SHOES,HEIGHT_W_SHOES_FT_IN,...,SPOT_NBA_BREAK_RIGHT,SPOT_NBA_CORNER_RIGHT,OFF_DRIB_FIFTEEN_BREAK_LEFT,OFF_DRIB_FIFTEEN_TOP_KEY,OFF_DRIB_FIFTEEN_BREAK_RIGHT,OFF_DRIB_COLLEGE_BREAK_LEFT,OFF_DRIB_COLLEGE_TOP_KEY,OFF_DRIB_COLLEGE_BREAK_RIGHT,ON_MOVE_FIFTEEN,ON_MOVE_COLLEGE
10,2000,2037,Jamal,Crawford,Jamal Crawford,PG-SG,76.5,6' 4.5'',,,...,,,,,,,,,,
37,2000,2058,Mark,Madsen,Mark Madsen,PF,80.0,6' 8'',,,...,,,,,,,,,,
36,2000,2090,Justin,Love,Justin Love,PG-SG,73.5,6' 1.5'',,,...,,,,,,,,,,
35,2000,2061,Dan,Langhi,Dan Langhi,SF,81.0,6' 9'',,,...,,,,,,,,,,
34,2000,12144,Brandon,Kurtz,Brandon Kurtz,PF-C,81.5,6' 9.5'',,,...,,,,,,,,,,


### 3. Get Undrafted Players Data

In [9]:
# Cast both ID columns to strings so the two frames are compared on one type
draft_df["PERSON_ID"] = draft_df["PERSON_ID"].astype(str)
draft_combine_df["PLAYER_ID"] = draft_combine_df["PLAYER_ID"].astype(str)

# Distinct IDs present on each side
drafted_ids = set(draft_df["PERSON_ID"])
combine_ids = set(draft_combine_df["PLAYER_ID"])

# Combine IDs that have no matching draft record
undrafted_combine_ids = combine_ids.difference(drafted_ids)

# Keep the full combine rows for those IDs, as an independent copy
is_undrafted = draft_combine_df["PLAYER_ID"].isin(undrafted_combine_ids)
undrafted_combine_players = draft_combine_df.loc[is_undrafted].copy()

undrafted_combine_players.head()


Unnamed: 0,SEASON,PLAYER_ID,FIRST_NAME,LAST_NAME,PLAYER_NAME,POSITION,HEIGHT_WO_SHOES,HEIGHT_WO_SHOES_FT_IN,HEIGHT_W_SHOES,HEIGHT_W_SHOES_FT_IN,...,SPOT_NBA_BREAK_RIGHT,SPOT_NBA_CORNER_RIGHT,OFF_DRIB_FIFTEEN_BREAK_LEFT,OFF_DRIB_FIFTEEN_TOP_KEY,OFF_DRIB_FIFTEEN_BREAK_RIGHT,OFF_DRIB_COLLEGE_BREAK_LEFT,OFF_DRIB_COLLEGE_TOP_KEY,OFF_DRIB_COLLEGE_BREAK_RIGHT,ON_MOVE_FIFTEEN,ON_MOVE_COLLEGE
36,2000,2090,Justin,Love,Justin Love,PG-SG,73.5,6' 1.5'',,,...,,,,,,,,,,
34,2000,12144,Brandon,Kurtz,Brandon Kurtz,PF-C,81.5,6' 9.5'',,,...,,,,,,,,,,
32,2000,12143,Kenyon,Jones,Kenyon Jones,PF-C,81.0,6' 9'',,,...,,,,,,,,,,
31,2000,12142,Nate,Johnson,Nate Johnson,SF,78.5,6' 6.5'',,,...,,,,,,,,,,
29,2000,12141,Jacob,Jaacks,Jacob Jaacks,PF,81.25,6' 9.25'',,,...,,,,,,,,,,
