# NBA Playstyle Profiler

In [1]:
import pandas as pd
from scipy.stats import percentileofscore
from scipy.spatial import distance

In [2]:
PATH_TO_DATASET = "../../data/processed/final_dataset.csv"
df = pd.read_csv(PATH_TO_DATASET, index_col=0)
df

Unnamed: 0,PLAYER_ID,PLAYER_NAME,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,OREB,DREB,...,POST_TOUCH_FGA,POST_TOUCH_FG_PCT,PASSES_MADE,AST_POINTS_CREATED,AST_TO_PASS_PCT,TIME_OF_POSS,MID_RANGE_FGM,MID_RANGE_FGA,MID_RANGE_FG_PCT,CLUTCH_PTS
0,1631260,AJ Green,6.5,15.4,0.423,5.4,13.3,0.408,0.7,4.3,...,0.0,0.000,8.2,1.3,0.065,0.4,3.2,6.4,0.500,0.0
1,203932,Aaron Gordon,8.5,15.2,0.556,0.9,2.9,0.290,3.7,6.3,...,0.7,0.542,28.3,9.2,0.125,2.2,1.2,4.2,0.297,17.9
2,1628988,Aaron Holiday,6.9,15.4,0.446,3.1,8.0,0.387,0.8,3.7,...,0.0,0.000,18.9,4.8,0.095,1.8,5.4,10.5,0.511,5.0
3,1630174,Aaron Nesmith,7.4,14.9,0.496,3.3,7.9,0.419,1.5,5.0,...,0.0,0.000,28.0,3.7,0.054,1.1,2.4,4.5,0.533,12.5
4,1630598,Aaron Wiggins,8.3,14.7,0.562,2.4,4.9,0.492,2.3,4.9,...,0.0,1.000,13.4,2.8,0.082,0.7,0.8,4.0,0.188,7.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
346,1630214,Xavier Tillman,6.0,13.8,0.434,1.0,4.0,0.247,3.5,7.0,...,0.2,0.455,21.0,3.8,0.068,0.8,0.8,2.9,0.286,10.0
347,1628380,Zach Collins,9.2,19.0,0.484,1.8,5.6,0.320,3.6,7.8,...,0.9,0.525,27.2,7.3,0.105,1.2,3.3,9.1,0.365,23.3
348,203897,Zach LaVine,9.6,21.1,0.452,3.3,9.5,0.349,0.4,6.8,...,0.0,0.000,42.5,10.0,0.092,4.1,8.1,17.0,0.476,32.4
349,1630533,Ziaire Williams,7.0,17.5,0.397,2.7,8.8,0.307,1.6,6.7,...,0.0,0.000,17.5,3.9,0.084,1.3,5.2,15.2,0.341,0.0


In [3]:
print(list(df.columns))

['PLAYER_ID', 'PLAYER_NAME', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA', 'PTS', 'PLUS_MINUS', 'NBA_FANTASY_PTS', 'E_OFF_RATING', 'OFF_RATING', 'E_DEF_RATING', 'DEF_RATING', 'E_NET_RATING', 'NET_RATING', 'AST_PCT', 'AST_TO', 'OREB_PCT', 'DREB_PCT', 'REB_PCT', 'E_TOV_PCT', 'EFG_PCT', 'TS_PCT', 'USG_PCT', 'PACE', 'PIE', 'PTS_2ND_CHANCE', 'PTS_FB', 'PTS_PAINT', 'PTS_OFF_TOV', 'PCT_FGA_2PT', 'PCT_FGA_3PT', 'PCT_PTS_2PT', 'PCT_PTS_3PT', 'PCT_PTS_FB', 'PCT_PTS_PAINT', 'PCT_FGM', 'PCT_FGA', 'PCT_FG3M', 'PCT_FG3A', 'OPP_PTS_PAINT', 'PCT_STL', 'PCT_BLK', 'CATCH_SHOOT_FGM', 'CATCH_SHOOT_FGA', 'CATCH_SHOOT_FG_PCT', 'CATCH_SHOOT_PTS', 'CATCH_SHOOT_FG3M', 'CATCH_SHOOT_FG3A', 'CATCH_SHOOT_FG3_PCT', 'CATCH_SHOOT_EFG_PCT', 'DRIVE_FGM', 'DRIVE_FGA', 'DRIVE_FG_PCT', 'DRIVE_PTS', 'PULL_UP_FGM', 'PULL_UP_FGA', 'PULL_UP_FG_PCT', 'PULL_UP_PTS', 'PULL_UP_FG3M', 'PULL_UP_FG3A', 'PULL_UP_FG3_PCT', 'PULL_UP_EFG_PCT', 'POST_TOUCH_FGM', 'POST_TOUCH_

## Data Cleaning and Preprocessing

In [4]:
# Check how many NaN values there are in the dataframe and which columns they are in
nan_count = df.isna().sum()
print("Number of NaN values:", list(nan_count))

nan_columns = nan_count[nan_count > 0].index.tolist()

print("Column names with NaN values:")
for column_name in nan_columns:
    print(column_name)

Number of NaN values: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 8, 8, 11, 3, 0, 0, 0, 0, 0, 0, 4, 0, 8, 8, 25, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Column names with NaN values:
CATCH_SHOOT_FG_PCT
CATCH_SHOOT_FG3M
CATCH_SHOOT_FG3A
CATCH_SHOOT_FG3_PCT
CATCH_SHOOT_EFG_PCT
PULL_UP_FG_PCT
PULL_UP_FG3M
PULL_UP_FG3A
PULL_UP_FG3_PCT
PULL_UP_EFG_PCT


In [5]:
# Replace all the NaN values with 0
df = df.fillna(0)

In [6]:
# We need to reverse the turnovers columns
# We want to prevent turnovers as much as possible in basketball so a high turnover average is bad

df['TOV']

0      0.9
1      2.2
2      2.0
3      1.5
4      2.2
      ... 
346    1.7
347    4.1
348    2.9
349    3.1
350    4.1
Name: TOV, Length: 351, dtype: float64

In [7]:
# Modify TOV inplace
max_TOV = df['TOV'].max()
df['TOV'] = (max_TOV + 1) - df['TOV']
df['TOV']

0      5.9
1      4.6
2      4.8
3      5.3
4      4.6
      ... 
346    5.1
347    2.7
348    3.9
349    3.7
350    2.7
Name: TOV, Length: 351, dtype: float64

In [8]:
# Modify TOV_PCT inplace
max_E_TOV_PCT = df['E_TOV_PCT'].max()
df['E_TOV_PCT'] = (max_E_TOV_PCT + 1) - df['E_TOV_PCT']
df['E_TOV_PCT']

0      12.9
1       9.0
2       9.5
3      10.4
4       7.6
       ... 
346     9.5
347     4.8
348     8.7
349     5.8
350     7.4
Name: E_TOV_PCT, Length: 351, dtype: float64

In [9]:
# Function to replace df values with their percentiles of respective columns
def replace_with_percentiles(df):
    
    columns_to_skip = ['PLAYER_ID', 'PLAYER_NAME']
    
    for col in df.columns:
        if col in columns_to_skip: # we do not want to compute percentile for these columns
            continue
        # Calculate percentiles for each column
        percentiles = df[col].apply(lambda x: percentileofscore(df[col], x))
        # Replace values with percentiles
        df[col] = percentiles
    return df

In [10]:
scaled_df = replace_with_percentiles(df)
scaled_df

Unnamed: 0,PLAYER_ID,PLAYER_NAME,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,OREB,DREB,...,POST_TOUCH_FGA,POST_TOUCH_FG_PCT,PASSES_MADE,AST_POINTS_CREATED,AST_TO_PASS_PCT,TIME_OF_POSS,MID_RANGE_FGM,MID_RANGE_FGA,MID_RANGE_FG_PCT,CLUTCH_PTS
0,1631260,AJ Green,29.202279,44.444444,16.096866,98.433048,98.290598,86.609687,11.965812,19.230769,...,28.205128,24.786325,1.424501,1.851852,23.361823,2.991453,52.421652,44.871795,86.894587,16.524217
1,203932,Aaron Gordon,61.823362,42.307692,87.321937,15.954416,15.527066,14.245014,82.905983,57.834758,...,90.028490,81.196581,54.415954,72.507123,88.746439,63.105413,26.210826,31.196581,20.512821,65.099715
2,1628988,Aaron Holiday,37.464387,44.444444,37.464387,68.233618,61.823362,72.792023,17.378917,8.404558,...,28.205128,24.786325,25.498575,45.726496,61.680912,56.980057,67.236467,61.538462,90.028490,36.182336
3,1630174,Aaron Nesmith,46.866097,40.740741,72.364672,72.507123,60.683761,93.732194,45.584046,33.333333,...,28.205128,24.786325,52.849003,34.330484,10.541311,36.752137,45.584046,32.905983,92.735043,52.564103
4,1630598,Aaron Wiggins,60.541311,39.031339,88.603989,45.441595,27.777778,99.145299,66.381766,30.911681,...,28.205128,97.720798,10.968661,18.518519,46.153846,18.091168,17.663818,29.772080,9.544160,40.313390
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
346,1630214,Xavier Tillman,21.082621,30.769231,24.643875,16.951567,20.655271,10.541311,80.769231,67.378917,...,73.076923,63.532764,33.048433,35.897436,26.210826,24.786325,17.663818,20.797721,18.660969,46.296296
347,1628380,Zach Collins,69.515670,68.803419,65.242165,31.623932,33.333333,24.216524,81.908832,73.931624,...,92.592593,77.777778,51.566952,63.105413,72.364672,40.598291,53.418803,56.837607,39.031339,73.931624
348,203897,Zach LaVine,75.783476,78.062678,42.592593,72.507123,78.062678,41.310541,1.424501,64.957265,...,28.205128,24.786325,84.045584,76.638177,58.262108,85.897436,83.903134,76.068376,79.344729,84.900285
349,1630533,Ziaire Williams,39.458689,59.971510,7.692308,53.988604,70.797721,19.800570,49.145299,63.247863,...,28.205128,24.786325,21.937322,37.464387,49.287749,44.586895,66.239316,71.652422,29.344729,16.524217


## Feature Engineering

In [11]:
engineered_ratings = ['SCORING', 'REBOUNDING', 'PLAYMAKING', 'BOARD_ACTIVITY', 'STEALS', 'BLOCKING', 'SHOOTING_EFFICIENCY', 
                     '3PT_SHOOTING_EFFICIENCY', 'CATCH_AND_SHOOT', 'BALL_HANDLING', 'PASSING', 'TURNOVERS', 
                     '3PT_FREQ', 'POST_UP', 'DRIVING', 'MID-RANGE', 'DEFENSE', 'CLUTCHNESS', 'FAST_BREAK', 'SECOND_CHANCE',
                     'TEAM_IMPACT']

In [12]:
df['SCORING'] = (0.8 * df['PTS']) + (0.1 * df['TS_PCT']) + (0.1 * df['EFG_PCT'])
df['REBOUNDING'] = (0.7 * df['DREB']) + (0.3 * df['OREB'])
df['PLAYMAKING'] = (0.5 * df['AST_POINTS_CREATED']) + (0.1 * df['AST_TO_PASS_PCT']) + (0.2 * df['AST']) + (0.1 * df['PASSES_MADE']) + (0.1 * df['AST_TO'])
df['BOARD_ACTIVITY'] = (0.7 * df['REB_PCT']) + (0.2 * df['DREB_PCT']) + (0.1 * df['OREB_PCT'])
df['STEALS'] = (0.7 * df['STL']) + (0.3 * df['PCT_STL'])
df['BLOCKING'] = (0.8 * df['BLK']) + (0.1 * df['BLKA']) + (0.1 * df['PCT_BLK'])
df['SHOOTING_EFFICIENCY'] = (0.6 * df['EFG_PCT']) + (0.2 * df['TS_PCT']) + (0.15 * df['FG_PCT']) + (0.05 * df['FG3_PCT'])
df['3PT_SHOOTING_EFFICIENCY'] = df['FG3_PCT']
df['CATCH_AND_SHOOT'] = (0.4 * df['CATCH_SHOOT_EFG_PCT']) + (0.3 * df['CATCH_SHOOT_PTS']) + (0.2 * df['CATCH_SHOOT_FG3A']) + (0.1 * df['CATCH_SHOOT_FGA'])
df['BALL_HANDLING'] = (0.5 * df['TIME_OF_POSS']) + (0.2 * df['USG_PCT']) + (0.15 * df['PACE']) + (0.15 * df['TOV'])
df['PASSING'] = (0.4 * df['PASSES_MADE']) + (0.4 * df['AST_TO']) + (0.2 * df['AST_POINTS_CREATED'])
df['TURNOVERS'] = (0.5 * df['E_TOV_PCT']) + (0.3 * df['TOV']) + (0.2 * df['AST_TO'])
df['3PT_FREQ'] = (0.6 * df['PCT_FGA_3PT']) + (0.2 * df['PCT_FG3M']) + (0.2 * df['PCT_PTS_3PT'])
df['POST_UP'] = (0.6 * df['POST_TOUCH_FGA']) + (0.2 * df['POST_TOUCH_FGM']) + (0.2 * df['POST_TOUCH_FG_PCT'])
df['DRIVING'] = (0.5 * df['DRIVE_PTS']) + (0.3 * df['DRIVE_FGA']) + (0.15 * df['DRIVE_FGM']) + (0.05 * df['DRIVE_FG_PCT'])
df['MID-RANGE'] = (0.4 * df['MID_RANGE_FGM']) + (0.3 * df['MID_RANGE_FG_PCT']) + (0.3 * df['MID_RANGE_FGA'])
df['DEFENSE'] = (0.5 * df['DEF_RATING']) + (0.1 * df['E_DEF_RATING']) + (0.2 * df['STL']) + (0.2 * df['BLK'])
df['CLUTCHNESS'] = df['CLUTCH_PTS']
df['FAST_BREAK'] = (0.8 * df['PTS_FB']) + (0.1 * df['PACE']) + (0.1 * df['PCT_PTS_FB'])
df['SECOND_CHANCE'] = (0.9 * df['PTS_2ND_CHANCE']) + (0.1 * df['OREB'])
df['TEAM_IMPACT'] = (0.95 * df['PIE']) + (0.05 * df['PLUS_MINUS'])

  df['DEFENSE'] = (0.5 * df['DEF_RATING']) + (0.1 * df['E_DEF_RATING']) + (0.2 * df['STL']) + (0.2 * df['BLK'])
  df['CLUTCHNESS'] = df['CLUTCH_PTS']
  df['FAST_BREAK'] = (0.8 * df['PTS_FB']) + (0.1 * df['PACE']) + (0.1 * df['PCT_PTS_FB'])
  df['SECOND_CHANCE'] = (0.9 * df['PTS_2ND_CHANCE']) + (0.1 * df['OREB'])
  df['TEAM_IMPACT'] = (0.95 * df['PIE']) + (0.05 * df['PLUS_MINUS'])


In [13]:
columns_to_keep = ['PLAYER_ID', 'PLAYER_NAME'] + engineered_ratings

df = df[columns_to_keep]
df

Unnamed: 0,PLAYER_ID,PLAYER_NAME,SCORING,REBOUNDING,PLAYMAKING,BOARD_ACTIVITY,STEALS,BLOCKING,SHOOTING_EFFICIENCY,3PT_SHOOTING_EFFICIENCY,...,TURNOVERS,3PT_FREQ,POST_UP,DRIVING,MID-RANGE,DEFENSE,CLUTCHNESS,FAST_BREAK,SECOND_CHANCE,TEAM_IMPACT
0,1631260,AJ Green,53.760684,17.051282,13.361823,16.666667,2.236467,11.282051,74.779202,86.609687,...,93.746439,98.319088,28.347578,3.447293,60.498575,17.706553,16.524217,15.712251,25.170940,21.460114
1,203932,Aaron Gordon,60.455840,65.356125,70.854701,69.814815,30.113960,63.988604,76.032764,14.245014,...,61.552707,16.353276,88.518519,55.306268,25.997151,36.296296,65.099715,50.099715,82.393162,62.706553
2,1628988,Aaron Holiday,42.735043,11.096866,51.980057,7.450142,59.957265,8.076923,47.948718,72.792023,...,70.783476,72.136752,28.347578,51.339031,72.364672,17.321937,36.182336,33.034188,34.558405,31.837607
3,1630174,Aaron Nesmith,58.475783,37.008547,30.413105,38.148148,58.304843,71.652422,86.111111,93.732194,...,74.344729,73.019943,28.347578,66.609687,55.925926,66.396011,52.564103,72.549858,48.660969,24.138177
4,1630598,Aaron Wiggins,63.276353,41.552707,25.099715,44.002849,86.282051,44.586895,94.601140,99.145299,...,40.968661,37.863248,42.934473,42.428775,18.860399,36.680912,40.313390,70.156695,75.997151,54.686610
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
346,1630214,Xavier Tillman,9.401709,71.396011,38.689459,72.264957,92.364672,87.521368,7.756410,10.541311,...,71.809117,22.905983,70.797721,20.619658,18.903134,44.985755,46.296296,10.712251,32.051282,30.113960
347,1628380,Zach Collins,61.623932,76.324786,60.384615,77.464387,22.649573,82.079772,41.082621,24.216524,...,11.538462,27.578348,89.772080,38.703704,50.128205,68.048433,73.931624,14.985755,74.601140,49.964387
348,203897,Zach LaVine,74.330484,45.897436,70.384615,42.336182,30.669516,23.789174,41.160969,41.310541,...,45.726496,58.860399,28.347578,83.148148,80.185185,52.094017,84.900285,83.561254,36.552707,67.293447
349,1630533,Ziaire Williams,36.566952,59.017094,34.601140,57.621083,64.715100,24.031339,6.958689,19.800570,...,16.054131,64.131054,28.347578,35.071225,56.794872,44.230769,16.524217,45.384615,69.401709,22.307692


## Survey

In [14]:
def get_user_input(question):
    while True:
        try:
            response = int(input(question))
            if 0 <= response <= 100:
                return response
            else:
                print("Please enter an number between 0 and 100.")
        except ValueError:
            print("Invalid input. Please enter a valid integer value.")

In [15]:
def print_instructions():
    print("Welcome to the NBA Basketball PlayStyle Profiler Survey!")
    print(80 * '-')
    print(" * We will prompt you with some questions about your basketball playstyle and overall skill.")
    print(80 * '-')
    print(" * We will ask you to rate yourself from 0 to 100 for each question.")
    print(" * Your rating will represent your opinion for the percentile for that skill.")
    print(" * For example, if you rated yourself an 80 for catch and shoot ability, ")
    print("you believe that you are better than or as good as 80% of players in catch/shoot scenarios.")
    print(80 * '-')
    print(" * Adjust your rating scale for the people you play with most frequently.")
    print(" * Please answer honestly for the best results.")
    print(80 * '-')
    print(" * Please rate the following aspects of your basketball play from 0 to 100:")

def basketball_survey():    
    scoring = get_user_input("Scoring ability: How skilled of a scorer are you?   ")
    rebounding = get_user_input("Rebounding skill: How skilled are you at getting the ball off missed shots?   ")
    playmaking = get_user_input("Playmaking skill: How good are you at setting up teammates for scoring opportunities?   ")
    board_activity = get_user_input("Activity on the boards: How actively do you pursue rebounds?   ")
    steals = get_user_input("Defensive hands: How skilled are you at stealing the ball from opponents?   ")
    blocks = get_user_input("Rim protection: How skilled are you at blocking opponents' shots?   ")
    shooting_efficiency = get_user_input("Shooting efficiency: How efficient of a shooter are you?   ")
    three_point_shooting_efficiency = get_user_input("3PT Shooting efficiency: How efficient of a 3PT shooter are you?   ")
    catch_shoot = get_user_input("Catch and shoot: How good is your catch and shoot game?   ")
    ball_handling = get_user_input("Ball handling: How well do you handle the ball?   ")
    passing_rating = get_user_input("Overall passing: How well/often do you pass the ball?   ")
    turnover_ratio = get_user_input("Turnovers: How well do you take care of the ball (prevent turnovers)?   ")
    three_point_freq = get_user_input("Three-point frequency: How often do you shoot 3-pointers?   ")
    post_up = get_user_input("Post-up frequency: How often do you post-up?   ")
    driving = get_user_input("Driving: How well do you score when driving into the paint?   ")
    mid_range_shooting = get_user_input("Mid-range shooting: How well/often do you shoot from mid-range?   ")
    defense_rating = get_user_input("Defensive effectiveness: How good of an overall defender are you?   ")
    clutch_performance = get_user_input("Clutch performance: How clutch are you? (How well do you score in clutch moments?)   ")
    fast_break = get_user_input("Fast break points: How often do you score on fast breaks?   ")
    second_chance = get_user_input("Second chance points: How often do you score on second chance opportunities?   ")
    team_impact = get_user_input("Team impact: How impactful are you to your team's overall performance?   ")
    
    # Return the collected data as a dictionary
    playstyle_dict = {
        "SCORING": [scoring],
        "REBOUNDING": [rebounding],
        "PLAYMAKING": [playmaking],
        "BOARD_ACTIVITY": [board_activity],
        "STEALS": [steals],
        "BLOCKING": [blocks],
        "SHOOTING_EFFICIENCY": [shooting_efficiency],
        "3PT_SHOOTING_EFFICIENCY": [three_point_shooting_efficiency],
        "CATCH_AND_SHOOT": [catch_shoot],
        "BALL_HANDLING": [ball_handling],
        "PASSING": [passing_rating],
        "TURNOVERS": [turnover_ratio],
        "3PT_FREQ": [three_point_freq],
        "POST_UP": [post_up],
        "DRIVING": [driving],
        "MID-RANGE": [mid_range_shooting],
        "DEFENSE": [defense_rating],
        "CLUTCHNESS": [clutch_performance],
        "FAST_BREAK": [fast_break],
        "SECOND_CHANCE": [second_chance],
        "TEAM_IMPACT": [team_impact]
    }
    
    index = [1]
    
    playstyle_df = pd.DataFrame(playstyle_dict, index=index)
    
    return playstyle_df

In [17]:
print_instructions()
user_profile_df = basketball_survey()

Welcome to the NBA Basketball PlayStyle Profiler Survey!
--------------------------------------------------------------------------------
 * We will prompt you with some questions about your basketball playstyle and overall skill.
--------------------------------------------------------------------------------
 * We will ask you to rate yourself from 0 to 100 for each question.
 * Your rating will represent your opinion for the percentile for that skill.
 * For example, if you rated yourself an 80 for catch and shoot ability, 
you believe that you are better than or as good as 80% of players in catch/shoot scenarios.
--------------------------------------------------------------------------------
 * Adjust your rating scale for the people you play with most frequently.
 * Please answer honestly for the best results.
--------------------------------------------------------------------------------
 * Please rate the following aspects of your basketball play from 0 to 100:
Scoring ability

In [18]:
user_profile_df

Unnamed: 0,SCORING,REBOUNDING,PLAYMAKING,BOARD_ACTIVITY,STEALS,BLOCKING,SHOOTING_EFFICIENCY,3PT_SHOOTING_EFFICIENCY,CATCH_AND_SHOOT,BALL_HANDLING,...,TURNOVERS,3PT_FREQ,POST_UP,DRIVING,MID-RANGE,DEFENSE,CLUTCHNESS,FAST_BREAK,SECOND_CHANCE,TEAM_IMPACT
1,70,35,80,40,70,20,60,60,70,95,...,80,60,0,70,65,75,70,80,10,75


## Calculate Euclidean Distances

In [19]:
ratings_df = df[engineered_ratings]
ratings_df

Unnamed: 0,SCORING,REBOUNDING,PLAYMAKING,BOARD_ACTIVITY,STEALS,BLOCKING,SHOOTING_EFFICIENCY,3PT_SHOOTING_EFFICIENCY,CATCH_AND_SHOOT,BALL_HANDLING,...,TURNOVERS,3PT_FREQ,POST_UP,DRIVING,MID-RANGE,DEFENSE,CLUTCHNESS,FAST_BREAK,SECOND_CHANCE,TEAM_IMPACT
0,53.760684,17.051282,13.361823,16.666667,2.236467,11.282051,74.779202,86.609687,66.923077,27.079772,...,93.746439,98.319088,28.347578,3.447293,60.498575,17.706553,16.524217,15.712251,25.170940,21.460114
1,60.455840,65.356125,70.854701,69.814815,30.113960,63.988604,76.032764,14.245014,16.011396,52.670940,...,61.552707,16.353276,88.518519,55.306268,25.997151,36.296296,65.099715,50.099715,82.393162,62.706553
2,42.735043,11.096866,51.980057,7.450142,59.957265,8.076923,47.948718,72.792023,57.564103,57.970085,...,70.783476,72.136752,28.347578,51.339031,72.364672,17.321937,36.182336,33.034188,34.558405,31.837607
3,58.475783,37.008547,30.413105,38.148148,58.304843,71.652422,86.111111,93.732194,87.621083,51.424501,...,74.344729,73.019943,28.347578,66.609687,55.925926,66.396011,52.564103,72.549858,48.660969,24.138177
4,63.276353,41.552707,25.099715,44.002849,86.282051,44.586895,94.601140,99.145299,55.740741,35.569801,...,40.968661,37.863248,42.934473,42.428775,18.860399,36.680912,40.313390,70.156695,75.997151,54.686610
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
346,9.401709,71.396011,38.689459,72.264957,92.364672,87.521368,7.756410,10.541311,16.111111,34.779202,...,71.809117,22.905983,70.797721,20.619658,18.903134,44.985755,46.296296,10.712251,32.051282,30.113960
347,61.623932,76.324786,60.384615,77.464387,22.649573,82.079772,41.082621,24.216524,38.076923,49.045584,...,11.538462,27.578348,89.772080,38.703704,50.128205,68.048433,73.931624,14.985755,74.601140,49.964387
348,74.330484,45.897436,70.384615,42.336182,30.669516,23.789174,41.160969,41.310541,36.452991,65.512821,...,45.726496,58.860399,28.347578,83.148148,80.185185,52.094017,84.900285,83.561254,36.552707,67.293447
349,36.566952,59.017094,34.601140,57.621083,64.715100,24.031339,6.958689,19.800570,44.116809,46.111111,...,16.054131,64.131054,28.347578,35.071225,56.794872,44.230769,16.524217,45.384615,69.401709,22.307692


In [20]:
distances = ratings_df.apply(lambda row: distance.euclidean(row.values, user_profile_df.values[0]), axis=1)
distances

0      197.489033
1      180.477484
2      130.217167
3      129.696656
4      169.228779
          ...    
346    214.076574
347    200.580136
348     93.150806
349    182.437655
350    178.520274
Length: 351, dtype: float64

In [21]:
# Create a new DataFrame with distances and original indices
dist_df = pd.DataFrame({'Distance': distances})
final_dist_df = pd.concat([df[['PLAYER_ID', 'PLAYER_NAME']], dist_df], axis=1)
final_dist_df

Unnamed: 0,PLAYER_ID,PLAYER_NAME,Distance
0,1631260,AJ Green,197.489033
1,203932,Aaron Gordon,180.477484
2,1628988,Aaron Holiday,130.217167
3,1630174,Aaron Nesmith,129.696656
4,1630598,Aaron Wiggins,169.228779
...,...,...,...
346,1630214,Xavier Tillman,214.076574
347,1628380,Zach Collins,200.580136
348,203897,Zach LaVine,93.150806
349,1630533,Ziaire Williams,182.437655


In [22]:
top_10_closest_df = final_dist_df.sort_values(by='Distance').head(10)
top_10_closest_df

Unnamed: 0,PLAYER_ID,PLAYER_NAME,Distance
98,1630170,Devin Vassell,60.090292
20,1630559,Austin Reaves,76.159856
130,1630193,Immanuel Quickley,77.162441
321,1626179,Terry Rozier,78.602851
89,1627749,Dejounte Murray,81.541871
339,1626145,Tyus Jones,82.180614
29,203992,Bogdan Bogdanovic,89.878475
337,1630169,Tyrese Haliburton,89.906047
49,1629661,Cameron Johnson,89.995628
338,1630178,Tyrese Maxey,90.038492


## Print out the results

In [25]:
print("Here are the top 10 most similar active NBA players based on your responses.\n")

i = 1
for index, row in top_10_closest_df.iterrows():
    print(f"{i}: {row['PLAYER_NAME']}")
    print()
    i = i + 1

Here are the top 10 most similar active NBA players based on your responses.

1: Devin Vassell

2: Austin Reaves

3: Immanuel Quickley

4: Terry Rozier

5: Dejounte Murray

6: Tyus Jones

7: Bogdan Bogdanovic

8: Tyrese Haliburton

9: Cameron Johnson

10: Tyrese Maxey

