In [1]:
# Import needed dependencies
import requests
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from datetime import date
from bs4 import BeautifulSoup, Comment
pd.set_option('display.max_columns', None)

In [2]:
today = date.today()

# dd/mm/YY
current_year = today.strftime("%Y")

last_year = int(current_year) - 1

In [3]:
last_five_years = []
for i in range(0,5):
    last_five_years.append(int(current_year) - i)

In [4]:
# Create a list to help create a dataframe from batter statistics data
nba_stats = []

# Create a loop to create a dataframe from Basketball Reference tables
for year in last_five_years:
    
    # input URL and use BeautifulSoup to parse through the page
    url = f'https://www.basketball-reference.com/leagues/NBA_{year}_totals.html'
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')

    # Grab the table element that has NBA player statistics
    table = soup.select_one('#div_totals_stats') 

    # Grab data from table and put it into the list created above
    for tr in table.select('tr:has(td)'):
        tds = [td.get_text(strip=True) for td in tr.select('td')]
        tds.append(year)
        nba_stats.append(tds)

In [5]:
# Create dataframe for batter statistics
nba_stats_df = pd.DataFrame(nba_stats)

# Create an empty list to store dataframe header information
header_list = []

# Grab the table header information to use as column headers in our dataframe
for tr in table.select('tr:has(th)'):
    ths = [th.get_text(strip=True) for th in tr.select('th')]
    header_list.append(ths)

# For loop returns a list of lists, and we only need the first list 
df_headers = header_list[0]

# Remove the first item from our headers list, it is the index header that we do not need
df_headers.remove('Rk')
df_headers.append("Year")

# Set column headers equal to our list
nba_stats_df.columns = df_headers

In [6]:
# Change types of columns to numeric for columns with number values
nba_stats_df[['Age', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']] = nba_stats_df[['Age', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']].apply(pd.to_numeric)

# Drop all players with NaN games to remove null values
nba_stats_df.dropna(subset=['G'], axis = 0 , inplace= True)

# Remove any players with fewer than 27 games played (1/3rd of the season)
filtered_nba_stats_df = nba_stats_df[nba_stats_df['G'] >= 27]

# points, rebounds, assists, steals, blocks, threes, field-goal percentage, and free-throw percentage

# Select the columns we want for our NBA player analysis
yahoo_nba_df = filtered_nba_stats_df[['Year','Player','Age','Pos','Team','G','FG%','3P','FT%','TRB','AST','STL','BLK','PTS','TOV']]


In [7]:
# Sort by index to prepare to drop duplicates
yahoo_nba_df = yahoo_nba_df.sort_index()

# Drop duplicate entries of Player Name and Year
# This is to eliminate partial season data for players who played for 2+ teams in one season
yahoo_nba_df = yahoo_nba_df.drop_duplicates(subset=['Year', 'Player'])

In [8]:
# Sort data by name alphabetically, then by year in descending order
final_yahoo_nba_df = yahoo_nba_df.sort_values(['Year','Player'], ascending=[True, True])
final_yahoo_nba_df['TOV'] *= -1

final_yahoo_nba_df

Unnamed: 0,Year,Player,Age,Pos,Team,G,FG%,3P,FT%,TRB,AST,STL,BLK,PTS,TOV
3027,2020,Aaron Gordon,24.0,PF,ORL,62.0,0.437,73.0,0.674,475.0,228.0,51.0,39.0,894.0,-100.0
3105,2020,Aaron Holiday,23.0,PG,IND,66.0,0.414,87.0,0.851,156.0,225.0,55.0,16.0,627.0,-88.0
3245,2020,Abdel Nader,26.0,SF,OKC,55.0,0.468,48.0,0.773,100.0,38.0,23.0,20.0,345.0,-43.0
3417,2020,Admiral Schofield,22.0,PF,WAS,33.0,0.380,19.0,0.667,47.0,15.0,8.0,4.0,99.0,-7.0
3048,2020,Al Horford,33.0,C,PHI,67.0,0.450,99.0,0.763,456.0,270.0,52.0,61.0,798.0,-80.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
519,2024,Yuta Watanabe,29.0,SF,2TM,34.0,0.353,25.0,0.588,54.0,15.0,12.0,7.0,117.0,-19.0
167,2024,Zach Collins,26.0,C,SAS,69.0,0.484,58.0,0.753,370.0,194.0,34.0,52.0,776.0,-132.0
451,2024,Zeke Nnaji,23.0,PF,DEN,58.0,0.463,6.0,0.677,126.0,32.0,15.0,38.0,186.0,-27.0
299,2024,Ziaire Williams,22.0,SF,MEM,51.0,0.397,58.0,0.827,180.0,75.0,36.0,9.0,420.0,-66.0


In [9]:
# Create a new dataframe for stats percentile calculations
percentile_df = pd.DataFrame(columns = ['Year', 'Player', 'Age', 'Pos', 'Team', '3P_Percentile', 'FG%_Percentile', 'TRB_Percentile', 'FT%_Percentile', 'AST_Percentile', 'STL_Percentile', 'BLK_Percentile', 'PTS_Percentile','TOV_Percentile'])

# Carry over your non-numeric columnns from final_yahoo_nba_df
percentile_df['Year'] = final_yahoo_nba_df['Year']
percentile_df['Player'] = final_yahoo_nba_df['Player']
percentile_df['Age'] = final_yahoo_nba_df['Age']
percentile_df['Pos'] = final_yahoo_nba_df['Pos']
percentile_df['Team'] = final_yahoo_nba_df['Team']

# Calculate the percentile rank for each player in each season, seperately, then add all the seasons in one dataframe
for year in last_five_years:
    year_df = final_yahoo_nba_df.loc[final_yahoo_nba_df['Year'] == year]
    year_df['3P_Percentile'] = year_df['3P'].rank(pct=True)
    year_df['FG%_Percentile'] = year_df['FG%'].rank(pct=True)
    year_df['TRB_Percentile'] = year_df['TRB'].rank(pct=True)
    year_df['FT%_Percentile'] = year_df['FT%'].rank(pct=True)
    year_df['AST_Percentile'] = year_df['AST'].rank(pct=True)
    year_df['STL_Percentile'] = year_df['STL'].rank(pct=True)
    year_df['BLK_Percentile'] = year_df['BLK'].rank(pct=True)
    year_df['PTS_Percentile'] = year_df['PTS'].rank(pct=True)
    year_df['TOV_Percentile'] = year_df['TOV'].rank(pct=True)
    year_df.sort_values('Player', ascending=True)

    # Each of the seasons are added back to the percentile dataframe
    percentile_df = percentile_df.append(year_df, ignore_index=True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  year_df['3P_Percentile'] = year_df['3P'].rank(pct=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  year_df['FG%_Percentile'] = year_df['FG%'].rank(pct=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  year_df['TRB_Percentile'] = year_df['TRB'].rank(pct=True)
A value is trying to be set on a 

In [10]:
percentile_df

Unnamed: 0,Year,Player,Age,Pos,Team,3P_Percentile,FG%_Percentile,TRB_Percentile,FT%_Percentile,AST_Percentile,STL_Percentile,BLK_Percentile,PTS_Percentile,TOV_Percentile,G,FG%,3P,FT%,TRB,AST,STL,BLK,PTS,TOV
0,2020,Aaron Gordon,24.0,PF,ORL,,,,,,,,,,,,,,,,,,,
1,2020,Aaron Holiday,23.0,PG,IND,,,,,,,,,,,,,,,,,,,
2,2020,Abdel Nader,26.0,SF,OKC,,,,,,,,,,,,,,,,,,,
3,2020,Admiral Schofield,22.0,PF,WAS,,,,,,,,,,,,,,,,,,,
4,2020,Al Horford,33.0,C,PHI,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4039,2020,Willie Cauley-Stein,26.0,C,2TM,0.027704,0.912929,0.758575,0.094987,0.369393,0.670185,0.918206,0.373351,0.71504,54.0,0.579,0.0,0.606,315.0,70.0,49.0,61.0,390.0,-40.0
4040,2020,Willy Hernangómez,25.0,C,CHO,0.121372,0.860158,0.296834,0.124011,0.101583,0.064644,0.184697,0.122691,0.8219,31.0,0.532,5.0,0.627,134.0,29.0,10.0,7.0,190.0,-30.0
4041,2020,Wilson Chandler,32.0,PF,BRK,0.378628,0.168865,0.3219,0.903694,0.14248,0.168865,0.345646,0.139842,0.76781,35.0,0.404,37.0,0.870,145.0,39.0,16.0,12.0,205.0,-34.0
4042,2020,Yogi Ferrell,26.0,PG,SAC,0.262533,0.294195,0.030343,0.854881,0.362797,0.189974,0.076517,0.156992,0.848285,50.0,0.420,24.0,0.857,49.0,69.0,18.0,4.0,218.0,-28.0


In [11]:
# Clean up the percentile dataframe, drop NaNs and remove unnecessary columns
percentile_df = percentile_df.dropna()
percentile_df = percentile_df.drop(['Team','G','FG%','3P','FT%','TRB','AST','STL','BLK','PTS','TOV'], axis = 1)

# Add a rank column that adds the percentiles from each category
# Average out percentile categories for FG% and FT% with PTS, as we might still want a player with somewhat  
## lower FG% and FT% if they score a lot of points
percentile_df['Rank'] = (percentile_df['3P_Percentile'] + ((percentile_df['FG%_Percentile'] + percentile_df['PTS_Percentile']) / 2) + percentile_df['TRB_Percentile'] + ((percentile_df['FT%_Percentile'] + percentile_df['PTS_Percentile']) / 2) + percentile_df['AST_Percentile'] + percentile_df['STL_Percentile'] + percentile_df['BLK_Percentile'] + percentile_df['PTS_Percentile'] + percentile_df['TOV_Percentile'])


In [12]:
percentile_df

Unnamed: 0,Year,Player,Age,Pos,3P_Percentile,FG%_Percentile,TRB_Percentile,FT%_Percentile,AST_Percentile,STL_Percentile,BLK_Percentile,PTS_Percentile,TOV_Percentile,Rank
2022,2024,A.J. Green,24.0,SG,0.533333,0.223457,0.096296,0.937037,0.114815,0.058025,0.07037,0.238272,0.950617,2.880247
2023,2024,A.J. Lawson,23.0,SG,0.179012,0.412346,0.044444,0.111111,0.050617,0.074074,0.044444,0.079012,0.930864,1.74321
2024,2024,Aaron Gordon,28.0,PF,0.37284,0.867901,0.888889,0.120988,0.797531,0.703704,0.803704,0.771605,0.254321,5.858642
2025,2024,Aaron Holiday,27.0,PG,0.623457,0.412346,0.245679,0.974074,0.617284,0.509877,0.135802,0.471605,0.560494,4.329012
2026,2024,Aaron Nesmith,24.0,SF,0.828395,0.732099,0.651852,0.496296,0.488889,0.785185,0.849383,0.71358,0.488889,6.133951
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4039,2020,Willie Cauley-Stein,26.0,C,0.027704,0.912929,0.758575,0.094987,0.369393,0.670185,0.918206,0.373351,0.71504,4.709763
4040,2020,Willy Hernangómez,25.0,C,0.121372,0.860158,0.296834,0.124011,0.101583,0.064644,0.184697,0.122691,0.8219,2.328496
4041,2020,Wilson Chandler,32.0,PF,0.378628,0.168865,0.3219,0.903694,0.14248,0.168865,0.345646,0.139842,0.76781,2.941293
4042,2020,Yogi Ferrell,26.0,PG,0.262533,0.294195,0.030343,0.854881,0.362797,0.189974,0.076517,0.156992,0.848285,2.658971


In [13]:
# Create a list of each unique player we have in our dataframe
player_list = percentile_df.Player.unique().tolist()

# Create a new dataframe for combined, averaged percentiles over the past 5 seasons
new_df = pd.DataFrame(columns = ['Player', 'Rank', 'Trend', 'Pos', 'Years', '3P_Percentile', 'FG%_Percentile', 'TRB_Percentile', 'FT%_Percentile', 'AST_Percentile', 'STL_Percentile', 'BLK_Percentile', 'PTS_Percentile','TOV_Percentile'])

# Update new dataframe with unique player list
new_df['Player'] = player_list

# Create a list for each percentile stat category for upcoming loop
player_trends = []
average_3P = []
average_FG = []
average_TRB = []
average_FT = []
average_AST = []
average_STL = []
average_BLK = []
average_PTS = []
average_TOV = []
average_Rank = []
year_count = []
pos = []

# Loop through each player, locate their percentile stats for each season, average them out
for player in player_list:
    player_df = percentile_df.loc[percentile_df['Player'] == player]
    
    # We want to find the slope of the line of best fit for each player's overall ranking each season
    x = np.array(player_df['Year'], dtype = float)
    y = np.array(player_df['Rank'], dtype = float)
    slope, intercept = np.polyfit(x, y, 1)
    player_trends.append(slope)
    
    # Find average of each player's percentiles from previous 5 seasons
    average_3P.append(sum(player_df['3P_Percentile']) / len(player_df['3P_Percentile']))
    average_FG.append(sum(player_df['FG%_Percentile']) / len(player_df['FG%_Percentile']))
    average_TRB.append(sum(player_df['TRB_Percentile']) / len(player_df['TRB_Percentile']))
    average_FT.append(sum(player_df['FT%_Percentile']) / len(player_df['FT%_Percentile']))
    average_AST.append(sum(player_df['AST_Percentile']) / len(player_df['AST_Percentile']))
    average_STL.append(sum(player_df['STL_Percentile']) / len(player_df['STL_Percentile']))
    average_BLK.append(sum(player_df['BLK_Percentile']) / len(player_df['BLK_Percentile']))
    average_PTS.append(sum(player_df['PTS_Percentile']) / len(player_df['PTS_Percentile']))
    average_TOV.append(sum(player_df['TOV_Percentile']) / len(player_df['TOV_Percentile']))
    average_Rank.append(sum(player_df['Rank']) / len(player_df['Rank']))
    year_count.append(len(x))
    
    # Keep player positions for reference purposes during the draft
    pos.append(player_df['Pos'].unique())

# Update new dataframe with the list data from each stat
new_df['Pos'] = pos
new_df['Trend'] = player_trends
new_df['3P_Percentile'] = average_3P
new_df['FG%_Percentile'] = average_FG
new_df['TRB_Percentile'] = average_TRB
new_df['FT%_Percentile'] = average_FT
new_df['AST_Percentile'] = average_AST
new_df['STL_Percentile'] = average_STL
new_df['BLK_Percentile'] = average_BLK
new_df['PTS_Percentile'] = average_PTS
new_df['TOV_Percentile'] = average_TOV
new_df['Rank'] = average_Rank

# Keep track of how many seasons are being considered, so we know how reliable the data is
new_df['Years'] = year_count



  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_

In [14]:
# Create a weighted rank column by adding trend data to the rank data and account for number of seasons played
# Basically, if you played all 5 seasons, your trend stat is added directly
# If you played fewer than all 5 seasons, your trend stat is reduced depending on how few seasons you played
new_df['Weighted Rank'] = (new_df['Rank'] + ((new_df['Trend'] * (new_df['Years'] - 1) / 4)))

# shift column 'Weighted Rank' to first position
first_column = new_df.pop('Weighted Rank')
  
# insert column using insert(position,column_name,first_column) function
new_df.insert(1, 'Weighted Rank', first_column)

new_df = new_df.sort_values('Weighted Rank', ascending = False)
new_df.head(50)

Unnamed: 0,Player,Weighted Rank,Rank,Trend,Pos,Years,3P_Percentile,FG%_Percentile,TRB_Percentile,FT%_Percentile,AST_Percentile,STL_Percentile,BLK_Percentile,PTS_Percentile,TOV_Percentile
309,Nikola Jokić,7.24531,7.241552,0.003758,[C],5,0.641431,0.905583,0.992384,0.702533,0.992733,0.960128,0.877943,0.978469,0.015937
190,Jayson Tatum,7.241282,7.221188,0.020094,"[PF, SF]",5,0.980348,0.551333,0.940859,0.773164,0.884038,0.90616,0.827166,0.984818,0.050735
267,Luka Dončić,7.127514,6.998218,0.129296,[PG],5,0.963816,0.651968,0.948873,0.410095,0.98825,0.894732,0.679031,0.991542,0.009399
21,Anthony Edwards,7.069977,6.934958,0.180026,[SG],4,0.957162,0.428426,0.837249,0.554548,0.881225,0.972129,0.820035,0.969518,0.036634
230,Kawhi Leonard,7.00327,7.00047,0.003733,[SF],4,0.78605,0.764092,0.82644,0.908814,0.824716,0.943039,0.718594,0.927358,0.210462
394,Victor Wembanyama,6.970988,6.970988,0.001722,[C],1,0.78642,0.566667,0.977778,0.57037,0.814815,0.949383,1.0,0.933333,0.007407
289,Mikal Bridges,6.949963,6.879716,0.070247,"[SF, SG]",5,0.834935,0.686305,0.775311,0.765664,0.766881,0.947903,0.81813,0.849158,0.312258
240,Kevin Durant,6.931421,6.667847,0.351432,[PF],4,0.759418,0.838335,0.782641,0.91788,0.856964,0.555653,0.904143,0.926216,0.078487
358,Shai Gilgeous-Alexander,6.870116,6.601794,0.268323,"[PG, SG]",5,0.63856,0.716288,0.714692,0.744785,0.889023,0.820311,0.832299,0.93054,0.115291
311,Nikola Vučević,6.859962,6.9467,-0.086737,[C],5,0.781125,0.695706,0.988931,0.629156,0.830811,0.767282,0.888512,0.923414,0.180779


In [15]:
# new_df.to_csv("/Users/michaelbinger/Documents/Projects/Fantasy-Baseball-Analysis/Fantasy_Basketball/nba_trends.csv")
# final_yahoo_nba_df.to_csv("/Users/michaelbinger/Documents/Projects/Fantasy-Baseball-Analysis/Fantasy_Basketball/nba_stats.csv")


In [16]:
new_pos_list = []
pos_list = new_df['Pos'].tolist()
for item in pos_list:
    new_string = []
    for pos in item:
        string = str(pos)
        new_string = f'{new_string},{string}'
    new_pos_list.append(new_string)

final_pos_list = []
for i in new_pos_list:
    i = i.replace('[],', '')
    final_pos_list.append(i)
    
final_pos_list

new_df.drop('Pos', axis = 1, inplace = True)
new_df['Pos'] = final_pos_list

new_df

Unnamed: 0,Player,Weighted Rank,Rank,Trend,Years,3P_Percentile,FG%_Percentile,TRB_Percentile,FT%_Percentile,AST_Percentile,STL_Percentile,BLK_Percentile,PTS_Percentile,TOV_Percentile,Pos
309,Nikola Jokić,7.245310,7.241552,0.003758,5,0.641431,0.905583,0.992384,0.702533,0.992733,0.960128,0.877943,0.978469,0.015937,C
190,Jayson Tatum,7.241282,7.221188,0.020094,5,0.980348,0.551333,0.940859,0.773164,0.884038,0.906160,0.827166,0.984818,0.050735,"PF,SF"
267,Luka Dončić,7.127514,6.998218,0.129296,5,0.963816,0.651968,0.948873,0.410095,0.988250,0.894732,0.679031,0.991542,0.009399,PG
21,Anthony Edwards,7.069977,6.934958,0.180026,4,0.957162,0.428426,0.837249,0.554548,0.881225,0.972129,0.820035,0.969518,0.036634,SG
230,Kawhi Leonard,7.003270,7.000470,0.003733,4,0.786050,0.764092,0.826440,0.908814,0.824716,0.943039,0.718594,0.927358,0.210462,SF
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
531,Keljin Blevins,1.456130,1.456130,0.000360,1,0.242788,0.009615,0.028846,0.042067,0.049279,0.106971,0.020433,0.031250,0.919471,SF
72,Colby Jones,1.372222,1.372222,0.000339,1,0.092593,0.100000,0.027160,0.029630,0.050617,0.032099,0.102469,0.019753,0.962963,SG
637,Malcolm Miller,1.325858,1.325858,0.000328,1,0.149077,0.241425,0.002639,0.006596,0.007916,0.011873,0.025066,0.002639,1.000000,SF
525,Joe Wieskamp,1.302885,1.302885,0.000322,1,0.175481,0.033654,0.004808,0.036058,0.016827,0.012019,0.044471,0.007212,1.000000,SG


In [17]:
draft_df = new_df

In [18]:
draft_df.head(50)

Unnamed: 0,Player,Weighted Rank,Rank,Trend,Years,3P_Percentile,FG%_Percentile,TRB_Percentile,FT%_Percentile,AST_Percentile,STL_Percentile,BLK_Percentile,PTS_Percentile,TOV_Percentile,Pos
309,Nikola Jokić,7.24531,7.241552,0.003758,5,0.641431,0.905583,0.992384,0.702533,0.992733,0.960128,0.877943,0.978469,0.015937,C
190,Jayson Tatum,7.241282,7.221188,0.020094,5,0.980348,0.551333,0.940859,0.773164,0.884038,0.90616,0.827166,0.984818,0.050735,"PF,SF"
267,Luka Dončić,7.127514,6.998218,0.129296,5,0.963816,0.651968,0.948873,0.410095,0.98825,0.894732,0.679031,0.991542,0.009399,PG
21,Anthony Edwards,7.069977,6.934958,0.180026,4,0.957162,0.428426,0.837249,0.554548,0.881225,0.972129,0.820035,0.969518,0.036634,SG
230,Kawhi Leonard,7.00327,7.00047,0.003733,4,0.78605,0.764092,0.82644,0.908814,0.824716,0.943039,0.718594,0.927358,0.210462,SF
394,Victor Wembanyama,6.970988,6.970988,0.001722,1,0.78642,0.566667,0.977778,0.57037,0.814815,0.949383,1.0,0.933333,0.007407,C
289,Mikal Bridges,6.949963,6.879716,0.070247,5,0.834935,0.686305,0.775311,0.765664,0.766881,0.947903,0.81813,0.849158,0.312258,"SF,SG"
240,Kevin Durant,6.931421,6.667847,0.351432,4,0.759418,0.838335,0.782641,0.91788,0.856964,0.555653,0.904143,0.926216,0.078487,PF
358,Shai Gilgeous-Alexander,6.870116,6.601794,0.268323,5,0.63856,0.716288,0.714692,0.744785,0.889023,0.820311,0.832299,0.93054,0.115291,"PG,SG"
311,Nikola Vučević,6.859962,6.9467,-0.086737,5,0.781125,0.695706,0.988931,0.629156,0.830811,0.767282,0.888512,0.923414,0.180779,C


In [19]:
######################################################################################################################
######################################################################################################################
######################################################################################################################
#### DRAFT DAY FUNCTIONS
   
# DROP A PLAYER 
def drafted(player):
    global draft_df
    draft_df = draft_df[draft_df.Player != player]
    return draft_df.head(25)
    
# FILTER PLAYERS BY POSITION
def position_filter(Pos):
    filtered_draft_df = draft_df[draft_df['Pos'].str.contains(Pos)]
    return filtered_draft_df.head(25)

# PULL STAT CATEGORY LEADERS
def stat_leaders(CAT):
    global final_yahoo_nba_df
    final_yahoo_nba_df = final_yahoo_nba_df.sort_values([CAT], ascending=[False])
    return final_yahoo_nba_df.head(25)

In [20]:
# draft_df = draft_df.sort_values(['Weighted Rank'], ascending=[False])
# draft_df.head(50)

In [89]:
drafted('LeBron James')
drafted('Donovan Mitchell')
drafted('Joel Embiid')
drafted('Victor Wembanyama')
drafted('Anthony Davis')
drafted('Nikola Jokić')
drafted('Anthony Edwards')
drafted('Kawhi Leonard')
drafted('Luka Dončić')
drafted('Giannis Antetokounmpo')
drafted('Tyrese Haliburton')
drafted('Chet Holmgren')
drafted('Ja Morant')
drafted('Shai Gilgeous-Alexander')
drafted('Jayson Tatum')
drafted('Tyrese Maxey')
drafted('Paul George')
drafted('Trae Young')
drafted('Desmond Bane')
drafted('Rudy Gobert')
drafted('Kevin Durant')
drafted('Devin Booker')
drafted('Domantas Sabonis')
drafted('Paolo Banchero')
drafted('Kyrie Irving')
drafted('Stephen Curry')
drafted("De'Aaron Fox")
drafted('James Harden')
drafted('Scottie Barnes')
drafted('Myles Turner')
drafted('Damian Lillard')
drafted('Evan Mobley')
drafted('Jaren Jackson Jr.')
drafted('Jaylen Brown')
drafted('Bam Adebayo')
drafted('Fred VanVleet')
drafted('Dejounte Murray')
drafted('Zach LaVine')
drafted('Lauri Markkanen')
drafted('Jalen Brunson')
drafted('Pascal Siakam')
drafted('Cade Cunningham')
drafted('DeMar DeRozan')
drafted('Jrue Holiday')
drafted('Brandon Miller')
drafted('Jordan Poole')
drafted('Franz Wagner')
drafted('Karl-Anthony Towns')
drafted('Alperen Sengun')
drafted('Derrick White')
drafted('CJ McCollum')
drafted('Nikola Vučević')
drafted('Buddy Hield')
drafted('Grayson Allen')
drafted('Jalen Williams')
drafted('Jalen Green')
drafted('Miles Bridges')
drafted('Kristaps Porziņģis')
drafted('Julius Randle')
drafted('Klay Thompson')
drafted('Jamal Murray')
drafted('Brook Lopez')
drafted('Herbert Jones')
drafted('Kyle Kuzma')
drafted('Trey Murphy III')
drafted('Terry Rozier')
drafted('Jimmy Butler')
drafted('Mikal Bridges')
drafted('Bogdan Bogdanović')

Unnamed: 0,Player,Weighted Rank,Rank,Trend,Years,3P_Percentile,FG%_Percentile,TRB_Percentile,FT%_Percentile,AST_Percentile,STL_Percentile,BLK_Percentile,PTS_Percentile,TOV_Percentile,Pos
231,Keegan Murray,6.778437,6.64206,0.54551,2,0.943707,0.466072,0.8415,0.607692,0.537197,0.844733,0.850178,0.794911,0.498042,SF
376,Tobias Harris,6.742606,6.795247,-0.052641,5,0.742589,0.719945,0.897082,0.821107,0.794656,0.75726,0.828471,0.879222,0.246218,"PF,SF"
77,D'Angelo Russell,6.257047,5.93558,0.321467,5,0.923266,0.395402,0.44391,0.638465,0.928719,0.752972,0.568395,0.844534,0.112316,PG
157,Jabari Smith Jr.,6.24564,6.130786,0.459415,2,0.80318,0.311995,0.939937,0.601403,0.537441,0.623501,0.920606,0.776286,0.296851,PF
288,Michael Porter Jr.,6.213103,5.982687,0.307222,4,0.85545,0.752393,0.813477,0.665477,0.368282,0.483527,0.774374,0.773614,0.431416,SF
323,P.J. Washington,6.131139,6.051323,0.079816,5,0.783834,0.472724,0.814705,0.270092,0.672157,0.794456,0.909959,0.727841,0.249123,"PF,C"
237,Kentavious Caldwell-Pope,6.087509,5.85513,0.232379,5,0.810865,0.48845,0.459173,0.782351,0.656421,0.895104,0.65132,0.684701,0.377445,SG
214,Josh Giddey,6.062041,5.804262,0.515559,3,0.574205,0.521868,0.907135,0.406161,0.937699,0.681424,0.679479,0.748778,0.062749,"SG,PF,SF"
499,Carmelo Anthony,6.014667,5.950606,0.128123,3,0.815436,0.368397,0.680959,0.803786,0.455581,0.65159,0.780883,0.798408,0.383247,PF
6,Al Horford,5.988912,5.949082,0.03983,5,0.711668,0.609712,0.799226,0.605322,0.723283,0.500833,0.874701,0.57581,0.580235,C
