## NBA GM Performance Analysis - Web Scraping - Draft Picks

### Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
import requests
import time
import json
from bs4 import BeautifulSoup

In [3]:
pd.set_option('display.max_columns', None)

In [4]:
years = list(range(2012,2022))

### Scraping from Basketball Reference - Historical Draft Picks

In [5]:
draft_pick_url = 'https://www.basketball-reference.com/draft/NBA_{}.html'

In [6]:
for year in years:
    url = draft_pick_url.format(year)
    data = requests.get(url)
    
    with open("../draft_pick_data/{}.html".format(year), "w+", encoding='utf-8') as f:
        f.write(data.text)

Testing with just 2012 to see how the data looks.

In [7]:
with open("../draft_pick_data/2012.html", encoding='utf-8') as f:
    page = f.read()
    
soup = BeautifulSoup(page, "html.parser")

draftpick2012 = soup.find(id="stats")

draftpick2012 = pd.read_html(str(draftpick2012))[0]

draftpick2012

Unnamed: 0_level_0,Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Round 1,Round 1,Unnamed: 5_level_0,Totals,Totals,Totals,Totals,Totals,Shooting,Shooting,Shooting,Per Game,Per Game,Per Game,Per Game,Advanced,Advanced,Advanced,Advanced
Unnamed: 0_level_1,Rk,Pk,Tm,Player,College,Yrs,G,MP,PTS,TRB,AST,FG%,3P%,FT%,MP,PTS,TRB,AST,WS,WS/48,BPM,VORP
0,1,1,NOH,Anthony Davis,Kentucky,13,779,26880,18824,8341,1976,.523,.298,.795,34.5,24.2,10.7,2.5,118.6,.212,6.0,54.0
1,2,2,CHA,Michael Kidd-Gilchrist,Kentucky,8,446,10978,3750,2420,515,.474,.272,.715,24.6,8.4,5.4,1.2,21.0,.092,-1.8,0.6
2,3,3,WAS,Bradley Beal,Florida,13,785,27055,16997,3194,3364,.464,.376,.822,34.5,21.7,4.1,4.3,57.9,.103,1.5,23.9
3,4,4,CLE,Dion Waiters,Syracuse,8,419,11835,5505,1108,1163,.412,.346,.694,28.2,13.1,2.6,2.8,8.8,.036,-2.1,-0.4
4,5,5,SAC,Thomas Robinson,Kansas,5,313,4204,1528,1507,190,.470,.000,.505,13.4,4.9,4.8,0.6,4.6,.053,-3.5,-1.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,56,56,TOR,Tomislav ZubÄiÄ,,,,,,,,,,,,,,,,,,
58,57,57,NJN,Ä°lkan Karaman,,,,,,,,,,,,,,,,,,
59,58,58,MIN,Robbie Hummel,Purdue,2,98,1397,379,266,50,.418,.343,.867,14.3,3.9,2.7,0.5,1.9,.066,-3.1,-0.4
60,59,59,SAS,Marcus Denmon,Missouri,,,,,,,,,,,,,,,,,


In [8]:
draftpick2012.columns = draftpick2012.columns.droplevel(0)

In [9]:
draftpick2012["Year"] = 2012

In [10]:
draftpick2012.columns

Index(['Rk', 'Pk', 'Tm', 'Player', 'College', 'Yrs', 'G', 'MP', 'PTS', 'TRB',
       'AST', 'FG%', '3P%', 'FT%', 'MP', 'PTS', 'TRB', 'AST', 'WS', 'WS/48',
       'BPM', 'VORP', 'Year'],
      dtype='object')

In [11]:
drop_cols = ['Rk', 'College', 'Yrs', 'G', 'MP', 'PTS', 'TRB',
       'AST', 'FG%', '3P%', 'FT%', 'MP', 'PTS', 'TRB', 'AST', 'WS', 'WS/48',
       'BPM', 'VORP']
draftpick2012 = draftpick2012.drop(drop_cols, axis = 1)

In [12]:
draftpick2012

Unnamed: 0,Pk,Tm,Player,Year
0,1,NOH,Anthony Davis,2012
1,2,CHA,Michael Kidd-Gilchrist,2012
2,3,WAS,Bradley Beal,2012
3,4,CLE,Dion Waiters,2012
4,5,SAC,Thomas Robinson,2012
...,...,...,...,...
57,56,TOR,Tomislav ZubÄiÄ,2012
58,57,NJN,Ä°lkan Karaman,2012
59,58,MIN,Robbie Hummel,2012
60,59,SAS,Marcus Denmon,2012


### Loop for Yearly Stats

In [13]:
dfs = []
for year in years:
    file_path = f"../draft_pick_data/{year}.html"
    
    with open(file_path, encoding='utf-8') as f:
        page = f.read()
    
    soup = BeautifulSoup(page, "html.parser")
    draftpicksstats = soup.find(id="stats")
    draftpicks = pd.read_html(str(draftpicksstats))[0]
    
    # Drop the first level of the columns
    draftpicks.columns = draftpicks.columns.droplevel(0)
    
    # Now add the 'Year' column after dropping the level
    draftpicks["Year"] = year
    
    # Drop unwanted columns
    drop_cols = ['Rk', 'College', 'Yrs', 'G', 'MP', 'PTS', 'TRB',
       'AST', 'FG%', '3P%', 'FT%', 'MP', 'PTS', 'TRB', 'AST', 'WS', 'WS/48',
       'BPM', 'VORP']
    draftpicks = draftpicks.drop(drop_cols, axis=1)
    
    # Append the cleaned data to the list
    dfs.append(draftpicks)

In [14]:
draftpicks_df = pd.concat(dfs)

In [15]:
draftpicks_df

Unnamed: 0,Pk,Tm,Player,Year
0,1,NOH,Anthony Davis,2012
1,2,CHA,Michael Kidd-Gilchrist,2012
2,3,WAS,Bradley Beal,2012
3,4,CLE,Dion Waiters,2012
4,5,SAC,Thomas Robinson,2012
...,...,...,...,...
57,56,CHO,Scottie Lewis,2021
58,57,CHO,BalÅ¡a Koprivica,2021
59,58,NYK,Jericho Sims,2021
60,59,BRK,RaiQuan Gray,2021


In [16]:
draftpicks_df['Rookie Year'] = draftpicks_df['Year'] + 1
draftpicks_df['Sophomore Year'] = draftpicks_df['Year'] + 2
draftpicks_df['Third Year'] = draftpicks_df['Year'] + 3

In [17]:
draftpicks_df[draftpicks_df['Player'] == 'Guerschon Yabusele']

Unnamed: 0,Pk,Tm,Player,Year,Rookie Year,Sophomore Year,Third Year
15,16,BOS,Guerschon Yabusele,2016,2017,2018,2019


In [None]:
draftpicks_df.to_csv('../outputs/draft_picks_2012-2021.csv', index=False)

In [18]:
draftpicks_df = pd.read_csv('../outputs/draft_picks_2012-2021.csv')

### Scraping from Basketball Reference - Historical Win Shares Data

In [19]:
win_shares_url = 'https://www.basketball-reference.com/leagues/NBA_{}_advanced.html'

In [20]:
years = list(range(2012,2025))

In [21]:
for year in years:
    url = win_shares_url.format(year)
    data = requests.get(url)
    
    with open("../win_shares_stats/{}.html".format(year), "w+", encoding='utf-8') as f:
        f.write(data.text)

Testing with just 2012 to see how the data looks.

In [22]:
with open("../win_shares_stats/2012.html", encoding='utf-8') as f:
    page = f.read()
    
soup = BeautifulSoup(page, "html.parser")

winsharesstats2012 = soup.find(id="advanced")

winsharesstats2012 = pd.read_html(str(winsharesstats2012))[0]
winsharesstats2012["Year"] = year

winsharesstats2012

Unnamed: 0,Rk,Player,Age,Team,Pos,G,GS,MP,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,Awards,Year
0,1.0,Kevin Durant,23.0,OKC,SF,66.0,66.0,2546.0,26.2,0.610,0.265,0.386,1.9,20.4,11.8,17.5,1.8,2.2,14.0,31.3,8.5,3.7,12.2,0.230,6.2,0.8,7.0,5.8,"MVP-2,AS,NBA1",2024
1,2.0,Pau Gasol,31.0,LAL,PF,65.0,65.0,2430.0,20.4,0.547,0.029,0.285,8.8,21.8,15.6,17.2,0.8,2.6,12.1,22.1,5.2,3.1,8.3,0.165,2.6,0.8,3.4,3.3,,2024
2,3.0,Rudy Gay,25.0,MEM,SF,65.0,65.0,2422.0,17.8,0.521,0.162,0.246,6.1,14.1,10.1,10.8,2.1,1.8,12.0,25.1,2.8,3.3,6.0,0.120,1.7,0.2,1.9,2.4,,2024
3,4.0,Blake Griffin,22.0,LAC,PF,66.0,66.0,2392.0,23.4,0.557,0.016,0.458,10.7,25.1,17.8,16.6,1.2,1.7,10.9,26.6,6.6,2.6,9.2,0.185,3.9,-0.2,3.7,3.4,"AS,NBA2",2024
4,5.0,John Wall,21.0,WAS,PG,66.0,66.0,2386.0,17.7,0.502,0.047,0.450,2.2,12.0,7.1,36.9,2.1,1.8,19.2,24.9,1.7,1.9,3.5,0.071,1.3,-0.6,0.7,1.6,,2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
547,548.0,Keith Benson,23.0,GSW,C,3.0,0.0,9.0,6.0,0.000,0.000,0.000,25.3,12.6,18.9,0.0,0.0,0.0,0.0,5.1,0.0,0.0,0.0,-0.005,-1.4,-6.3,-7.7,0.0,,2024
548,549.0,Earl Barron,30.0,GSW,C,2.0,0.0,9.0,4.7,0.500,0.000,0.000,12.6,0.0,6.3,0.0,0.0,0.0,0.0,20.5,0.0,0.0,0.0,0.054,-11.5,-4.8,-16.3,0.0,,2024
549,550.0,Brian Skinner,35.0,MEM,C,1.0,0.0,4.0,-14.7,0.000,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.3,0.0,0.0,0.0,-0.248,-12.5,-3.0,-15.5,0.0,,2024
550,551.0,Hamady N'Diaye,25.0,WAS,C,3.0,0.0,3.0,-13.1,0.000,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.9,0.0,0.0,0.0,-0.369,-12.3,-8.8,-21.1,0.0,,2024


In [23]:
winsharesstats2012.columns

Index(['Rk', 'Player', 'Age', 'Team', 'Pos', 'G', 'GS', 'MP', 'PER', 'TS%',
       '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%',
       'USG%', 'OWS', 'DWS', 'WS', 'WS/48', 'OBPM', 'DBPM', 'BPM', 'VORP',
       'Awards', 'Year'],
      dtype='object')

In [24]:
drop_cols = ['Rk', 'Age', 'G', 'GS', 'MP', 'PER', 'TS%',
       '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%',
       'USG%', 'OWS', 'DWS', 'OBPM', 'DBPM', 'BPM', 'VORP',
       'Awards']
winsharesstats2012 = winsharesstats2012.drop(drop_cols, axis = 1)

In [25]:
winsharesstats2012

Unnamed: 0,Player,Team,Pos,WS,WS/48,Year
0,Kevin Durant,OKC,SF,12.2,0.230,2024
1,Pau Gasol,LAL,PF,8.3,0.165,2024
2,Rudy Gay,MEM,SF,6.0,0.120,2024
3,Blake Griffin,LAC,PF,9.2,0.185,2024
4,John Wall,WAS,PG,3.5,0.071,2024
...,...,...,...,...,...,...
547,Keith Benson,GSW,C,0.0,-0.005,2024
548,Earl Barron,GSW,C,0.0,0.054,2024
549,Brian Skinner,MEM,C,0.0,-0.248,2024
550,Hamady N'Diaye,WAS,C,0.0,-0.369,2024


Need to clean out duplicate rows of players. Instead of keeping the TOT row, I will want to keep the row corresponding to the acquiring team.

### Loop for Yearly Stats

In [None]:
dfs = []
for year in years:
    file_path = f"../win_shares_stats/{year}.html"
    
    with open(file_path, encoding='utf-8') as f:
        page = f.read()
    soup = BeautifulSoup(page, "html.parser")
    winsharesstats = soup.find(id="advanced")
    winshares = pd.read_html(str(winsharesstats))[0]
    winshares["Year"] = year
    
    drop_cols = ['Rk', 'Age', 'G', 'GS', 'MP', 'PER', 'TS%',
       '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%',
       'USG%', 'OWS', 'DWS', 'OBPM', 'DBPM', 'BPM', 'VORP',
       'Awards']
    winshares = winshares.drop(drop_cols, axis = 1)
    
    dfs.append(winshares)

In [108]:
winshares_df = pd.concat(dfs)

In [109]:
winshares_df.columns

Index(['Player', 'Team', 'Pos', 'WS', 'WS/48', 'Year'], dtype='object')

In [110]:
winshares_df[winshares_df['Player'] == 'Rajon Rondo']

Unnamed: 0,Player,Team,Pos,WS,WS/48,Year
56,Rajon Rondo,BOS,PG,4.9,0.121,2012
222,Rajon Rondo,BOS,PG,3.2,0.108,2013
305,Rajon Rondo,BOS,PG,1.1,0.054,2014
130,Rajon Rondo,2TM,PG,1.6,0.037,2015
131,Rajon Rondo,BOS,PG,0.9,0.064,2015
132,Rajon Rondo,DAL,PG,0.6,0.023,2015
36,Rajon Rondo,SAC,PG,4.6,0.087,2016
150,Rajon Rondo,CHI,PG,2.3,0.06,2017
173,Rajon Rondo,NOP,PG,3.6,0.101,2018
267,Rajon Rondo,LAL,PG,1.3,0.047,2019


In [111]:
def single_year(df):
    unique_teams = df["Team"].nunique()  # Get the number of unique teams
    
    if unique_teams == 1:
        return df  # Return the row if player only played for one team
    elif unique_teams == 3:
        return df[df["Team"] == "2TM"]
    elif unique_teams == 4:
        return df[df["Team"] == "3TM"]
    elif unique_teams == 5:
        return df[df["Team"] == "4TM"]
    else:
        return df

In [112]:
winshares_df['Team'].unique()

array(['OKC', 'LAL', 'MEM', 'LAC', 'WAS', 'MIL', 'ATL', 'MIA', 'POR',
       'PHI', 'TOR', 'SAC', 'NJN', 'CLE', 'MIN', 'DET', 'CHI', 'DEN',
       'GSW', '2TM', 'PHO', 'UTA', 'DAL', 'BOS', 'ORL', 'HOU', 'IND',
       'NYK', 'NOH', 'SAS', 'CHA', '3TM', nan, 'BRK', 'NOP', 'CHO', '4TM'],
      dtype=object)

In [113]:
winshares_df = winshares_df.groupby(["Player", "Year"]).apply(single_year).reset_index(drop=True)

In [114]:
winshares_df[winshares_df['Player'] == 'Rajon Rondo']

Unnamed: 0,Player,Team,Pos,WS,WS/48,Year
5337,Rajon Rondo,BOS,PG,4.9,0.121,2012
5338,Rajon Rondo,BOS,PG,3.2,0.108,2013
5339,Rajon Rondo,BOS,PG,1.1,0.054,2014
5340,Rajon Rondo,2TM,PG,1.6,0.037,2015
5341,Rajon Rondo,SAC,PG,4.6,0.087,2016
5342,Rajon Rondo,CHI,PG,2.3,0.06,2017
5343,Rajon Rondo,NOP,PG,3.6,0.101,2018
5344,Rajon Rondo,LAL,PG,1.3,0.047,2019
5345,Rajon Rondo,LAL,PG,1.4,0.068,2020
5346,Rajon Rondo,2TM,PG,1.3,0.081,2021


In [None]:
winshares_df.to_csv('../inputs/win_shares_stats_2012-2021.csv', index=False)

In [26]:
winshares_df = pd.read_csv('../inputs/win_shares_stats_2012-2021.csv')

## Merging the Two Dataframes

In [89]:
draftpicks_df.columns

Index(['Pk', 'Tm', 'Player', 'Year', 'Rookie Year', 'Sophomore Year',
       'Third Year'],
      dtype='object')

In [78]:
winshares_df.columns

Index(['Player', 'Team', 'Pos', 'WS', 'WS/48', 'Year'], dtype='object')

In [117]:
merged_df = pd.merge(draftpicks_df, winshares_df[['Player', 'Year', 'WS']], 
                     left_on=['Player', 'Rookie Year'], 
                     right_on=['Player', 'Year'], 
                     how='left')

In [118]:
merged_df = pd.merge(merged_df, winshares_df[['Player', 'Year', 'WS']], 
                     left_on=['Player', 'Sophomore Year'], 
                     right_on=['Player', 'Year'], 
                     how='left')

In [119]:
merged_df = pd.merge(merged_df, winshares_df[['Player', 'Year', 'WS']], 
                     left_on=['Player', 'Third Year'], 
                     right_on=['Player', 'Year'], 
                     how='left')

  merged_df = pd.merge(merged_df, winshares_df[['Player', 'Year', 'WS']],


In [120]:
merged_df

Unnamed: 0,Pk,Tm,Player,Year_x,Rookie Year,Sophomore Year,Third Year,Year_y,WS_x,Year_x.1,WS_y,Year_y.1,WS
0,1,NOH,Anthony Davis,2012,2013,2014,2015,2013.0,6.1,2014.0,10.4,2015.0,14.0
1,2,CHA,Michael Kidd-Gilchrist,2012,2013,2014,2015,2013.0,2.1,2014.0,3.1,2015.0,3.8
2,3,WAS,Bradley Beal,2012,2013,2014,2015,2013.0,3.0,2014.0,4.0,2015.0,3.7
3,4,CLE,Dion Waiters,2012,2013,2014,2015,2013.0,0.9,2014.0,1.6,2015.0,1.1
4,5,SAC,Thomas Robinson,2012,2013,2014,2015,2013.0,-0.1,2014.0,1.6,2015.0,1.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
616,56,CHO,Scottie Lewis,2021,2022,2023,2024,2022.0,0.0,,,,
617,57,CHO,BalÅ¡a Koprivica,2021,2022,2023,2024,,,,,,
618,58,NYK,Jericho Sims,2021,2022,2023,2024,2022.0,1.5,2023.0,2.6,2024.0,1.5
619,59,BRK,RaiQuan Gray,2021,2022,2023,2024,,,2023.0,0.1,2024.0,0.2


In [121]:
winshares_df[winshares_df['Player'] == 'Anthony Davis']

Unnamed: 0,Player,Team,Pos,WS,WS/48,Year
335,Anthony Davis,NOH,PF,6.1,0.159,2013
336,Anthony Davis,NOP,PF,10.4,0.212,2014
337,Anthony Davis,NOP,PF,14.0,0.274,2015
338,Anthony Davis,NOP,C,7.2,0.16,2016
339,Anthony Davis,NOP,C,11.0,0.195,2017
340,Anthony Davis,NOP,PF,13.7,0.241,2018
341,Anthony Davis,NOP,C,9.5,0.247,2019
342,Anthony Davis,LAL,PF,11.1,0.25,2020
343,Anthony Davis,LAL,PF,3.7,0.152,2021
344,Anthony Davis,LAL,C,4.5,0.155,2022


In [122]:
merged_df = merged_df.rename(columns={
    'WS_x': 'Rookie_WS',
    'WS_y': 'Sophomore_WS',
    'WS': 'Third_WS'
})

In [123]:
merged_df[merged_df['Player'] == 'Anthony Davis']

Unnamed: 0,Pk,Tm,Player,Year_x,Rookie Year,Sophomore Year,Third Year,Year_y,Rookie_WS,Year_x.1,Sophomore_WS,Year_y.1,Third_WS
0,1,NOH,Anthony Davis,2012,2013,2014,2015,2013.0,6.1,2014.0,10.4,2015.0,14.0


In [126]:
merged_df[merged_df['Year_x'] == 2021]

Unnamed: 0,Pk,Tm,Player,Year_x,Rookie Year,Sophomore Year,Third Year,Year_y,Rookie_WS,Year_x.1,Sophomore_WS,Year_y.1,Third_WS
559,1,DET,Cade Cunningham,2021,2022,2023,2024,2022.0,-0.5,2023.0,-0.1,2024.0,1.8
560,2,HOU,Jalen Green,2021,2022,2023,2024,2022.0,0.7,2023.0,1.8,2024.0,3.1
561,3,CLE,Evan Mobley,2021,2022,2023,2024,2022.0,5.2,2023.0,8.5,2024.0,5.5
562,4,TOR,Scottie Barnes,2021,2022,2023,2024,2022.0,6.6,2023.0,5.0,2024.0,4.3
563,5,ORL,Jalen Suggs,2021,2022,2023,2024,2022.0,-1.6,2023.0,1.4,2024.0,4.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...
616,56,CHO,Scottie Lewis,2021,2022,2023,2024,2022.0,0.0,,,,
617,57,CHO,BalÅ¡a Koprivica,2021,2022,2023,2024,,,,,,
618,58,NYK,Jericho Sims,2021,2022,2023,2024,2022.0,1.5,2023.0,2.6,2024.0,1.5
619,59,BRK,RaiQuan Gray,2021,2022,2023,2024,,,2023.0,0.1,2024.0,0.2


In [11]:
merged_df.isnull().sum()

Pk                 10
Tm                 10
Player              0
Year_x              0
Rookie Year         0
Sophomore Year      0
Third Year          0
Year_y            137
Rookie_WS         137
Year_x.1          130
Sophomore_WS      130
Year_y.1          184
Third_WS          184
dtype: int64

In [12]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 621 entries, 0 to 620
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Pk              611 non-null    object 
 1   Tm              611 non-null    object 
 2   Player          621 non-null    object 
 3   Year_x          621 non-null    int64  
 4   Rookie Year     621 non-null    int64  
 5   Sophomore Year  621 non-null    int64  
 6   Third Year      621 non-null    int64  
 7   Year_y          484 non-null    float64
 8   Rookie_WS       484 non-null    float64
 9   Year_x.1        491 non-null    float64
 10  Sophomore_WS    491 non-null    float64
 11  Year_y.1        437 non-null    float64
 12  Third_WS        437 non-null    float64
dtypes: float64(6), int64(4), object(3)
memory usage: 63.2+ KB


In [14]:
merged_df['Pk'] = merged_df['Pk'].fillna(0)

In [16]:
print(merged_df['Pk'].unique())

['1' '2' '3' '4' '5' '6' '7' '8' '9' '10' '11' '12' '13' '14' '15' '16'
 '17' '18' '19' '20' '21' '22' '23' '24' '25' '26' '27' '28' '29' '30' 0
 'Pk' '31' '32' '33' '34' '35' '36' '37' '38' '39' '40' '41' '42' '43'
 '44' '45' '46' '47' '48' '49' '50' '51' '52' '53' '54' '55' '56' '57'
 '58' '59' '60']


In [17]:
merged_df[merged_df['Pk'] == 'Pk']

Unnamed: 0,Pk,Tm,Player,Year_x,Rookie Year,Sophomore Year,Third Year,Year_y,Rookie_WS,Year_x.1,Sophomore_WS,Year_y.1,Third_WS
31,Pk,Tm,Player,2012,2013,2014,2015,,,,,,
93,Pk,Tm,Player,2013,2014,2015,2016,,,,,,
156,Pk,Tm,Player,2014,2015,2016,2017,,,,,,
218,Pk,Tm,Player,2015,2016,2017,2018,,,,,,
280,Pk,Tm,Player,2016,2017,2018,2019,,,,,,
342,Pk,Tm,Player,2017,2018,2019,2020,,,,,,
404,Pk,Tm,Player,2018,2019,2020,2021,,,,,,
466,Pk,Tm,Player,2019,2020,2021,2022,,,,,,
528,Pk,Tm,Player,2020,2021,2022,2023,,,,,,
590,Pk,Tm,Player,2021,2022,2023,2024,,,,,,


In [18]:
merged_df = merged_df[merged_df['Pk'] != 'Pk']

In [19]:
merged_df['Pk'] = merged_df['Pk'].astype(int)

In [20]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 611 entries, 0 to 620
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Pk              611 non-null    int32  
 1   Tm              601 non-null    object 
 2   Player          611 non-null    object 
 3   Year_x          611 non-null    int64  
 4   Rookie Year     611 non-null    int64  
 5   Sophomore Year  611 non-null    int64  
 6   Third Year      611 non-null    int64  
 7   Year_y          484 non-null    float64
 8   Rookie_WS       484 non-null    float64
 9   Year_x.1        491 non-null    float64
 10  Sophomore_WS    491 non-null    float64
 11  Year_y.1        437 non-null    float64
 12  Third_WS        437 non-null    float64
dtypes: float64(6), int32(1), int64(4), object(2)
memory usage: 64.4+ KB


The following dataset has every pick for the last ten years before 2022 and the win shares in the seasons following the year they were drafted.

In [None]:
merged_df.to_csv('../outputs/draft_pick_win_shares_2012-2021.csv', index=False)

In [27]:
merged_df = pd.read_csv('../outputs/draft_pick_win_shares_2012-2021.csv')