# Basketball-reference.com Web Scraper

The following code pulls and assembles game statistics for every player drafted into the NBA. Three separate tables are created. The first is a 'draft' table that summarizes all the players drafted, along with player-specific information such as draft position, height, weight, and more. The second table assembles all game logs found for each player in international leagues and college basketball for all years prior to their draft year. The third table contains all of each player's game stats in the NBA. The datasets are not 100% complete because some player information is missing on basketball-reference.com. 

This code is flexible: periods can be combined, and not all the desired periods need to be run in the same session. Subsequent scrapes are combined with previous scrapes, and any overlapping years and/or duplicates are eliminated :) 

## Set file path and select years and then run all

In [634]:
# Folder that every CSV produced by this notebook is written to and re-read from.
path = 'C:/Users/erler/OneDrive/Documents/Random Data Sets/Basketball/'
# Inclusive span of draft years to scrape (a wide span runs extremely slowly).
first_year = 2011
last_year = 2011
# Base names of the output files. Each run writes '<base><first_year>_<last_year>.csv'
# and then rebuilds the combined '<base>.csv' from every file whose name contains the base.
drafts_file_name = 'draft1'
preNBA_file_name = 'pre1'
gamelog_file_name = 'nba1'

### Import packages

In [669]:
#import required packages
import bs4 as bs
import urllib.request
import pandas as pd
import unicodedata
from itertools import cycle
import itertools
import re
import time
import numpy as np
import os
from datetime import datetime, timedelta 
# Show wide/long frames in full when a cell displays them.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)

# Fetch one draft page up front. The response is read inside a `with` block so the
# HTTP connection is closed immediately instead of being left open.
# `sauce` and `soup` are rebound for every draft year in the draft-table loop below.
with urllib.request.urlopen('https://www.basketball-reference.com/draft/NBA_2018.html') as response:
    sauce = response.read()
soup = bs.BeautifulSoup(sauce, 'lxml')

def make_inches(x):
    """Convert a 'feet-inches' string such as '6-10' to total inches.

    Returns a float, or None when x is not a string in that form
    (missing value, empty text, no '-' separator, non-numeric parts).
    """
    try:
        parts = x.split('-')
        return float(parts[0]) * 12 + float(parts[1])
    except (AttributeError, IndexError, TypeError, ValueError):
        # Only the failures a malformed height can cause are treated as "no value";
        # anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        return None
    
def linkfix(x, testlink): #normalize links for website quirks
    """Return x as an absolute URL that ends with '/'.

    x        -- link text taken from a page; may be relative or already absolute.
    testlink -- site prefix that the result must start with.

    A trailing '/' is appended when missing, then testlink is prepended unless x
    already starts with it. An empty x yields testlink + '/' instead of raising.
    """
    if not x.endswith('/'):
        x += '/'
    if not x.startswith(testlink):
        x = testlink + x
    return x

def minplayed(x): #convert minutes played into a timedelta
    """Turn a minutes-played string into a pandas Timedelta.

    'MM:SS' text becomes MM minutes plus SS seconds; text without a ':' is
    read as a (possibly fractional) number of minutes.
    """
    if ':' not in x:
        return pd.to_timedelta(float(x), unit='min')
    pieces = x.split(':')
    minutes_part = pd.to_timedelta(float(pieces[0]), unit= 'min')
    seconds_part = pd.to_timedelta(float(pieces[1]), unit= 's')
    return minutes_part + seconds_part

def ages(x): #convert age to float
    """Convert an age written as 'years-days' (e.g. '19-073') to fractional years.

    The part after the '-' is divided by 365 and added to the part before it.
    """
    pieces = x.split('-')
    whole_years = float(pieces[0])
    return whole_years + float(pieces[1]) / 365
    
def outcome(x):
    """Return the first character of the text before '(' in a result string.

    For a value such as 'W (+12)' this is 'W'. Returns None when x is not a
    string or has no character before the parenthesis.
    """
    try:
        return x.split('(')[0][0]
    except (AttributeError, IndexError, TypeError):
        # Missing values (NaN/None) and empty text are expected here; other
        # exceptions are no longer swallowed.
        return None
    
def outcomeq(x):
    """Return the integer written inside the parentheses of a result string.

    For 'W (+12)' this is 12 and for 'L (-3)' it is -3. Returns None when x
    is not a string, has no '(' section, or that section is not an integer.
    """
    try:
        # Text after the first '(' with its final character (the ')') removed.
        return int(x.split('(')[1][:-1])
    except (AttributeError, IndexError, TypeError, ValueError):
        return None

# Create table of all NBA picks for a given timespan

In [528]:
# Build one table of draft picks for first_year..last_year (inclusive) and attach
# each player's profile link taken from the same page's HTML.
ticB1 = time.perf_counter()
years = list(range(first_year, last_year + 1))
p1 = 'https://www.basketball-reference.com/draft/NBA_'
p2 = '.html'
drafts = []
for i in years:
    draft_url = p1 + str(i) + p2
    x = pd.read_html(draft_url)[0].droplevel(0, axis=1) #get each year's draft class
    x['Year'] = i #add year to table
    # Filter out header and non-data rows. The copy makes x an independent frame so
    # the column added below is not written onto a slice of the parsed table.
    x = x.loc[x.Rk.str.isnumeric() == True].copy()
    x['pro_link'] = np.nan
    x['pro_link'] = x['pro_link'].astype(object) #column will hold link strings

    # Read the raw HTML as well, because read_html drops the hyperlinks.
    with urllib.request.urlopen(draft_url) as response:
        sauce = response.read()
    soup = bs.BeautifulSoup(sauce, 'lxml')
    table = soup.table #find table
    table_rows = table.find_all('tr') #find table rows
    for tr in table_rows:
        cells = tr.find_all('td')
        try:
            pick = int(cells[0].get('csk')) #get pick number
        except (IndexError, TypeError, ValueError):
            # No usable pick number on this row: skip it, so a link on this row is
            # never stored against the pick number left over from an earlier row.
            continue
        for url in tr.find_all('a'):
            href = url.get('href')
            if href and href[:9] == '/players/':
                x.loc[x['Pk'] == str(pick), 'pro_link'] = href

    drafts.append(x) #append each year

drafts = pd.concat(drafts)  #concat each df/year into one dataframe

#make a player name column that matches url use
drafts.Player = drafts.Player.replace({' ': '-'}, regex=True).str.lower()
# Same name with accents stripped (decompose, then drop the non-ASCII marks).
drafts['Player_url'] = [unicodedata.normalize('NFD', t).encode('ascii', 'ignore').decode('utf-8') for t in drafts.Player]
drafts.reset_index(drop=True, inplace=True)

tocB1 = time.perf_counter()
print(f"{tocB1 - ticB1:0.4f} seconds") #revised

1.0368 seconds


# Retrieve urls for college or international league gamelogs

In [529]:
# For every row of `drafts`: open the player's profile page, record height, weight
# and birth date, and collect gamelog URLs. College and international gamelog links
# go into `leagues`; links on the profile page that contain 'gamelog' go into
# `seasons`. Both collections are stored back on the player's row.
ticB2 = time.perf_counter()

drafts['cbb_gamelogs'] = np.nan #set destination column
# NOTE(review): range(len(drafts)+1) runs one index past the last row. The profile
# lookup for that index fails inside the try, and the .loc assignments after the
# try then appear to create an extra row with no Year, which the formatting cell
# removes -- confirm before changing either place.
for j in range(len(drafts)+1): #for all players
    leagues = []
    seasons = []
    try:
        # NOTE(review): the responses opened in this loop are never closed explicitly.
        sauce = urllib.request.urlopen('https://www.basketball-reference.com'+drafts.pro_link[j]) #url main page of each player
        soup = bs.BeautifulSoup(sauce, 'lxml') 

        # Physical details as listed on the pro profile; weight keeps digits only.
        drafts.loc[j, 'Pro_height'] = soup.find_all('span',itemprop="height")[0].text
        drafts.loc[j, 'Pro_weight'] = re.sub("[^0-9]",'', soup.find_all('span',itemprop="weight")[0].text)
        drafts.loc[j, 'DOB'] = [i.get('data-birth') for i in soup.find_all('span') if i.get('data-birth')][0]

        # The link list is taken from the pro profile before `soup` is rebound below,
        # so the loop keeps walking the pro-profile links.
        a = soup.find_all('a') #find all links
        testlink='https://www.basketball-reference.com'  
        for url in a:
            try: #retrieve link to gamelogs for college games from pro profile
                if url.get('href')[:45] == 'https://www.sports-reference.com/cbb/players/':
                    linkfull = url.get('href')
                    sauce = urllib.request.urlopen(linkfull) #url main page of each cbb player
                    soup = bs.BeautifulSoup(sauce, 'lxml')
                    drafts.loc[j, 'cbb_height'] = soup.find_all('span',itemprop="height")[0].text #get cbb height
                    drafts.loc[j, 'cbb_weight'] = re.sub("[^0-9]",'', soup.find_all('span',itemprop="weight")[0].text) #get cbb weight
                    # Drop the last five characters of the profile URL and point at its gamelog page.
                    link = linkfull[:-5]+'/gamelog/' #adjust urls to get gamelogs
                    leagues.append(linkfix(link, 'https://www.sports-reference.com'))                     

                #retrieve links to gamelogs for international league games from pro profile
                if url.get('href')[:59] == 'https://www.basketball-reference.com/international/players/':
                    link = url.get('href') #get all links to intl play
                    sauce2 = urllib.request.urlopen(link) #get html
                    soup2 = bs.BeautifulSoup(sauce2, 'lxml') 
                    a2 = soup2.find_all('a') #find all links to leagues for each player from intl page
                    for url2 in a2: #get the links for gamelogs
                        try:
                            if 'gamelog' in url2.get('href'):
                                x = url2.get('href')
                                leagues.append(linkfix(x, testlink))

                        except:
                            # Links without an href end up here and are skipped.
                            pass

                #Retrieve NBA gamelogs        
                if 'gamelog' in url.get('href'):
                    x = url.get('href')
                    seasons.append(linkfix(x, testlink))   

            except:
                # Any failure while handling one link (missing href, failed request,
                # missing height/weight span) skips that link.
                pass
    except:
        # Any failure on the profile page itself leaves both collections empty.
        # NOTE(review): the bare excepts also swallow KeyboardInterrupt.
        pass    
        
    leagues=set(leagues) #eliminate duplicates
    seasons=set(seasons) #eliminate duplicates
    drafts.loc[j, 'cbb_gamelogs'] = [leagues] #store in player table
    drafts.loc[j, 'pro_gamelogs'] = [seasons] #store in player table
    #print(j)
    
tocB2 = time.perf_counter()
print(f"{tocB2 - ticB2:0.4f} seconds")  #revised 

84.6418 seconds


In [541]:
#formatting
# Remove rows that carry no draft year. dropna works whether or not such a row
# exists, whereas indexing the first null row raises when there is none.
drafts = drafts.dropna(subset=['Year'])

# Heights arrive as 'feet-inches' text; store them as inches.
drafts.Pro_height = drafts.Pro_height.astype(str)
drafts.Pro_height = drafts.Pro_height.apply(make_inches)
has_cbb_height = drafts.cbb_height.notna()
drafts.loc[has_cbb_height, 'cbb_height'] = drafts.loc[has_cbb_height, 'cbb_height'].apply(make_inches)
# pd.to_datetime replaces astype('datetime64'): pandas 2.x rejects the unit-less dtype.
drafts.DOB = pd.to_datetime(drafts.DOB)

# Year may have become float after rows with missing values were present, so go
# through int -> str to give the '%Y' parser clean four-digit text.
drafts.Year = pd.to_datetime(drafts.Year.astype(int).astype(str), format='%Y') #dt format
drafts.Year = drafts.Year + pd.Timedelta(days=180) #estimate draft date (used for cutoff purposes)

In [542]:
#save file
# Write this run's table first, so it survives a failure in the combine step.
drafts.to_csv(path+drafts_file_name+str(first_year)+'_'+str(last_year)+'.csv', index=False) #save to folder in case failure

# Re-read every draft CSV in the folder (earlier runs, this run, and any existing
# combined file) and merge them. Files are read in sorted order so that the row
# kept for each duplicate does not depend on directory listing order, and the
# frames are concatenated once rather than grown inside the loop.
frames = []
for f in sorted(os.listdir(path)):
    if drafts_file_name not in f or not f.endswith('.csv'):
        continue
    drafts_current = pd.read_csv(path+f)
    # The gamelog columns come back from CSV as text; the later load cells parse them.
    drafts_current.Year = pd.to_datetime(drafts_current.Year) #dt format
    drafts_current.DOB = pd.to_datetime(drafts_current.DOB)
    frames.append(drafts_current)

drafts = pd.concat(frames)
drafts = drafts.drop_duplicates(subset=['Player', 'DOB'])
drafts.to_csv(path+drafts_file_name+'.csv', index=False)

# Assemble pre-NBA gamelogs into database

In [553]:
#if drafts table needs to be loaded and formatting needs to be restored
drafts = pd.read_csv(path+drafts_file_name+'.csv')
# NOTE(review): eval() executes whatever text is stored in these cells. That is
# acceptable only while the CSV is written by this notebook alone; consider
# ast.literal_eval if the file can come from anywhere else.
drafts['cbb_gamelogs'] = drafts['cbb_gamelogs'].apply(lambda x: eval(x))
drafts['pro_gamelogs'] = drafts['pro_gamelogs'].apply(lambda x: eval(x))
drafts.DOB = pd.to_datetime(drafts.DOB) #dt format
# Year is stored either as a full date or, once the final clean-up cell has been
# run, as a plain year number. A bare pd.to_datetime would read a number as
# nanoseconds since 1970, so numeric years are parsed with '%Y' instead.
if pd.api.types.is_numeric_dtype(drafts.Year):
    drafts.Year = pd.to_datetime(drafts.Year.astype(int).astype(str), format='%Y')
else:
    drafts.Year = pd.to_datetime(drafts.Year)

#limit data scrape to desired years range
draft_years = pd.DatetimeIndex(drafts.Year).year.astype(int)
drafts_iter = drafts.loc[(draft_years >= first_year) & (draft_years <= last_year)]

In [554]:
# Download every college / international gamelog table for the selected players
# and stack them into Predraft_game_stats.
ticB3 = time.perf_counter()

stats = [] #destination list

for i in drafts_iter.index:
    try:
        gamelog_urls = list(drafts.loc[i, 'cbb_gamelogs'])
    except TypeError:
        continue #nothing iterable stored for this player
    for q in gamelog_urls: #for each league
        # Failures are handled per URL, so one bad page no longer discards the
        # player's remaining leagues.
        try:
            x = pd.read_html(q)[0] #pull in gamelogs table
            # URLs end with '/', so the second-to-last piece is the final path segment.
            x['League'] = 'NCAA' if q.split('/')[-2] == 'gamelog' else q.split('/')[-2] #league name
            #conform headers
            x.rename(columns={'School': 'Team', 'Unnamed: 7':'W/L', 'Unnamed: 5':'W/L', 'Unnamed: 4':'Location', 'Unnamed: 3':'Location'}, inplace=True)
            x['Player'] = drafts.loc[i, 'Player'] #player name
            x['DOB'] = drafts.loc[i, 'DOB'] #player birth date
            #remove junk: keep rows whose Rk is present (numeric column) or all digits (text column)
            if x.Rk.dtype != 'O':
                x = x.loc[x.Rk.notna()]
            else:
                x = x.loc[x.Rk.str.isdigit() == True]
            stats.append(x)
        except Exception as err:
            # Best effort: report the page and move on. Unlike a bare except this
            # lets KeyboardInterrupt stop a long scrape.
            print(f"skipped {q}: {err}")
    #print(i)
Predraft_game_stats = pd.concat(stats) #combine dataframes

tocB3 = time.perf_counter()

print(f"{tocB3 - ticB3:0.4f} seconds")  #revised

147.7566 seconds


In [555]:
#Combine previous scrapes and current into one file 

#formatting
Predraft_game_stats.Date = pd.to_datetime(Predraft_game_stats.Date)
Predraft_game_stats.Rk = pd.to_numeric(Predraft_game_stats.Rk)
# Every column from 'FG' through 'PTS' (by position) is a counting/percentage stat.
stat_cols = Predraft_game_stats.loc[:, 'FG':'PTS'].columns
Predraft_game_stats[stat_cols] = Predraft_game_stats[stat_cols].astype('float64')
Predraft_game_stats = Predraft_game_stats.convert_dtypes(infer_objects=True)

Predraft_game_stats['MP'] = Predraft_game_stats['MP'].astype(str)
Predraft_game_stats['MP'] = Predraft_game_stats['MP'].apply(minplayed)
# pd.to_datetime replaces astype('datetime64'): pandas 2.x rejects the unit-less dtype.
Predraft_game_stats.Date = pd.to_datetime(Predraft_game_stats.Date)

# Bring in each player's draft date to use as the cutoff.
Predraft_game_stats = pd.merge(Predraft_game_stats, drafts[['Player', 'DOB', 'Year']], how='left', on=['Player', 'DOB'], copy=False, validate='m:m')
# NOTE(review): drafts.Year is already shifted by 180 days in the draft formatting
# cell before it is saved, so when it comes from that CSV this adds a second
# 180 days -- confirm which cutoff is intended.
Predraft_game_stats.Year = Predraft_game_stats.Year + pd.Timedelta(days=180)
Predraft_game_stats = Predraft_game_stats.loc[Predraft_game_stats.Date<Predraft_game_stats.Year] #eliminate intl stats after draft day


#force int64 to float64 for cooperation with statsmodel package
for i in Predraft_game_stats.columns:
    if Predraft_game_stats[i].dtype == 'Int64':
        Predraft_game_stats[i] = Predraft_game_stats[i].astype('float64')



In [623]:
Predraft_game_stats

Unnamed: 0,Rk,Date,Team,Location,Opponent,W/L,MP,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,League,Player,DOB,Season,Type,GS,2P,2PA,2P%,Draft_year
0,1.0,2003-03-21,Syracuse,N,Manhattan,W,00:38:00,7.0,15.0,0.467,0.0,2.0,0.000,3.0,5.0,0.600,0.0,9.0,9.0,1.0,0.0,2.0,2.0,1.0,17.0,,NCAA,carmelo-anthony,1984-05-29,2002-03,NCAA,1.0,7.0,13.0,0.538,2003
1,2.0,2003-03-23,Syracuse,N,Oklahoma State,W,00:32:00,5.0,16.0,0.313,1.0,2.0,0.500,2.0,6.0,0.333,0.0,8.0,8.0,2.0,3.0,1.0,3.0,4.0,13.0,,NCAA,carmelo-anthony,1984-05-29,2002-03,NCAA,1.0,4.0,14.0,0.286,2003
2,3.0,2003-03-28,Syracuse,N,Auburn,W,00:34:00,7.0,17.0,0.412,2.0,4.0,0.500,2.0,3.0,0.667,4.0,4.0,8.0,3.0,2.0,1.0,2.0,2.0,18.0,,NCAA,carmelo-anthony,1984-05-29,2002-03,NCAA,1.0,5.0,13.0,0.385,2003
3,4.0,2003-03-30,Syracuse,N,Oklahoma,W,00:40:00,9.0,16.0,0.563,1.0,4.0,0.250,1.0,2.0,0.500,4.0,6.0,10.0,1.0,2.0,0.0,4.0,3.0,20.0,,NCAA,carmelo-anthony,1984-05-29,2002-03,NCAA,1.0,8.0,12.0,0.667,2003
4,5.0,2003-04-05,Syracuse,N,Texas,W,00:37:00,12.0,19.0,0.632,3.0,4.0,0.750,6.0,7.0,0.857,4.0,10.0,14.0,1.0,3.0,0.0,1.0,3.0,33.0,,NCAA,carmelo-anthony,1984-05-29,2002-03,NCAA,1.0,9.0,15.0,0.600,2003
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59788,101.0,2011-03-10,Washington,N,Washington State,W,00:40:00,8.0,13.0,0.615,3.0,5.0,0.600,2.0,3.0,0.667,1.0,1.0,2.0,11.0,3.0,0.0,3.0,1.0,21.0,,NCAA,isaiah-thomas,1989-02-07,2010-11,CTOURN,1.0,5.0,8.0,0.625,2011
59789,102.0,2011-03-11,Washington,N,Oregon,W,00:38:00,2.0,11.0,0.182,1.0,4.0,0.250,5.0,8.0,0.625,0.0,3.0,3.0,12.0,3.0,0.0,4.0,3.0,10.0,,NCAA,isaiah-thomas,1989-02-07,2010-11,CTOURN,1.0,1.0,7.0,0.143,2011
59790,103.0,2011-03-12,Washington,N,Arizona,W,00:45:00,10.0,16.0,0.625,4.0,7.0,0.571,4.0,7.0,0.571,2.0,3.0,5.0,7.0,2.0,0.0,5.0,4.0,28.0,,NCAA,isaiah-thomas,1989-02-07,2010-11,CTOURN,1.0,6.0,9.0,0.667,2011
59791,104.0,2011-03-18,Washington,N,Georgia,W,00:34:00,6.0,14.0,0.429,0.0,2.0,0.000,7.0,7.0,1.000,0.0,1.0,1.0,7.0,2.0,0.0,2.0,3.0,19.0,,NCAA,isaiah-thomas,1989-02-07,2010-11,NCAA,1.0,6.0,12.0,0.500,2011


In [624]:
#save file
# Write this run's table first, so it survives a failure in the combine step.
Predraft_game_stats.to_csv(path+preNBA_file_name+str(first_year)+'_'+str(last_year)+'.csv', index=False)

# Re-read every pre-NBA CSV in the folder and merge them into one table. Files
# are read in sorted order and concatenated once.
dedupe_keys = ['DOB', 'Draft_year', 'Team', 'Rk', 'Player', 'Season', 'Date','Opponent']
frames = []
for f in sorted(os.listdir(path)):
    if preNBA_file_name not in f or not f.endswith('.csv'):
        continue
    Predraft_game_stats_current = pd.read_csv(path+f)
    # Per-run files carry the draft date in 'Year'; an already combined file carries
    # a plain 'Draft_year'. Check for the column explicitly instead of relying on a
    # bare except, which would also hide unrelated errors.
    if 'Year' in Predraft_game_stats_current.columns:
        draft_dates = pd.to_datetime(Predraft_game_stats_current.Year)
        Predraft_game_stats_current = Predraft_game_stats_current.drop(['Year'], axis=1)
    else:
        draft_dates = pd.to_datetime(Predraft_game_stats_current.Draft_year, format= '%Y')
    Predraft_game_stats_current['Draft_year'] = pd.DatetimeIndex(draft_dates).year
    frames.append(Predraft_game_stats_current)

Predraft_game_stats = pd.concat(frames)
Predraft_game_stats.MP = pd.to_timedelta(Predraft_game_stats.MP)
Predraft_game_stats.Date = pd.to_datetime(Predraft_game_stats.Date)
Predraft_game_stats.DOB = pd.to_datetime(Predraft_game_stats.DOB)
# One pass after the type conversions keeps the first occurrence of each game,
# which is the same row the per-file de-duplication would have kept.
Predraft_game_stats = Predraft_game_stats.drop_duplicates(subset=dedupe_keys)
Predraft_game_stats.to_csv(path+preNBA_file_name+'.csv', index=False)

# Assemble NBA gamelogs into database

In [641]:
#if drafts table needs to be loaded and formatting needs to be restored
drafts = pd.read_csv(path+drafts_file_name+'.csv')
# NOTE(review): eval() executes whatever text is stored in these cells. That is
# acceptable only while the CSV is written by this notebook alone; consider
# ast.literal_eval if the file can come from anywhere else.
drafts['cbb_gamelogs'] = drafts['cbb_gamelogs'].apply(lambda x: eval(x))
drafts['pro_gamelogs'] = drafts['pro_gamelogs'].apply(lambda x: eval(x))
# Year is stored either as a plain year number or as a full date string. The
# '%Y' format only fits the former, so pick the parser from the column's type.
if pd.api.types.is_numeric_dtype(drafts.Year):
    drafts.Year = pd.to_datetime(drafts.Year.astype(int).astype(str), format='%Y') #dt format
else:
    drafts.Year = pd.to_datetime(drafts.Year) #dt format
drafts.DOB = pd.to_datetime(drafts.DOB) #dt format

#limit data scrape to desired years range
draft_years = pd.DatetimeIndex(drafts.Year).year.astype(int)
drafts_iter = drafts.loc[(draft_years >= first_year) & (draft_years <= last_year)]

In [647]:
# Download every NBA gamelog table for the selected players and stack them into
# NBA_game_stats.
ticB3 = time.perf_counter()

NBA_stats = [] #destination list

for i in drafts_iter.index:
    try:
        gamelog_urls = list(drafts.loc[i, 'pro_gamelogs'])
    except TypeError:
        continue #nothing iterable stored for this player
    for q in gamelog_urls: #for each gamelog url
        # Failures are handled per URL, so one bad page no longer discards the
        # player's remaining seasons.
        try:
            # NOTE(review): the gamelog is taken as the eighth table on the page;
            # this position is assumed to be stable -- confirm against the live site.
            x = pd.read_html(q)[7] #get table
            #conform column names
            x.rename(columns={'Unnamed: 7':'W/L', 'Unnamed: 8':'W/L', 'Unnamed: 5':'Location', 'Unnamed: 3':'Location'}, inplace=True)
            # Any column whose header contains 'Playoffs' is renamed to 'Date'.
            playoff_cols = x.columns[x.columns.str.contains('Playoffs') == True]
            x.rename(columns={col: 'Date' for col in playoff_cols}, inplace=True)
            x['Player'] = drafts.loc[i, 'Player'] #get player name
            x['DOB'] = drafts.loc[i, 'DOB'] #get DOB
            #remove junk rows: keep rows whose Rk is present (numeric column) or all digits (text column)
            if x.Rk.dtype != 'O':
                x = x.loc[x.Rk.notna()]
            else:
                x = x.loc[x.Rk.str.isdigit() == True]
            NBA_stats.append(x)
        except Exception as err:
            # Best effort: report the page and move on. Unlike a bare except this
            # lets KeyboardInterrupt stop a long scrape.
            print(f"skipped {q}: {err}")
    #print(i)
NBA_game_stats = pd.concat(NBA_stats) #combine tables
# Drop rows whose MP text starts with a non-digit; rows with no MP text are kept.
NBA_game_stats = NBA_game_stats.loc[NBA_game_stats.MP.str.match(r'[^0-9]')!=True]
tocB3 = time.perf_counter()

print(f"{tocB3 - ticB3:0.4f} seconds")  #revised

133.3993 seconds


In [648]:
# Clean up the scraped NBA gamelogs: types, age, and time since the draft.
# errors='ignore' keeps the cell runnable when the placeholder column is absent.
NBA_game_stats = NBA_game_stats.drop(columns=['Unnamed: 31'], errors='ignore') #remove unneeded cols
NBA_game_stats['MP'] = NBA_game_stats['MP'].apply(minplayed)
# pd.to_datetime replaces astype('datetime64'): pandas 2.x rejects the unit-less dtype.
NBA_game_stats.Date = pd.to_datetime(NBA_game_stats.Date)

NBA_game_stats['DOB'] = pd.to_datetime(NBA_game_stats['DOB'])
# Convert 'years-days' ages to fractional years; missing ages stay missing for now.
NBA_game_stats['Age'] = NBA_game_stats['Age'].map(ages, na_action='ignore')
NBA_game_stats['Age'] = NBA_game_stats['Age'].astype(float)
NBA_game_stats = NBA_game_stats.merge(drafts[['Player', 'DOB', 'Year']], how='left', on=['Player', 'DOB']) #bring in the draft date (keep)

# 365.2425 days is the length numpy assigns to its 'Y' unit, so this divisor gives
# the same values as np.timedelta64(1, 'Y') while still working on pandas 2.x,
# which no longer accepts the 'Y' unit in timedelta arithmetic.
one_year = pd.Timedelta(days=365.2425)
missing_age = NBA_game_stats['Age'].isna()
NBA_game_stats.loc[missing_age, 'Age'] = (NBA_game_stats.loc[missing_age, 'Date'] - NBA_game_stats.loc[missing_age, 'DOB']) / one_year

NBA_game_stats['Years_pro'] = (NBA_game_stats.Date - pd.to_datetime(NBA_game_stats.Year)) / one_year
# Shift so the smallest value in the table is zero.
NBA_game_stats.Years_pro = NBA_game_stats.Years_pro - NBA_game_stats.Years_pro.min()

In [676]:
#save file
# Write this run's table first, so it survives a failure in the combine step.
NBA_game_stats.to_csv(path+gamelog_file_name+str(first_year)+'_'+str(last_year)+'.csv', index=False) #save to folder in case failure

# Re-read every NBA gamelog CSV in the folder and merge them into one table.
# Files are read in sorted order and concatenated once.
frames = []
for f in sorted(os.listdir(path)):
    if gamelog_file_name not in f or not f.endswith('.csv'):
        continue
    NBAp_current = pd.read_csv(path+f)
    NBAp_current.MP = pd.to_timedelta(NBAp_current.MP)
    NBAp_current.Date = pd.to_datetime(NBAp_current.Date)
    NBAp_current.DOB = pd.to_datetime(NBAp_current.DOB)
    # Per-run files carry the draft date in 'Year'; an already combined file carries
    # a plain 'Draft_year'. Check for the column explicitly instead of relying on a
    # bare except, which would also hide unrelated errors.
    if 'Year' in NBAp_current.columns:
        draft_dates = pd.to_datetime(NBAp_current.Year)
        NBAp_current = NBAp_current.drop(['Year'], axis=1)
    else:
        draft_dates = pd.to_datetime(NBAp_current.Draft_year, format= '%Y')
    NBAp_current['Draft_year'] = pd.DatetimeIndex(draft_dates).year
    frames.append(NBAp_current)
    print(f)

# ignore_index gives every row a unique label, which the aligned fill below relies on.
NBA_game_stats = pd.concat(frames, ignore_index=True)
NBA_game_stats.MP = pd.to_timedelta(NBA_game_stats.MP)
NBA_game_stats.Date = pd.to_datetime(NBA_game_stats.Date)
NBA_game_stats.DOB = pd.to_datetime(NBA_game_stats.DOB)

# Split 'W (+12)' into the letter and the signed margin. Rows that come from an
# already combined file hold only the letter in 'W/L', so nothing can be parsed
# from them; their previously stored margin is kept rather than being blanked.
parsed_margin = pd.to_numeric(NBA_game_stats['W/L'].apply(outcomeq), errors='coerce')
if 'W/L_margin' in NBA_game_stats.columns:
    parsed_margin = parsed_margin.fillna(NBA_game_stats['W/L_margin'])
NBA_game_stats['W/L_margin'] = parsed_margin
NBA_game_stats['W/L'] = NBA_game_stats['W/L'].apply(outcome)
# One pass keeps the first occurrence of each player/date, which is the same row
# the per-file de-duplication would have kept.
NBA_game_stats = NBA_game_stats.drop_duplicates(subset=['Date', 'Draft_year', 'Player', 'DOB'])

#add NBA season as column
# Dates used as season boundaries: a game falls in the bin that starts after one
# boundary and ends on the next, and is labelled with the starting boundary's year.
# Games outside the listed range get no season.
finals_dates = ['6/20/2021','10/12/2020', '6/14/2019', '6/9/2018', '6/13/2017', '6/20/2016', '6/17/2015', '6/16/2014', '6/21/2013', 
                '6/22/2012', '6/13/2011', '6/18/2010', '6/15/2009', '6/18/2008', '6/15/2007', '6/21/2006', '6/24/2005', 
                '6/16/2004', '6/15/2003']

finals = pd.DataFrame([datetime.strptime(x, '%m/%d/%Y') for x in finals_dates]).sort_values(by=0).reset_index(drop=True)
NBA_game_stats['Season'] = pd.cut(NBA_game_stats.Date, finals[0], labels = pd.DatetimeIndex(finals[0]).year[:-1])
NBA_game_stats.to_csv(path+gamelog_file_name+'.csv', index=False)

nba1.csv
nba12003_2006.csv
nba12007_2010.csv
nba12011_2011.csv
nba12012_2015.csv
nba12016_2020.csv


In [None]:
#Convert the stored draft date back to a plain year, now that the calculations are complete

# drafts = pd.read_csv(path+drafts_file_name+'.csv')
# drafts.Year = pd.to_datetime(drafts.Year)
# drafts['Year'] = pd.DatetimeIndex(drafts.Year).year
# drafts.to_csv(path+drafts_file_name+'.csv', index=None)

# pre = pd.read_csv(path+preNBA_file_name+'.csv')
# pre['Draft_year'] = pd.to_datetime(pre.Year)
# pre['Draft_year'] = pd.DatetimeIndex(pre.Draft_year).year
# pre = pre.drop(['Year'], axis=1)
# pre = pre.drop_duplicates(subset=['DOB', 'Draft_year', 'Team', 'Rk', 'Player', 'Season', 'Date','Opponent'])
# pre.to_csv(path+preNBA_file_name+'.csv', index=None)

# nba = pd.read_csv(path+gamelog_file_name+'.csv')
# nba['Draft_year'] = pd.to_datetime(nba.Year)
# nba['Draft_year'] = pd.DatetimeIndex(nba.Draft_year).year
# nba = nba.drop(['Year'], axis=1)
# nba = nba.drop_duplicates(subset=['G', 'Date', 'Draft_year', 'Player', 'DOB'])
# nba.to_csv(path+gamelog_file_name+'.csv', index=None)