In [1]:
import pandas as pd
import numpy as np
import sys
import os
import bs4
from IPython.display import display_html,clear_output, HTML
import re
from datetime import datetime
import ast
import itertools
from tqdm import tqdm,trange
from file_tools import *
from request_tools import *
from parse_tools import *


pd.set_option('display.max_columns', None)
pd.set_option('display.min_rows', 20)
pd.set_option('compute.use_numexpr', False)


![](./__images__/20_001.jpg)

In [None]:
# q: How to insert image into markdown cell in jupyter notebook?
# a: ![title](path/to/image.png)

In [2]:
gamelog_year_html = './01-data-html/players/b/bookede01/gamelog/2023.html'
html_text,html_soup = load_html(gamelog_year_html)
html_text = clean_html_text(html_text)
html_soup = bs4.BeautifulSoup(html_text, 'html.parser')
html_regular = html_soup.find('table', {'id': 'pgl_basic'})
html_playoffs = html_soup.find('table', {'id': 'pgl_basic_playoffs'})

gamelog_year_html = './01-data-html/players/b/bookede01/gamelog-advanced/2023.html'
html_text,html_soup = load_html(gamelog_year_html)
html_text = clean_html_text(html_text)
html_soup = bs4.BeautifulSoup(html_text, 'html.parser')
html_advanced_regular = html_soup.find('table', {'id': 'pgl_advanced'})
html_advanced_playoffs = html_soup.find('table', {'id': 'pgl_advanced_playoffs'})
# html_basic_playoffs


In [44]:
# Tasks
def __parse_player_gamelog_basic_advanced_table__(html_text):
#    1. Extract links from table - # 2. Insert links into table
    DF_PLYR_GL = parse_html_table(html_text)
    df_gamelog_basic_links = parse_html_table(html_text,extract_links='body').applymap(lambda x: x[-1])

    DF_PLYR_GL.insert(3,'Boxscore_id',df_gamelog_basic_links['Date'])
    DF_PLYR_GL.insert(6,'Tm_id',df_gamelog_basic_links['Tm'])
    DF_PLYR_GL.insert(9,'Opp_id',df_gamelog_basic_links['Opp'])

    DF_PLYR_GL = DF_PLYR_GL[~DF_PLYR_GL['Boxscore_id'].isin(['/boxscores/201606190CLE.html'])] # Fix for 201606190CLE - This game does not exist, 201606190GSW is the correct game

    # 3. Remove unnnecessary header rows
    GAME_FILTER = DF_PLYR_GL.loc[:,'Rk'].astype(str).str.isnumeric().fillna(False)
    DF_PLYR_GL = DF_PLYR_GL.loc[GAME_FILTER]
    DF_PLYR_GL.insert(2,'GP',DF_PLYR_GL.loc[:,'G'].astype(str).str.isnumeric().fillna(False).astype(int))
    # 4. Rename H/A, convert to boolean
    GAME_HM_AW = DF_PLYR_GL.pop('Unnamed: 5').isna().astype(int)
    DF_PLYR_GL.insert(8,'H/A',GAME_HM_AW)
    DF_PLYR_GL
    # 5. Rename W/L, split into 2 columns, convert to boolean and int
    GAME_RESULT = DF_PLYR_GL.pop('Unnamed: 7').str.extract(r'([WL]) \(([+-]\d+)\)',expand=True)
    DF_PLYR_GL.insert(11,'W/L',GAME_RESULT[0].replace({'W':1,'L':0}))
    DF_PLYR_GL.insert(12,'Pts_diff',GAME_RESULT[1].astype(int))
    DF_PLYR_GL
    # 6. Convert MP to float, and Inactive game stats to nan
    GP_FILTER = DF_PLYR_GL.loc[:,'GP'].astype(bool)
    # Deal with cases where MP is nan 
    if DF_PLYR_GL['MP'].notna().sum() > 0: # Added later to fix the all NAN case
        DF_PLYR_GL.loc[GP_FILTER & DF_PLYR_GL['MP'].notna(),'MP'] = DF_PLYR_GL.loc[GP_FILTER & DF_PLYR_GL['MP'].notna(),'MP'].str.split(':').apply(lambda x:float(x[0]) + float(x[1])/60)
    DF_PLYR_GL.loc[~GP_FILTER,'GS':] = np.nan
    # 7. Convert +/- to float (in basic table)
    if '+/-' in DF_PLYR_GL.columns:
        DF_PLYR_GL['+/-'] = DF_PLYR_GL['+/-'].astype(float)
    return DF_PLYR_GL

PLYR_GL_BAS_REG = __parse_player_gamelog_basic_advanced_table__(html_regular.prettify())
PLYR_GL_BAS_POFF = __parse_player_gamelog_basic_advanced_table__(html_playoffs.prettify())
PLYR_GL_ADV_REG = __parse_player_gamelog_basic_advanced_table__(html_advanced_regular.prettify())
PLYR_GL_ADV_POFF = __parse_player_gamelog_basic_advanced_table__(html_advanced_playoffs.prettify())



In [4]:
PLYR_GL_BAS_REG

Unnamed: 0,Rk,G,GP,Date,Boxscore_id,Age,Tm,Tm_id,H/A,Opp,Opp_id,W/L,Pts_diff,GS,MP,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,+/-
0,1,1,1,2022-10-19,/boxscores/202210190PHO.html,25-354,PHO,/teams/PHO/2023.html,1,DAL,/teams/DAL/2023.html,1,2,1,40.616667,10,20,.500,1,1,1.000,7,7,1.000,1,3,4,9,0,1,2,3,28,23.4,20.0
1,2,2,1,2022-10-21,/boxscores/202210210POR.html,25-356,PHO,/teams/PHO/2023.html,0,POR,/teams/POR/2023.html,0,-2,1,43.916667,11,23,.478,3,7,.429,8,9,.889,0,5,5,3,1,0,2,1,33,23.1,-2.0
2,3,3,1,2022-10-23,/boxscores/202210230LAC.html,25-358,PHO,/teams/PHO/2023.html,0,LAC,/teams/LAC/2023.html,1,17,1,34.816667,13,21,.619,5,9,.556,4,4,1.000,1,0,1,4,1,0,3,1,35,26.6,13.0
3,4,4,1,2022-10-25,/boxscores/202210250PHO.html,25-360,PHO,/teams/PHO/2023.html,1,GSW,/teams/GSW/2023.html,1,29,1,35.316667,10,19,.526,3,8,.375,11,14,.786,1,1,2,7,3,0,3,3,34,28.2,32.0
4,5,5,1,2022-10-28,/boxscores/202210280PHO.html,25-363,PHO,/teams/PHO/2023.html,1,NOP,/teams/NOP/2023.html,1,13,1,32.6,6,14,.429,0,4,.000,4,4,1.000,0,5,5,4,0,0,5,1,16,7.5,4.0
5,6,6,1,2022-10-30,/boxscores/202210300PHO.html,26-000,PHO,/teams/PHO/2023.html,1,HOU,/teams/HOU/2023.html,1,15,1,35.383333,14,24,.583,1,7,.143,1,3,.333,1,2,3,6,1,0,1,2,30,22.7,11.0
6,7,7,1,2022-11-01,/boxscores/202211010PHO.html,26-002,PHO,/teams/PHO/2023.html,1,MIN,/teams/MIN/2023.html,1,9,1,30.433333,6,18,.333,2,7,.286,4,5,.800,1,6,7,5,0,1,2,5,18,10.1,11.0
7,8,8,1,2022-11-04,/boxscores/202211040PHO.html,26-005,PHO,/teams/PHO/2023.html,1,POR,/teams/POR/2023.html,0,-2,1,38.366667,8,21,.381,3,7,.429,6,6,1.000,0,5,5,5,1,1,5,3,25,14.0,3.0
8,9,9,1,2022-11-05,/boxscores/202211050PHO.html,26-006,PHO,/teams/PHO/2023.html,1,POR,/teams/POR/2023.html,1,20,1,29.166667,9,20,.450,2,7,.286,4,4,1.000,1,2,3,3,1,2,1,2,24,17.6,19.0
9,10,10,1,2022-11-07,/boxscores/202211070PHI.html,26-008,PHO,/teams/PHO/2023.html,0,PHI,/teams/PHI/2023.html,0,-12,1,43.4,8,16,.500,1,1,1.000,11,14,.786,2,5,7,5,1,0,6,2,28,19.4,-9.0


In [None]:
SRC_DIR = './01-data-html/players/'
TGT_DIR = './02-data-parsed/players/'
# player_id = '/players/b/.html' # aminual01
def parse_all_player_gamelogs_from_dir(SRC_DIR,TGT_DIR):
    player_list = load_file('./00-data-facts/players_hrefs.txt').split('\n')
    TRANGE = trange(len(player_list),ncols=150)
    for alphabet_dir in sorted(get_all_folders(SRC_DIR)): # alphabet from a-z
        for player_dir in sorted(get_all_folders('/'.join([SRC_DIR,alphabet_dir]))): # player_id such as bookede01
            for gamelog_type in sorted(get_all_folders('/'.join([SRC_DIR,alphabet_dir,player_dir]))): # gamelog or gamelog-advanced
                for gamelog_year_html in sorted(get_all_files('/'.join([SRC_DIR,alphabet_dir,player_dir,gamelog_type]),file_type='html')):
                    TRANGE.set_description('/'.join([SRC_DIR,alphabet_dir,player_dir,gamelog_type,gamelog_year_html]),refresh=True)
                    html_text,html_soup = load_html('/'.join([SRC_DIR,alphabet_dir,player_dir,gamelog_type,gamelog_year_html]))
                    html_text = clean_html_text(html_text)
                    html_soup = bs4.BeautifulSoup(html_text, 'html.parser')
                    if gamelog_type == 'gamelog':
                        html_regular = html_soup.find('table', {'id': 'pgl_basic'})
                        html_playoffs = html_soup.find('table', {'id': 'pgl_basic_playoffs'})
                    elif gamelog_type == 'gamelog-advanced':
                        html_regular = html_soup.find('table', {'id': 'pgl_advanced'})
                        html_playoffs = html_soup.find('table', {'id': 'pgl_advanced_playoffs'})
                    if html_regular:
                        tgl_name = 'pgl_basic_regular' if gamelog_type == 'gamelog' else 'pgl_advanced_regular'
                        make_directory('/'.join([TGT_DIR,alphabet_dir,player_dir,tgl_name]))
                        PGL_REG = __parse_player_gamelog_basic_advanced_table__(html_regular.prettify())
                        PGL_REG.to_csv('/'.join([TGT_DIR,alphabet_dir,player_dir,tgl_name,gamelog_year_html.replace('.html','.csv')]),index=False)
                    if html_playoffs:
                        tgl_name = 'pgl_basic_playoffs' if gamelog_type == 'gamelog' else 'pgl_advanced_playoffs'
                        make_directory('/'.join([TGT_DIR,alphabet_dir,player_dir,tgl_name]))
                        PGL_POFF = __parse_player_gamelog_basic_advanced_table__(html_playoffs.prettify())
                        PGL_POFF.to_csv('/'.join([TGT_DIR,alphabet_dir,player_dir,tgl_name,gamelog_year_html.replace('.html','.csv')]),index=False)
            TRANGE.update(1)

parse_all_player_gamelogs_from_dir(SRC_DIR,TGT_DIR)


In [None]:
player_id = '/players/a/aminual01.html' # aminual01
html_text,html_soup = load_html(gamelog_year_html)
html_text = clean_html_text(html_text)
html_soup = bs4.BeautifulSoup(html_text, 'html.parser')
html_regular = html_soup.find('table', {'id': 'pgl_basic'})
html_playoffs = html_soup.find('table', {'id': 'pgl_basic_playoffs'})

Debug failed cases - Mostly in the early eras

In [24]:
# Find failed cases
SRC_DIR = './01-data-html/players/'
TGT_DIR = './02-data-parsed/players/'
get_all_files_recursive(SRC_DIR,file_type='html')[:100]

to_reparse = []
TQDM_SRC_FILES = tqdm(get_all_files_recursive(SRC_DIR,file_type='html'),ncols=150)
for src_file in TQDM_SRC_FILES:
    TQDM_SRC_FILES.set_description(src_file,refresh=True)
    # Get the corresponding parsed file:
    if 'gamelog' in src_file:
        tgt_file_regular = src_file.replace(SRC_DIR,TGT_DIR).replace('gamelog','pgl_basic_regular').replace('.html','.csv')
        tgt_file_playoffs = src_file.replace(SRC_DIR,TGT_DIR).replace('gamelog','pgl_basic_playoffs').replace('.html','.csv')
    elif 'gamelog-advanced' in src_file:
        tgt_file_regular = src_file.replace(SRC_DIR,TGT_DIR).replace('gamelog-advanced','pgl_advanced_regular').replace('.html','.csv')
        tgt_file_playoffs = src_file.replace(SRC_DIR,TGT_DIR).replace('gamelog-advanced','pgl_advanced_playoffs').replace('.html','.csv')
    else:
        continue
    if not os.path.isfile(tgt_file_playoffs):
        to_reparse.append(src_file)

print(len(to_reparse))

./01-data-html/players/z/zunicma01/gamelog/1949.html: 100%|███████████████████████████████████████████████████| 56850/56850 [00:23<00:00, 2470.01it/s]

40720





In [30]:
fixed_cases = load_file('./00-data-facts/fixed_player_gamelogs.txt').split('\n')
for fixed_case in fixed_cases:
    df = pd.read_csv(fixed_case.replace('..','.'))
    print(fixed_case)
    display(df)
    time.sleep(2)
    clear_output(wait=True)

../02-data-parsed/players//b/brownda01/pgl_advanced_regular/1949.csv


Unnamed: 0,Rk,G,GP,Date,Boxscore_id,Age,Tm,Tm_id,H/A,Opp,Opp_id,W/L,Pts_diff,GS,MP,TS%,ORB%,DRB%,TRB%,AST%,ORtg,DRtg
0,1,1,1,1948-11-04,/boxscores/194811040BLB.html,25-235,BLB,/teams/BLB/1949.html,1,MNL,/teams/MNL/1949.html,0,-12,,,,,,,,,
1,2,2,1,1948-11-06,/boxscores/194811060CHS.html,25-237,BLB,/teams/BLB/1949.html,0,CHS,/teams/CHS/1949.html,0,-13,,,,,,,,,
2,3,3,1,1948-11-13,/boxscores/194811130NYK.html,25-244,BLB,/teams/BLB/1949.html,0,NYK,/teams/NYK/1949.html,0,-4,,,,,,,,,


KeyboardInterrupt: 

In [None]:
failed_cases = load_file('./00-data-facts/failed_player_gamelogs.txt').split('\n')
for failed_case in failed_cases:
    print(failed_case)
    url = failed_case.replace('..','.')
    html_text,html_soup = load_html(url)
    html_text = clean_html_text(html_text)
    html_soup = bs4.BeautifulSoup(html_text, 'html.parser')
    

In [31]:
pd.read_csv('./02-data-parsed/players/g/greendr01/pgl_basic_playoffs/2016.csv')


Unnamed: 0,Rk,G,GP,Date,Boxscore_id,Age,Tm,Tm_id,H/A,Opp,Opp_id,W/L,Pts_diff,GS,MP,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,+/-
0,1,1.0,1,2016-04-16,/boxscores/201604160GSW.html,26-043,GSW,/teams/GSW/2016.html,1,HOU,/teams/HOU/2016.html,1,26,1.0,33.283333,5.0,12.0,0.417,1.0,3.0,0.333,1.0,3.0,0.333,2.0,8.0,10.0,4.0,2.0,4.0,1.0,5.0,12.0,13.2,28.0
1,2,2.0,1,2016-04-18,/boxscores/201604180GSW.html,26-045,GSW,/teams/GSW/2016.html,1,HOU,/teams/HOU/2016.html,1,9,1.0,39.066667,4.0,12.0,0.333,0.0,2.0,0.0,4.0,6.0,0.667,4.0,10.0,14.0,8.0,2.0,1.0,0.0,2.0,12.0,17.7,9.0
2,3,3.0,1,2016-04-21,/boxscores/201604210HOU.html,26-048,GSW,/teams/GSW/2016.html,0,HOU,/teams/HOU/2016.html,0,-1,1.0,37.266667,3.0,9.0,0.333,0.0,2.0,0.0,3.0,6.0,0.5,0.0,7.0,7.0,7.0,1.0,0.0,7.0,5.0,9.0,1.7,-7.0
3,4,4.0,1,2016-04-24,/boxscores/201604240HOU.html,26-051,GSW,/teams/GSW/2016.html,0,HOU,/teams/HOU/2016.html,1,27,1.0,35.85,7.0,12.0,0.583,4.0,6.0,0.667,0.0,2.0,0.0,1.0,7.0,8.0,6.0,1.0,0.0,4.0,4.0,18.0,14.0,34.0
4,5,5.0,1,2016-04-27,/boxscores/201604270GSW.html,26-054,GSW,/teams/GSW/2016.html,1,HOU,/teams/HOU/2016.html,1,33,1.0,31.216667,5.0,9.0,0.556,2.0,5.0,0.4,3.0,4.0,0.75,0.0,9.0,9.0,8.0,1.0,2.0,0.0,2.0,15.0,20.2,32.0
5,6,6.0,1,2016-05-01,/boxscores/201605010GSW.html,26-058,GSW,/teams/GSW/2016.html,1,POR,/teams/POR/2016.html,1,12,1.0,37.316667,6.0,14.0,0.429,2.0,5.0,0.4,9.0,9.0,1.0,4.0,9.0,13.0,11.0,1.0,3.0,2.0,2.0,23.0,29.1,24.0
6,7,7.0,1,2016-05-03,/boxscores/201605030GSW.html,26-060,GSW,/teams/GSW/2016.html,1,POR,/teams/POR/2016.html,1,11,1.0,40.816667,7.0,20.0,0.35,0.0,5.0,0.0,3.0,4.0,0.75,2.0,12.0,14.0,7.0,1.0,4.0,2.0,4.0,17.0,15.5,19.0
7,8,8.0,1,2016-05-07,/boxscores/201605070POR.html,26-064,GSW,/teams/GSW/2016.html,0,POR,/teams/POR/2016.html,0,-12,1.0,40.716667,13.0,23.0,0.565,8.0,12.0,0.667,3.0,6.0,0.5,2.0,7.0,9.0,8.0,1.0,1.0,2.0,2.0,37.0,32.9,-2.0
8,9,9.0,1,2016-05-09,/boxscores/201605090POR.html,26-066,GSW,/teams/GSW/2016.html,0,POR,/teams/POR/2016.html,1,7,1.0,44.283333,5.0,9.0,0.556,2.0,3.0,0.667,9.0,10.0,0.9,1.0,8.0,9.0,5.0,4.0,7.0,2.0,5.0,21.0,27.8,8.0
9,10,10.0,1,2016-05-11,/boxscores/201605110GSW.html,26-068,GSW,/teams/GSW/2016.html,1,POR,/teams/POR/2016.html,1,4,1.0,35.85,2.0,7.0,0.286,1.0,5.0,0.2,8.0,10.0,0.8,1.0,10.0,11.0,6.0,2.0,1.0,4.0,5.0,13.0,12.7,0.0


In [None]:
players/d/dellama01/gamelog-advanced/2016

In [37]:
url = './01-data-html/players/m/mcadoja01/gamelog-advanced/2016.html'
html_text,html_soup = load_html(url)
html_text = clean_html_text(html_text)
html_soup = bs4.BeautifulSoup(html_text, 'html.parser')
# HTML(html_text)
html_advanced_playoffs = html_soup.find('table', {'id': 'pgl_advanced_playoffs'})
html_text = html_advanced_playoffs.prettify()

DF_PLYR_GL = parse_html_table(html_text)
df_gamelog_basic_links = parse_html_table(html_text,extract_links='body').applymap(lambda x: x[-1])
DF_PLYR_GL.insert(3,'Boxscore_id',df_gamelog_basic_links['Date'])
DF_PLYR_GL.insert(6,'Tm_id',df_gamelog_basic_links['Tm'])
DF_PLYR_GL.insert(9,'Opp_id',df_gamelog_basic_links['Opp'])
# 3. Remove unnnecessary header rows
GAME_FILTER = DF_PLYR_GL.loc[:,'Rk'].astype(str).str.isnumeric().fillna(False)
DF_PLYR_GL = DF_PLYR_GL.loc[GAME_FILTER]
DF_PLYR_GL.insert(2,'GP',DF_PLYR_GL.loc[:,'G'].astype(str).str.isnumeric().fillna(False).astype(int))
# 4. Rename H/A, convert to boolean
GAME_HM_AW = DF_PLYR_GL.pop('Unnamed: 5').isna().astype(int)
DF_PLYR_GL.insert(8,'H/A',GAME_HM_AW)
DF_PLYR_GL
# 5. Rename W/L, split into 2 columns, convert to boolean and int
GAME_RESULT = DF_PLYR_GL.pop('Unnamed: 7').str.extract(r'([WL]) \(([+-]\d+)\)',expand=True)
DF_PLYR_GL.insert(11,'W/L',GAME_RESULT[0].replace({'W':1,'L':0}))
DF_PLYR_GL.insert(12,'Pts_diff',GAME_RESULT[1].astype(int))
DF_PLYR_GL
# 6. Convert MP to float, and Inactive game stats to nan
GP_FILTER = DF_PLYR_GL.loc[:,'GP'].astype(bool)
# Deal with cases where MP is nan 
if DF_PLYR_GL['MP'].notna().sum() > 0:
    DF_PLYR_GL.loc[GP_FILTER & DF_PLYR_GL['MP'].notna(),'MP'] = DF_PLYR_GL.loc[GP_FILTER & DF_PLYR_GL['MP'].notna(),'MP'].str.split(':').apply(lambda x:float(x[0]) + float(x[1])/60)
DF_PLYR_GL.loc[~GP_FILTER,'GS':] = np.nan
# # 7. Convert +/- to float (in basic table)
if '+/-' in DF_PLYR_GL.columns:
    DF_PLYR_GL['+/-'] = DF_PLYR_GL['+/-'].astype(float)
DF_PLYR_GL


Unnamed: 0,Rk,G,GP,Date,Boxscore_id,Age,Tm,Tm_id,H/A,Opp,Opp_id,W/L,GS,MP,TS%,eFG%,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,ORtg,DRtg,GmSc,BPM
0,1,,0,2016-04-16,/boxscores/201604160GSW.html,23-103,GSW,/teams/GSW/2016.html,1,HOU,/teams/HOU/2016.html,1.0,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive
1,2,,0,2016-04-18,/boxscores/201604180GSW.html,23-105,GSW,/teams/GSW/2016.html,1,HOU,/teams/HOU/2016.html,1.0,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play
2,3,1.0,1,2016-04-21,/boxscores/201604210HOU.html,23-108,GSW,/teams/GSW/2016.html,0,HOU,/teams/HOU/2016.html,0.0,0,7:34,.568,,0.0,49.8,26.7,0.0,0.0,22.7,0.0,5.3,123,76,3.2,11.1
3,4,2.0,1,2016-04-24,/boxscores/201604240HOU.html,23-111,GSW,/teams/GSW/2016.html,0,HOU,/teams/HOU/2016.html,1.0,0,5:50,.000,.000,0.0,0.0,0.0,17.9,8.0,0.0,25.8,28.5,20,83,-1.9,-21.2
4,5,3.0,1,2016-04-27,/boxscores/201604270GSW.html,23-114,GSW,/teams/GSW/2016.html,1,HOU,/teams/HOU/2016.html,1.0,0,3:26,,,0.0,0.0,0.0,0.0,30.2,0.0,,0.0,0,38,2.0,26.9
5,6,4.0,1,2016-05-01,/boxscores/201605010GSW.html,23-118,GSW,/teams/GSW/2016.html,1,POR,/teams/POR/2016.html,1.0,0,1:13,,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0,118,0.0,-7.5
6,7,,0,2016-05-03,/boxscores/201605030GSW.html,23-120,GSW,/teams/GSW/2016.html,1,POR,/teams/POR/2016.html,1.0,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play
7,8,5.0,1,2016-05-07,/boxscores/201605070POR.html,23-124,GSW,/teams/GSW/2016.html,0,POR,/teams/POR/2016.html,0.0,0,1:22,,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0,136,0.0,-9.6
8,9,,0,2016-05-09,/boxscores/201605090POR.html,23-126,GSW,/teams/GSW/2016.html,0,POR,/teams/POR/2016.html,1.0,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive
9,10,,0,2016-05-11,/boxscores/201605110GSW.html,23-128,GSW,/teams/GSW/2016.html,1,POR,/teams/POR/2016.html,1.0,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive


In [43]:
df_gamelog_basic_links

Unnamed: 0,Rk,G,Date,Age,Tm,Unnamed: 5,Opp,Unnamed: 7,GS,MP,TS%,eFG%,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,ORtg,DRtg,GmSc,BPM
0,,,/boxscores/201604160GSW.html,,/teams/GSW/2016.html,,/teams/HOU/2016.html,,,,,,,,,,,,,,,,,
1,,,/boxscores/201604180GSW.html,,/teams/GSW/2016.html,,/teams/HOU/2016.html,,,,,,,,,,,,,,,,,
2,,,/boxscores/201604210HOU.html,,/teams/GSW/2016.html,,/teams/HOU/2016.html,,,,,,,,,,,,,,,,,
3,,,/boxscores/201604240HOU.html,,/teams/GSW/2016.html,,/teams/HOU/2016.html,,,,,,,,,,,,,,,,,
4,,,/boxscores/201604270GSW.html,,/teams/GSW/2016.html,,/teams/HOU/2016.html,,,,,,,,,,,,,,,,,
5,,,/boxscores/201605010GSW.html,,/teams/GSW/2016.html,,/teams/POR/2016.html,,,,,,,,,,,,,,,,,
6,,,/boxscores/201605030GSW.html,,/teams/GSW/2016.html,,/teams/POR/2016.html,,,,,,,,,,,,,,,,,
7,,,/boxscores/201605070POR.html,,/teams/GSW/2016.html,,/teams/POR/2016.html,,,,,,,,,,,,,,,,,
8,,,/boxscores/201605090POR.html,,/teams/GSW/2016.html,,/teams/POR/2016.html,,,,,,,,,,,,,,,,,
9,,,/boxscores/201605110GSW.html,,/teams/GSW/2016.html,,/teams/POR/2016.html,,,,,,,,,,,,,,,,,


In [42]:
DF_PLYR_GL[~DF_PLYR_GL['Boxscore_id'].isin(['/boxscores/201606190CLE.html'])]

Unnamed: 0,Rk,G,GP,Date,Boxscore_id,Age,Tm,Tm_id,H/A,Opp,Opp_id,W/L,GS,MP,TS%,eFG%,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,ORtg,DRtg,GmSc,BPM
0,1,,0,2016-04-16,/boxscores/201604160GSW.html,23-103,GSW,/teams/GSW/2016.html,1,HOU,/teams/HOU/2016.html,1.0,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive
1,2,,0,2016-04-18,/boxscores/201604180GSW.html,23-105,GSW,/teams/GSW/2016.html,1,HOU,/teams/HOU/2016.html,1.0,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play
2,3,1.0,1,2016-04-21,/boxscores/201604210HOU.html,23-108,GSW,/teams/GSW/2016.html,0,HOU,/teams/HOU/2016.html,0.0,0,7:34,.568,,0.0,49.8,26.7,0.0,0.0,22.7,0.0,5.3,123,76,3.2,11.1
3,4,2.0,1,2016-04-24,/boxscores/201604240HOU.html,23-111,GSW,/teams/GSW/2016.html,0,HOU,/teams/HOU/2016.html,1.0,0,5:50,.000,.000,0.0,0.0,0.0,17.9,8.0,0.0,25.8,28.5,20,83,-1.9,-21.2
4,5,3.0,1,2016-04-27,/boxscores/201604270GSW.html,23-114,GSW,/teams/GSW/2016.html,1,HOU,/teams/HOU/2016.html,1.0,0,3:26,,,0.0,0.0,0.0,0.0,30.2,0.0,,0.0,0,38,2.0,26.9
5,6,4.0,1,2016-05-01,/boxscores/201605010GSW.html,23-118,GSW,/teams/GSW/2016.html,1,POR,/teams/POR/2016.html,1.0,0,1:13,,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0,118,0.0,-7.5
6,7,,0,2016-05-03,/boxscores/201605030GSW.html,23-120,GSW,/teams/GSW/2016.html,1,POR,/teams/POR/2016.html,1.0,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play,Did Not Play
7,8,5.0,1,2016-05-07,/boxscores/201605070POR.html,23-124,GSW,/teams/GSW/2016.html,0,POR,/teams/POR/2016.html,0.0,0,1:22,,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0,136,0.0,-9.6
8,9,,0,2016-05-09,/boxscores/201605090POR.html,23-126,GSW,/teams/GSW/2016.html,0,POR,/teams/POR/2016.html,1.0,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive
9,10,,0,2016-05-11,/boxscores/201605110GSW.html,23-128,GSW,/teams/GSW/2016.html,1,POR,/teams/POR/2016.html,1.0,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive


Parsing error for G

![](./__images__/20_002.jpg)