# Advanced Box Score Scraper (Team Level)

In [2]:
#%run ../Componenets/00_ProjectImports.ipynb
#%run ../Componenets/01_UserAgentInformation.ipynb



In [3]:
def advancedBoxScoreScraper(team,season):
    '''
    Scrape one team's advanced box score game log for one season from
    basketball-reference.com and return it as a flat data frame.

    INPUTS
    team - (string input) nba basketball team abbreviation, This should come from the TeamAbbreviation notebook
    season - (string input) This is a four character parameter yyyy
             (non-string values are formatted into the url with str())
    ----------------------------------------------------------------
    OUTPUT
    df - this data frame contains advanced box score game logs, one row per
         game, with the columns Game, Date, Location, Team, Opponent, Result,
         Team_Score, Opponent_Score, the offensive/defensive four factor
         rates and the advanced ratings. Missing Location values are replaced
         with "VS" (presumably home games - confirm against the site).
    ----------------------------------------------------------------
    RAISES
    requests.HTTPError - the site answered with an error status code
    ValueError - the page did not contain a 'table_wrapper' div
    KeyError - the table does not have the expected column headers
    '''
    # Local import so this cell does not depend on the shared imports notebook for it
    from io import StringIO

    # new flat column name -> (top header, sub header) in the scraped two-level table
    feature_sources = {
        #Features based on the offensive/defensive four factors categories
        'Offensive_EFG': ('Offensive Four Factors', 'eFG%'),
        'Defensive_EFG': ('Defensive Four Factors', 'eFG%'),
        'Offensive_TurnOver_Rate': ('Offensive Four Factors', 'TOV%'),
        'Defensive_TurnOver_Rate': ('Defensive Four Factors', 'TOV%'),
        'Offensive_Rebounding_Rate': ('Offensive Four Factors', 'ORB%'),
        'Defensive_Rebounding_Rate': ('Defensive Four Factors', 'DRB%'),
        #Features based on the advanced categories
        'OffensiveRating': ('Advanced', 'ORtg'),
        'DefensiveRating': ('Advanced', 'DRtg'),
        'Pace': ('Advanced', 'Pace'),
        'FreeThrowAttemptRate': ('Advanced', 'FTr'),
        'ThreePointAttemptRate': ('Advanced', '3PAr'),
        'TrueShootingRate': ('Advanced', 'TS%'),
        #Score attributes
        'Result': ('Score', 'Result'),
        'Team_Score': ('Score', 'Tm'),
        'Opponent_Score': ('Score', 'Opp'),
    }
    # Names for the leading columns that have no top-level header on the page
    unnamed_headers = {
        'Unnamed: 0_level_0': 'Rank',
        'Unnamed: 1_level_0': 'Game',
        'Unnamed: 2_level_0': 'Date',
        'Unnamed: 3_level_0': 'Location',
        'Unnamed: 4_level_0': 'Opponent',
    }
    # The only data fields we care about, in output order
    output_columns = [
        'Game',
        'Date',
        'Location',
        'Team',
        'Opponent',
        'Result',
        'Team_Score',
        'Opponent_Score',
        'Offensive_EFG',
        'Defensive_EFG',
        'Offensive_TurnOver_Rate',
        'Defensive_TurnOver_Rate',
        'Offensive_Rebounding_Rate',
        'Defensive_Rebounding_Rate',
        'OffensiveRating',
        'DefensiveRating',
        'Pace',
        'FreeThrowAttemptRate',
        'ThreePointAttemptRate',
        'TrueShootingRate',
    ]

    #Creating our url string to go fetch website data
    url = f'https://www.basketball-reference.com/teams/{team}/{season}/gamelog-advanced/'
    #calling our user agent variables to create a request header
    headers = {"User-Agent": user_agent}
    #Initiating the web page; the timeout keeps a stalled connection from hanging forever
    response = requests.get(url, headers=headers, timeout=30)
    # Fail with the real HTTP error instead of a confusing parsing error further down
    response.raise_for_status()
    #Finding table contents
    soup = BeautifulSoup(response.text, 'lxml')
    table = soup.find('div', {'class': 'table_wrapper'})
    if table is None:
        raise ValueError("No 'table_wrapper' div found at " + url)
    # read_html expects a file-like object; passing literal html text is deprecated
    df = pd.read_html(StringIO(str(table)))[0]

    #Copying every two-level column we need to a flat, descriptive name
    for new_name, source in feature_sources.items():
        df[new_name] = df[source]
    #Eliminating the second header level, then naming the unnamed column headers
    df.columns = df.columns.droplevel(1)
    df = df.rename(columns=unnamed_headers)
    # Set on the full frame (not on a filtered slice) to avoid chained-assignment problems
    df['Team'] = str(team)

    #The target table is defaulted to be broken out into pieces so every x rows, the column headers re appear
    # the mask below drops those repeated header rows and rows without an opponent
    is_game_row = df['Opponent'].notnull() & (df['Opponent'] != 'Opp')
    # Explicit copy so the fill below changes the frame we return, not a temporary
    games = df[is_game_row][output_columns].copy()
    games['Location'] = games['Location'].fillna("VS")
    return games