# Current Season Collector

In [2]:
def regularBoxScoreScraper(team, season, raw_table=None):
    '''
    Build a tidy per-game box score log for one team and season from the
    basketball-reference "/gamelog/" page.

    INPUTS
    team - (string input) nba basketball team abbreviation, This should come from the TeamAbbreviation notebook
    season - (string or int input) This is a four character parameter yyyy
    raw_table - (optional DataFrame) an already loaded game log table that still
                carries the two-row column header used below. When it is given
                no web request is made and the table itself is left untouched.
                Useful when the page has to be fetched another way (for example
                with custom request headers) or when working offline.
    ----------------------------------------------------------------
    OUTPUT
    df - this data frame contains one row per game with the box score fields
         listed in output_columns below. Besides the copied source fields:
           Team          - the team argument as a string
           Location      - the source value, with missing values replaced by "VS"
           Points_Scored - Team_Points + Opponent_Points (both teams combined)
           FGA           - field goal attempts minus three point attempts
           FG_Perc       - (FG - ThreeP) / FGA, i.e. computed on the same
                           non-three-point basis as FGA
         All other fields keep the values exactly as they were parsed.
    ----------------------------------------------------------------
    RAISES
    KeyError - when the table does not contain one of the expected headers
    '''
    if raw_table is None:
        # str() lets callers pass the season as an int as well as a string
        url = ('https://www.basketball-reference.com/teams/'
               + str(team) + '/' + str(season) + '/gamelog/')
        raw_table = pd.read_html(url)[0]

    # Output field name -> (top header, sub header) of the parsed table
    source_columns = {
        'Date': ('Unnamed: 2_level_0', 'Date'),
        'Location': ('Unnamed: 3_level_0', 'Unnamed: 3_level_1'),
        'Opponent': ('Unnamed: 4_level_0', 'Opp'),
        'Team_Points': ('Score', 'Tm'),
        'Opponent_Points': ('Score', 'Opp'),
        'FTA': ('Team', 'FTA'),
        'FT_Perc': ('Team', 'FT%'),
        'FG': ('Team', 'FG'),
        'FGA_RAW': ('Team', 'FGA'),
        'ThreeP': ('Team', '3P'),
        'ThreePA': ('Team', '3PA'),
        'ThreePA_Perc': ('Team', '3P%'),
        'Offensive_Rebounds': ('Team', 'ORB'),
        'Total_Rebounds': ('Team', 'TRB'),
        'Assists': ('Team', 'AST'),
        'Steals': ('Team', 'STL'),
        'Blocks': ('Team', 'BLK'),
        'TurnOvers': ('Team', 'TOV'),
        'Personal_Fouls': ('Team', 'PF'),
    }
    # Building a fresh frame keeps only the fields we care about and avoids
    # adding helper columns to the parsed table.
    df = pd.DataFrame(
        {name: raw_table[key] for name, key in source_columns.items()}
    )

    # The source table repeats its header inside the body and contains blank
    # separator rows; neither is a game, so both are dropped here.
    is_game_row = df['Opponent'].notnull() & (df['Opponent'] != 'Opp')
    df = df[is_game_row].copy()

    df['Team'] = str(team)
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # df["Location"]: that chained form only updates a temporary object under
    # pandas Copy-on-Write and would leave the missing values in place.
    df['Location'] = df['Location'].fillna('VS')

    df['Points_Scored'] = (df['Team_Points'].astype(int)
                           + df['Opponent_Points'].astype(int))
    non_three_attempts = df['FGA_RAW'].astype(int) - df['ThreePA'].astype(int)
    non_three_makes = df['FG'].astype(int) - df['ThreeP'].astype(int)
    df['FGA'] = non_three_attempts
    df['FG_Perc'] = non_three_makes / non_three_attempts

    output_columns = [
        'Date',
        'Location',
        'Team',
        'Opponent',
        'Team_Points',
        'Opponent_Points',
        'Points_Scored',
        'FTA',
        'FT_Perc',
        'FG',
        'FGA',
        'FG_Perc',
        'ThreeP',
        'ThreePA',
        'ThreePA_Perc',
        'Offensive_Rebounds',
        'Total_Rebounds',
        'Assists',
        'Steals',
        'Blocks',
        'TurnOvers',
        'Personal_Fouls',
    ]
    return df[output_columns]

In [3]:
def advancedBoxScoreScraper(team, season, raw_table=None):
    '''
    Build a tidy per-game advanced box score log for one team and season from
    the basketball-reference "/gamelog-advanced/" page.

    INPUTS
    team - (string input) nba basketball team abbreviation, This should come from the TeamAbbreviation notebook
    season - (string or int input) This is a four character parameter yyyy
    raw_table - (optional DataFrame) an already loaded advanced game log table
                that still carries the two-row column header used below. When
                it is given no web request is made and the table itself is left
                untouched. Useful when the page has to be fetched another way
                (for example with custom request headers) or when working
                offline.
    ----------------------------------------------------------------
    OUTPUT
    df - this data frame contains advanced box score game logs, one row per
         game, with the fields listed in output_columns below. Team is the
         team argument as a string, Location has its missing values replaced
         by "VS", and every other field keeps the value exactly as parsed.
    ----------------------------------------------------------------
    RAISES
    KeyError - when the table does not contain one of the expected headers
    '''
    if raw_table is None:
        # str() lets callers pass the season as an int as well as a string
        url = ('https://www.basketball-reference.com/teams/'
               + str(team) + '/' + str(season) + '/gamelog-advanced/')
        raw_table = pd.read_html(url)[0]

    # Identity fields are addressed by their top header only, so this does not
    # depend on what the second header row calls them.
    header_sources = {
        'Game': 'Unnamed: 1_level_0',
        'Date': 'Unnamed: 2_level_0',
        'Location': 'Unnamed: 3_level_0',
        'Opponent': 'Unnamed: 4_level_0',
    }
    # Output field name -> (top header, sub header) of the parsed table
    stat_sources = {
        # Score attributes
        'Result': ('Score', 'Rslt'),
        'Team_Score': ('Score', 'Tm'),
        'Opponent_Score': ('Score', 'Opp'),
        # Offensive/defensive four factors categories
        'Offensive_EFG': ('Offensive Four Factors', 'eFG%'),
        'Defensive_EFG': ('Defensive Four Factors', 'eFG%'),
        'Offensive_TurnOver_Rate': ('Offensive Four Factors', 'TOV%'),
        'Defensive_TurnOver_Rate': ('Defensive Four Factors', 'TOV%'),
        'Offensive_Rebounding_Rate': ('Offensive Four Factors', 'ORB%'),
        'Defensive_Rebounding_Rate': ('Defensive Four Factors', 'DRB%'),
        # Advanced categories
        'OffensiveRating': ('Advanced', 'ORtg'),
        'DefensiveRating': ('Advanced', 'DRtg'),
        'Pace': ('Advanced', 'Pace'),
        'FreeThrowAttemptRate': ('Advanced', 'FTr'),
        'ThreePointAttemptRate': ('Advanced', '3PAr'),
        'TrueShootingRate': ('Advanced', 'TS%'),
    }

    # droplevel returns a new frame, so the parsed table keeps its headers
    top_level_view = raw_table.droplevel(1, axis=1)
    picked = {name: top_level_view[label]
              for name, label in header_sources.items()}
    picked.update({name: raw_table[key] for name, key in stat_sources.items()})
    df = pd.DataFrame(picked)

    # The target table is broken out into pieces, so every x rows the column
    # headers re appear, and it also contains blank separator rows; neither is
    # a game, so both are dropped here.
    is_game_row = df['Opponent'].notnull() & (df['Opponent'] != 'Opp')
    df = df[is_game_row].copy()

    df['Team'] = str(team)
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # df["Location"]: that chained form only updates a temporary object under
    # pandas Copy-on-Write and would leave the missing values in place.
    df['Location'] = df['Location'].fillna('VS')

    output_columns = [
        'Game',
        'Date',
        'Location',
        'Team',
        'Opponent',
        'Result',
        'Team_Score',
        'Opponent_Score',
        'Offensive_EFG',
        'Defensive_EFG',
        'Offensive_TurnOver_Rate',
        'Defensive_TurnOver_Rate',
        'Offensive_Rebounding_Rate',
        'Defensive_Rebounding_Rate',
        'OffensiveRating',
        'DefensiveRating',
        'Pace',
        'FreeThrowAttemptRate',
        'ThreePointAttemptRate',
        'TrueShootingRate',
    ]
    return df[output_columns]

### End of Notebook