## Data Collection

---

In [1]:
import pandas as pd
import time
import pickle
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
import glob
import os
import csv
from functools import reduce

In [2]:
def change_values(df):
    """
    change dataframe values from "-" to 0
    """
    mapping = {'-' : 0}
    replace_dict = {}
    for columns in df.columns:
        replace_dict[columns] = mapping
        
    return df.replace(replace_dict)

In [3]:
options = Options()
options.add_argument('-headless')

### a. Crawling Names of League / Team

In [4]:
def crawling_league_teams(team_id, api_delay_term=5):
    """
    get league name and team name
    
    parameter ---------------------------------------------------------------
    team_id : (int or str) team_id
    api_delay_term = (optional) 5
    
    return ------------------------------------------------------------------
    pandas dataframe columns=team_id, team_name
    """
    
    # connect webdriver
    url = "https://www.whoscored.com/Teams/" + str(team_id)
    driver = webdriver.Chrome(options=options)
    driver.get(url)
    
    # wait get league team datas
    time.sleep(api_delay_term) 
    
    # make pandas dataframe
    team_df = pd.DataFrame(columns=["team_id","team_name"])
    
    # get team datas
    teams = driver.find_elements(By.CSS_SELECTOR, '#teams option')
    for team in teams:
        team_name = team.text
        team_id = team.get_attribute("value").split("/")[2]
        team_df.loc[len(team_df)] = {"team_id" : team_id, "team_name" : team_name }
        
    # close webdriver
    driver.close()
    
    return change_values(team_df)

### b. Crawling Player Summary

In [5]:
def get_player_fifa_info(player_name):
    df = pd.read_csv("./players/player_data_full.csv")
    row = df.loc[df['name'] == player_name]
    return row

In [6]:
def crawling_player_summary(team_id, api_delay_term=5):
    """
    crawling player summary data 
        
    parameter -------------------------------------------------------------------
    team_id : (int or str) team_id
    api_delay_term = (optional) 5
    
    return ----------------------------------------------------------------------
    pandas dataframe
    columns = player_nuber, flag, name, age, position, tall, weight, full_time, half_time
    , mins, goals, asists, yel, red, spg, ps, motm, aw, rating
    
    """
    
    # connect webdriver
    url = "https://www.whoscored.com/Teams/" + str(team_id)
    driver = webdriver.Chrome(options=options)
    driver.get(url)

    # wait for getting data
    time.sleep(api_delay_term)
    
    # make pandas dataframe
    player_summary_df = pd.DataFrame(columns=[
            "name", "starting_apps", "sub_apps", "mins", "goals", "asists", "yel", "red", "shots_per_game", "pass_accuracy",
            "motm", "aerial_duels_won", "rating",
            'positions', 'dob', 'value', 'wage', 'preferred_foot', 'profile_id', 'club_id', 'kit_number', 'joined', 
            'contract_valid_until', 'flag', 'image'
        ])
    
    # get player summay datas
    elements = driver.find_elements(By.CSS_SELECTOR, '#player-table-statistics-body tr')
    for element in elements:
        
        # split full time games and half time games
        games = element.find_elements(By.CSS_SELECTOR, "td")[4].text
        games = games.split("(")
        full_time, half_time = games[0], 0
        if len(games) > 1 :
            half_time = games[1].replace(")","")
        else :
            half_time = 0
        
        # player dictionary data
        name = element.find_elements(By.CSS_SELECTOR, "td")[0].find_elements(By.CSS_SELECTOR, "a")[0].find_elements(By.CSS_SELECTOR, "span")[0].text

        row = get_player_fifa_info(player_name=name)
        player_dict = {
            "name": name,
            "starting_apps": full_time,
            "sub_apps": half_time,
            "mins": element.find_elements(By.CSS_SELECTOR, "td")[5].text,
            "goals": element.find_elements(By.CSS_SELECTOR, "td")[6].text,
            "asists": element.find_elements(By.CSS_SELECTOR, "td")[7].text,
            "yel": element.find_elements(By.CSS_SELECTOR, "td")[8].text,
            "red": element.find_elements(By.CSS_SELECTOR, "td")[9].text,
            "shots_per_game": element.find_elements(By.CSS_SELECTOR, "td")[10].text,
            "pass_accuracy": element.find_elements(By.CSS_SELECTOR, "td")[11].text,
            "aerial_duels_won": element.find_elements(By.CSS_SELECTOR, "td")[12].text,
            "motm": element.find_elements(By.CSS_SELECTOR, "td")[13].text,
            "rating": element.find_elements(By.CSS_SELECTOR, "td")[14].text,
            'positions': row['positions'].values[0] if len(row['positions'].values) > 0 else '',
            'dob': row['dob'].values[0] if len(row['dob'].values) > 0 else '',
            'value': row['value'].values[0] if len(row['value'].values) > 0 else '',
            'wage': row['wage'].values[0] if len(row['wage'].values) > 0 else '',
            'preferred_foot': row['profile_preferred_foot'].values[0] if len(row['profile_preferred_foot'].values) > 0 else '',
            'profile_id': row['profile_id'].values[0] if len(row['profile_id'].values) > 0 else '',
            'club_id': row['club_id'].values[0] if len(row['club_id'].values) > 0 else '',
            'club_logo': 'https://cdn.futbin.com/content/fifa24/img/clubs/{club_id}.png'.format(club_id=row['club_id'].values[0]) if len(row['club_id'].values) > 0 else '',
            'kit_number': row['club_kit_number'].values[0] if len(row['club_kit_number'].values) > 0 else '',
            'joined': row['club_joined'].values[0] if len(row['club_joined'].values) > 0 else '',
            'contract_valid_until': row['club_contract_valid_until'].values[0] if len(row['club_contract_valid_until'].values) > 0 else '',
            'flag': row['country_flag'].values[0] if len(row['country_flag'].values) > 0 else '',
            'image': 'https://cdn.futbin.com/content/fifa24/img/players/{profile_id}.png'.format(profile_id=row['profile_id'].values[0])  if len(row['profile_id'].values) > 0 else '',
        }

        
        player_summary_df.loc[len(player_summary_df)] = player_dict
    
    # close webdriver
    driver.close()
    
    return change_values(player_summary_df)

### c. Crawling Player Defensive

In [7]:
def crawling_player_defensive(team_id, api_delay_term=5):
    """
    crawling player defensive data 
        
    parameter -------------------------------------------------------------------
    team_id : (int or str) team_id
    api_delay_term = (optional) 5
    
    return ----------------------------------------------------------------------
    pandas dataframe
    columns = player_number, tackles, inter, fouls, offsides, clear, drb, blocks, owng
    
    """
    
    # connect webdriver
    url = "https://www.whoscored.com/Teams/" + str(team_id)
    driver = webdriver.Chrome(options=options)
    driver.get(url)

    # wait for getting data
    time.sleep(api_delay_term)

    # click event for getting defensive data
    driver.find_elements(By.CSS_SELECTOR, "#team-squad-stats-options .in-squad-detailed-view")[0].find_elements(By.CSS_SELECTOR, "a")[0].click()
    
    # wait for getting data
    time.sleep(api_delay_term)

    # make pandas dataframe
    player_defensive_df = pd.DataFrame(columns=[
            'name', 'tackles', 'interceptions', 'fouls', 'offsides_won', 'clearances', 'dribbled_past', 'blocks', 'own_goals'
        ])
    
    # get player summay datas
    elements = driver.find_elements(By.CSS_SELECTOR, "#statistics-table-defensive #player-table-statistics-body tr")

    for element in elements:
        name = element.find_elements(By.CSS_SELECTOR, "td")[0].find_elements(By.CSS_SELECTOR, "a")[0].find_elements(By.CSS_SELECTOR, "span")[0].text
        # player dictionary data
        player_dict = { 
            "name": name, 
            "tackles": element.find_elements(By.CSS_SELECTOR, "td")[6].text,
            "interceptions": element.find_elements(By.CSS_SELECTOR, "td")[7].text,
            "fouls": element.find_elements(By.CSS_SELECTOR, "td")[8].text,
            "offsides_won": element.find_elements(By.CSS_SELECTOR, "td")[9].text,
            "clearances": element.find_elements(By.CSS_SELECTOR, "td")[10].text,
            "dribbled_past": element.find_elements(By.CSS_SELECTOR, "td")[11].text,
            "blocks": element.find_elements(By.CSS_SELECTOR, "td")[12].text,
            "own_goals": element.find_elements(By.CSS_SELECTOR, "td")[13].text
        }
        
        player_defensive_df.loc[len(player_defensive_df)] = player_dict
        
    
    # close webdriver
    driver.close()
    
    return change_values(player_defensive_df)

### d. Crawling Player Offensive

In [8]:
def crawling_player_offensive(team_id, api_delay_term=5):
    
    """
    crawling player offensive data 
        
    parameter -------------------------------------------------------------------
    team_id : (int or str) team_id
    api_delay_term = (optional) 5
    
    return ----------------------------------------------------------------------
    pandas dataframe
    columns = player_number, keyp, fouled, off, disp, unstch
    
    """  

    # connect webdriver
    url = "https://www.whoscored.com/Teams/" + str(team_id)
    driver = webdriver.Chrome(options=options)
    driver.get(url)
    
    # wait for getting data
    time.sleep(api_delay_term)
    
    # click event for getting data
    driver.find_elements(By.CSS_SELECTOR, "#team-squad-stats-options .in-squad-detailed-view")[1].find_elements(By.CSS_SELECTOR, "a")[0].click()
    
    # wait for getting data
    time.sleep(api_delay_term)
    
    # make pandas dataframe
    player_offensive_df = pd.DataFrame(columns=['name', 'key_passes', 'dribbles', 'fouled', 'offsides', 'possession_lost', 'bad_touch'])
    
    # get player offensive datas
    elements = driver.find_elements(By.CSS_SELECTOR, "#statistics-table-offensive #player-table-statistics-body tr")
    for element in elements:
        name = element.find_elements(By.CSS_SELECTOR, "td")[0].find_elements(By.CSS_SELECTOR, "a")[0].find_elements(By.CSS_SELECTOR, "span")[0].text

        player_dict = {
            "name": name, 
            "key_passes": element.find_elements(By.CSS_SELECTOR, "td")[9].text, 
            "dribbles": element.find_elements(By.CSS_SELECTOR, "td")[10].text, 
            "fouled": element.find_elements(By.CSS_SELECTOR, "td")[11].text, 
            "offsides": element.find_elements(By.CSS_SELECTOR, "td")[12].text,
            "possession_lost": element.find_elements(By.CSS_SELECTOR, "td")[13].text,
            "bad_touch": element.find_elements(By.CSS_SELECTOR, "td")[14].text,
        }
        
        player_offensive_df.loc[len(player_offensive_df)] = player_dict
        
    # close webdriver
    driver.close()
    
    return change_values(player_offensive_df)

### e. Crawling Player Passing

In [9]:
def crawling_player_passing(team_id, api_delay_term=5):
    
    """
    crawling player passing data 
        
    parameter -------------------------------------------------------------------
    team_id : (int or str) team_id
    api_delay_term = (optional) 5
    
    return ----------------------------------------------------------------------
    pandas dataframe belong player's ability
    player_number, avgp, ps, crosses, longb, thrb
    
    """ 
    
    # connect webdriver
    url = "https://www.whoscored.com/Teams/" + str(team_id)
    driver = webdriver.Chrome(options=options)
    driver.get(url)
    
    # wait for gettig data
    time.sleep(api_delay_term)
    
    # click event for gettig data
    driver.find_elements(By.CSS_SELECTOR, "#team-squad-stats-options .in-squad-detailed-view")[2].find_elements(By.CSS_SELECTOR, "a")[0].click()

    # wait for gettig data
    time.sleep(api_delay_term)
    
    # make pnadas dateframe
    player_passing_df = pd.DataFrame(columns=[
            "name", "pass_per_game", "accuracy", "crosses", "long_balls", "through_balls"
        ])

    # get data
    elements = driver.find_elements(By.CSS_SELECTOR, "#statistics-table-passing #player-table-statistics-body tr")
    for element in elements:
        name = element.find_elements(By.CSS_SELECTOR, "td")[0].find_elements(By.CSS_SELECTOR, "a")[0].find_elements(By.CSS_SELECTOR, "span")[0].text
        player_dict = {
            "name": name,  
            "pass_per_game": element.find_elements(By.CSS_SELECTOR, "td")[8].text, 
            "accuracy": element.find_elements(By.CSS_SELECTOR, "td")[9].text, 
            "crosses": element.find_elements(By.CSS_SELECTOR, "td")[10].text,
            "long_balls": element.find_elements(By.CSS_SELECTOR, "td")[11].text,
            "through_balls": element.find_elements(By.CSS_SELECTOR, "td")[12].text,
        }        
        player_passing_df.loc[len(player_passing_df)] = player_dict
    
    # close webdriver
    driver.close()
    
    return change_values(player_passing_df)

In [10]:
def crawling_team_summary(team_id, api_delay_term=5):
    """
    crawling team summary data 
        
    parameter -------------------------------------------------------------------
    team_id : (int or str) team_id
    api_delay_term = (optional) 5
    
    return ----------------------------------------------------------------------
    pandas dataframe
    columns = ...
    
    """
    
    # connect webdriver
    url = "https://www.whoscored.com/Teams/" + str(team_id)
    driver = webdriver.Chrome(options=options)
    driver.get(url)

    # wait for getting data
    time.sleep(api_delay_term)
    
    # make pandas dataframe
    team_summary_df = pd.DataFrame(columns=[
            "league", 'matches', 'goals_scored', 'shots', 'yellow_cards', 'red_cards',
            'possession', 'pass_accuracy', 'aerial_duels_won', 'rating'
        ])
    
    # get team summay data
    elements = driver.find_elements(By.CSS_SELECTOR, '#top-team-stats-summary-content tr')
    for element in elements:

        league_dict = {
            "league": element.find_elements(By.CSS_SELECTOR, "td")[0].text,
            "matches": element.find_elements(By.CSS_SELECTOR, "td")[1].text, 
            "goals_scored": element.find_elements(By.CSS_SELECTOR, "td")[2].text,
            "shots": element.find_elements(By.CSS_SELECTOR, "td")[3].text,
            "yellow_cards": element.find_elements(By.CSS_SELECTOR, "td")[4].find_elements(By.CSS_SELECTOR, "span")[0].text,
            "red_cards": element.find_elements(By.CSS_SELECTOR, "td")[4].find_elements(By.CSS_SELECTOR, "span")[1].text,
            "possession": element.find_elements(By.CSS_SELECTOR, "td")[5].text,
            "pass_accuracy": element.find_elements(By.CSS_SELECTOR, "td")[6].text,
            "aerial_duels_won": element.find_elements(By.CSS_SELECTOR, "td")[7].text,
            "rating": element.find_elements(By.CSS_SELECTOR, "td")[8].text,
        }
        
        team_summary_df.loc[len(team_summary_df)] = league_dict
    
    # close webdriver
    driver.close()
    
    return change_values(team_summary_df)

In [11]:
def crawling_team_defensive_stats(team_id, api_delay_term=5):
    
    """
    crawling team data 
        
    parameter -------------------------------------------------------------------
    team_id : (int or str) team_id
    api_delay_term = (optional) 5
    
    return ----------------------------------------------------------------------
    pandas dataframe
    
    """ 
    
    # connect webdriver
    url = "https://www.whoscored.com/Teams/" + str(team_id)
    driver = webdriver.Chrome(options=options)
    driver.get(url)
    
    # wait for gettig data
    time.sleep(api_delay_term)
    
    # click event for gettig data
    driver.find_elements(By.CSS_SELECTOR, "#top-team-stats-options li")[1].find_elements(By.CSS_SELECTOR, "a")[0].click()

    # wait for gettig data
    time.sleep(api_delay_term)
    
    # make pandas dataframe
    team_summary_df = pd.DataFrame(columns=[
            "league", 'shots_conceded', 'tackles', 'interceptions',
            'fouls_conceded', 'offsides'
        ])
    
    # get team summay data
    elements = driver.find_elements(By.CSS_SELECTOR, '#top-team-stats-summary-content tr')
    for element in elements:

        league_dict = {
            "league": element.find_elements(By.CSS_SELECTOR, "td")[0].text,
            "shots_conceded": element.find_elements(By.CSS_SELECTOR, "td")[2].text,
            "tackles": element.find_elements(By.CSS_SELECTOR, "td")[3].text,
            "interceptions": element.find_elements(By.CSS_SELECTOR, "td")[4].text,
            "fouls_conceded": element.find_elements(By.CSS_SELECTOR, "td")[5].text,
            "offsides": element.find_elements(By.CSS_SELECTOR, "td")[3].text,
        }
        
        team_summary_df.loc[len(team_summary_df)] = league_dict
    
    # close webdriver
    driver.close()
    
    return change_values(team_summary_df)

In [12]:
def crawling_team_offensive_stats(team_id, api_delay_term=5):
    
    """
    crawling team data 
        
    parameter -------------------------------------------------------------------
    team_id : (int or str) team_id
    api_delay_term = (optional) 5
    
    return ----------------------------------------------------------------------
    pandas dataframe
    
    """ 
    
    # connect webdriver
    url = "https://www.whoscored.com/Teams/" + str(team_id)
    driver = webdriver.Chrome(options=options)
    driver.get(url)
    
    # wait for gettig data
    time.sleep(api_delay_term)
    
    # click event for gettig data
    driver.find_elements(By.CSS_SELECTOR, "#top-team-stats-options li")[2].find_elements(By.CSS_SELECTOR, "a")[0].click()

    # wait for gettig data
    time.sleep(api_delay_term)
    
    # make pandas dataframe
    team_summary_df = pd.DataFrame(columns=[
            "league", 'shots_on_target', 'dribbles_won', 'fouls_won'
        ])
    
    # get team summay data
    elements = driver.find_elements(By.CSS_SELECTOR, '#top-team-stats-summary-content tr')
    for element in elements:
        
        league_dict = {
            "league": element.find_elements(By.CSS_SELECTOR, "td")[0].text,
            "shots_on_target": element.find_elements(By.CSS_SELECTOR, "td")[3].text,
            "dribbles_won": element.find_elements(By.CSS_SELECTOR, "td")[4].text,
            "fouls_won": element.find_elements(By.CSS_SELECTOR, "td")[5].text
        }
        
        team_summary_df.loc[len(team_summary_df)] = league_dict
    
    # close webdriver
    driver.close()
    
    return change_values(team_summary_df)

### f. Save Scraped Data

In [13]:
def save_league_teams(league_name, team_id):    
    league_teams = crawling_league_teams(team_id)
    league_teams.to_csv("./league/" + league_name + ".csv", index=False)
    return league_teams

In [14]:
def make_team_info(team_id):
    """
    cralwing team data and merge 
    
    parameter ----------------------------------------------
    team_id : int or str, you want to get team_id of players
    team_name : str, team_name
    
    return -------------------------------------------------
    merged dataframe
    
    """
    
    # excute player datas crawling functions 
    team_summary_df = crawling_team_summary(team_id)
    team_defensive_df = crawling_team_defensive_stats(team_id)
    team_offensive_df = crawling_team_offensive_stats(team_id)
    
    # merge dataframes
    merged_df = reduce(lambda x,y: pd.merge(x,y, on='league', how='outer'), [team_summary_df,
                                                                           team_defensive_df,
                                                                           team_offensive_df])
    
    return merged_df

In [15]:
def update_league_teams(league_name):
    # get league team_id team_name dataframe
    league_df = pd.read_csv("./league/" + league_name + ".csv")
    
    # get player dataframe function
    def get_team_df(team_id, team_name):
        team_df = make_team_info(team_id)
        print('{} - mp: {} - g: {}'.format(team_name, team_df.loc[1, 'matches'], team_df.loc[1, 'goals_scored']))
        team_df = team_df.dropna(how='all')
        team_df.to_csv("./teams/" + team_name + ".csv", index=False)
    
    # for one of league teams
    for idx, row in league_df.iterrows():
        try_again_num = 0
        print('=' * 60) 
        print('{0} {1}.'.format(idx + 1, row.team_name))
        
        team_df = make_team_info(row.team_id)
        team_df = team_df.dropna()
        print('{} - mp: {} - g: {}'.format(row.team_name, team_df.loc[len(team_df)-1, 'matches'], team_df.loc[len(team_df)-1, 'goals_scored']))
        team_df.to_csv("./teams/" + row.team_name + ".csv", index=False)

        print("\n{0} Done".format(row.team_name))
    print(league + " Teams Done!")

In [16]:
def make_player_info(team_id, team_name):
    """
    cralwing player data and merge player data(summary, defensive, offensive, passing)
    
    parameter ----------------------------------------------
    team_id : int or str, you want to get team_id of players
    team_name : str, team_name
    
    return -------------------------------------------------
    merged dataframe
    
    """
    
    # excute player datas crawling functions 
    player_summary_df = crawling_player_summary(team_id)
    player_defensive_df = crawling_player_defensive(team_id)
    player_offensive_df = crawling_player_offensive(team_id)
    player_passing_df = crawling_player_passing(team_id)
    
    # merge dataframes
    merged_df = reduce(lambda x,y: pd.merge(x,y, on='name', how='outer'), [player_summary_df,
                                                                           player_offensive_df,
                                                                           player_defensive_df,
                                                                           player_passing_df])
    
    # add team name
    merged_df['team_name'] = team_name
    
    return merged_df
    

def save_players_in_the_league(league):
    """
    make player data and save
    
    prameter  -----------------------------------------------------
    league : (str) league name
    
    """
    
    # get league team_id team_name dataframe
    league_team_df = pd.read_csv("./league/" + league + ".csv")
    
    # get player dataframe function
    def get_player_df(league, team_id, team_name):
        players_df = make_player_info(team_id, team_name)
        players_df.to_csv("./players/" + league + "/" + team_name + ".csv", index=False)
        return players_df
    
    # for one of league teams
    for idx, row in league_team_df.iterrows():
        try_again_num = 0
        print('=' * 60) 
        print('{0} {1}.'.format(idx + 1, row.team_name))

        players_df = []
        
        # there is no player data, try crawling more 3 times
        while len(players_df) == 0 and try_again_num < 2:
            if try_again_num > 0:
                print("Try Again! : player_data error")
            try_again_num += 1
            players_df = get_player_df(league, row.team_id, row.team_name)

        print('\n')
        print("The number of saved players : {0}".format(len(players_df)))
        print("{0} Done".format(row.team_name))    
        
    print(league + " Players Done!")

### g. Merge CSV files

In [17]:
def concat_csv(path, new_file_name):
    path = path
    allfiles = glob.glob(os.path.join(path + "*.csv"))
    frame = pd.DataFrame()
    list_ = []
    for file_ in allfiles:
        df = pd.read_csv(file_, index_col=None, header=0)
        list_.append(df)
        
    concat_df = pd.concat(list_, ignore_index=True)
    concat_df.to_csv(new_file_name)
    print("success")

In [18]:
#league_teams = save_league_teams(league_name='Premier League', team_id=32)

In [19]:
# update_league_teams(league_name='Premier League')

In [21]:
save_players_in_the_league(league='Premier League')

1 Arsenal.


The number of saved players : 23
Arsenal Done
2 Aston Villa.


The number of saved players : 23
Aston Villa Done
3 Bournemouth.


The number of saved players : 25
Bournemouth Done
4 Brentford.


The number of saved players : 25
Brentford Done
5 Brighton.


The number of saved players : 25
Brighton Done
6 Burnley.


The number of saved players : 25
Burnley Done
7 Chelsea.


The number of saved players : 23
Chelsea Done
8 Crystal Palace.


The number of saved players : 22
Crystal Palace Done
9 Everton.


The number of saved players : 23
Everton Done
10 Fulham.


The number of saved players : 24
Fulham Done
11 Liverpool.


The number of saved players : 22
Liverpool Done
12 Luton.


The number of saved players : 23
Luton Done
13 Manchester City.


The number of saved players : 25
Manchester City Done
14 Manchester United.


The number of saved players : 25
Manchester United Done
15 Newcastle.


The number of saved players : 25
Newcastle Done
16 Nottingham Forest.


The number 

In [76]:
def crawling_team_statistics(team_id, api_delay_term=5):
    
    """
    crawling team goal types
        
    parameter -------------------------------------------------------------------
    team_id : (int or str) team_id
    api_delay_term = (optional) 5
    
    return ----------------------------------------------------------------------
    pandas dataframe
    
    """ 
    
    # connect webdriver
    url = "https://www.whoscored.com/Teams/" + str(team_id)
    driver = webdriver.Chrome(options=options)
    driver.get(url)
    
    # wait for gettig data
    time.sleep(api_delay_term)
    
    # click event for gettig data
    driver.find_elements(By.CSS_SELECTOR, "#sub-navigation li")[2].find_elements(By.CSS_SELECTOR, "a")[0].click()

    # wait for gettig data
    time.sleep(api_delay_term)
    
    ############################
    ## 01. GOALS
    
    # get goals for
    gf_elements = driver.find_elements(By.CSS_SELECTOR, '#team-goals-content tr')
    team_dict = {
        "open_play_goals": gf_elements[0].find_elements(By.CSS_SELECTOR, "td")[1].text,
        "set_piece_goals": gf_elements[1].find_elements(By.CSS_SELECTOR, "td")[1].text,
        "counter_attack_goals": gf_elements[2].find_elements(By.CSS_SELECTOR, "td")[1].text,
        "penalties": gf_elements[3].find_elements(By.CSS_SELECTOR, "td")[1].text,
        "own_goals_scored": gf_elements[4].find_elements(By.CSS_SELECTOR, "td")[1].text,
    }
    
    # click event for gettig data
    driver.find_elements(By.CSS_SELECTOR, "#team-goals-filter-against")[0].find_elements(By.CSS_SELECTOR, "a")[1].click()

    # wait for gettig data
    time.sleep(api_delay_term)

    # get goals against
    ga_elements = driver.find_elements(By.CSS_SELECTOR, '#team-goals-content tr')
    team_dict.update({
        "open_play_goals_conceded": ga_elements[0].find_elements(By.CSS_SELECTOR, "td")[1].text,
        "set_piece_goals_conceded": ga_elements[1].find_elements(By.CSS_SELECTOR, "td")[1].text,
        "counter_attack_goals_conceded": ga_elements[2].find_elements(By.CSS_SELECTOR, "td")[1].text,
        "penalties_conceded": ga_elements[3].find_elements(By.CSS_SELECTOR, "td")[1].text,
        "own_goals": ga_elements[4].find_elements(By.CSS_SELECTOR, "td")[1].text,
    })


    ####################
    ## 02. GOALS AT HOME

    # click event for gettig data
    driver.find_elements(By.CSS_SELECTOR, "#team-goals-filter-field dd")[1].find_elements(By.CSS_SELECTOR, "a")[0].click()

    # wait for gettig data
    time.sleep(api_delay_term)
    
    # get goals for home
    gfh_elements = driver.find_elements(By.CSS_SELECTOR, '#team-goals-content tr')
    opf_home = gfh_elements[0].find_elements(By.CSS_SELECTOR, "td")[1].text
    spf_home = gfh_elements[1].find_elements(By.CSS_SELECTOR, "td")[1].text
    penf_home = gfh_elements[2].find_elements(By.CSS_SELECTOR, "td")[1].text
    caf_home = gfh_elements[3].find_elements(By.CSS_SELECTOR, "td")[1].text
    ogf_home = gfh_elements[4].find_elements(By.CSS_SELECTOR, "td")[1].text
    
    team_dict.update({
        "total_goals_for_home": int(opf_home) + int(spf_home) + int(penf_home) + int(caf_home) + int(ogf_home)
    })
    
    # click event for gettig data
    driver.find_elements(By.CSS_SELECTOR, "#team-goals-filter-against dd")[1].find_elements(By.CSS_SELECTOR, "a")[0].click()

    # wait for gettig data
    time.sleep(api_delay_term)

    # get goals against home
    gah_elements = driver.find_elements(By.CSS_SELECTOR, '#team-goals-content tr')
    opa_home = gah_elements[0].find_elements(By.CSS_SELECTOR, "td")[1].text
    spa_home = gah_elements[1].find_elements(By.CSS_SELECTOR, "td")[1].text
    pena_home = gah_elements[2].find_elements(By.CSS_SELECTOR, "td")[1].text
    caa_home = gah_elements[3].find_elements(By.CSS_SELECTOR, "td")[1].text
    oga_home = gah_elements[4].find_elements(By.CSS_SELECTOR, "td")[1].text
    

    team_dict.update({
        "total_goals_against_home": int(opa_home) + int(spa_home) + int(pena_home) + int(caa_home) + int(oga_home)
    })


    ####################
    ## 02. GOALS AWAY

    # click event for gettig data
    driver.find_elements(By.CSS_SELECTOR, "#team-goals-filter-field dd")[2].find_elements(By.CSS_SELECTOR, "a")[0].click()

    # wait for gettig data
    time.sleep(api_delay_term)
    
    # get goals for home
    gfa_elements = driver.find_elements(By.CSS_SELECTOR, '#team-goals-content tr')
    opf_away = gfa_elements[0].find_elements(By.CSS_SELECTOR, "td")[1].text
    spf_away = gfa_elements[1].find_elements(By.CSS_SELECTOR, "td")[1].text
    penf_away = gfa_elements[2].find_elements(By.CSS_SELECTOR, "td")[1].text
    caf_away = gfa_elements[3].find_elements(By.CSS_SELECTOR, "td")[1].text
    ogf_away = gfa_elements[4].find_elements(By.CSS_SELECTOR, "td")[1].text
    
    team_dict.update({
        "total_goals_for_away": int(opf_away) + int(spf_away) + int(penf_away) + int(caf_away) + int(ogf_away)
    })
    
    
    # click event for gettig data
    driver.find_elements(By.CSS_SELECTOR, "#team-goals-filter-against dd")[1].find_elements(By.CSS_SELECTOR, "a")[0].click()

    # wait for gettig data
    time.sleep(api_delay_term)

    # get goals against home
    gaa_elements = driver.find_elements(By.CSS_SELECTOR, '#team-goals-content tr')
    opa_away = gaa_elements[0].find_elements(By.CSS_SELECTOR, "td")[1].text
    spa_away = gaa_elements[1].find_elements(By.CSS_SELECTOR, "td")[1].text
    pena_away = gaa_elements[2].find_elements(By.CSS_SELECTOR, "td")[1].text
    caa_away = gaa_elements[3].find_elements(By.CSS_SELECTOR, "td")[1].text
    oga_away = gaa_elements[4].find_elements(By.CSS_SELECTOR, "td")[1].text
    
    team_dict.update({
        "total_goals_against_away": int(opa_away) + int(spa_away) + int(pena_away) + int(caa_away) + int(oga_away)
    })

    ####################
    ## 04. PASS TYPES

    # click event for gettig data
    driver.find_elements(By.CSS_SELECTOR, "#team-situation-stats-options li")[1].find_elements(By.CSS_SELECTOR, "a")[0].click()

    # wait for gettig data
    time.sleep(api_delay_term)
    
    # get goals for
    pf_elements = driver.find_elements(By.CSS_SELECTOR, '#team-passes-content tr')
    team_dict.update({
        "short_passes": pf_elements[0].find_elements(By.CSS_SELECTOR, "td")[1].text,
        "long_balls": pf_elements[1].find_elements(By.CSS_SELECTOR, "td")[1].text,
        "crosses": pf_elements[2].find_elements(By.CSS_SELECTOR, "td")[1].text,
        "through_balls": pf_elements[3].find_elements(By.CSS_SELECTOR, "td")[1].text
    })
    
    # click event for gettig data
    driver.find_elements(By.CSS_SELECTOR, "#team-passes-filter-against")[0].find_elements(By.CSS_SELECTOR, "a")[1].click()

    # wait for gettig data
    time.sleep(api_delay_term)

    # get goals against
    pa_elements = driver.find_elements(By.CSS_SELECTOR, '#team-passes-content tr')
    team_dict.update({
        "short_passes_against": pa_elements[0].find_elements(By.CSS_SELECTOR, "td")[1].text,
        "long_balls_against": pa_elements[1].find_elements(By.CSS_SELECTOR, "td")[1].text,
        "crosses_against": pa_elements[2].find_elements(By.CSS_SELECTOR, "td")[1].text,
        "through_balls_against": pa_elements[3].find_elements(By.CSS_SELECTOR, "td")[1].text
    })

    
    # close webdriver
    driver.close()
    
    return team_dict

In [75]:
crawling_team_statistics(32)

{'open_play_goals': '8',
 'set_piece_goals': '3',
 'counter_attack_goals': '1',
 'penalties': '1',
 'own_goals_scored': '0',
 'open_play_goals_conceded': '8',
 'set_piece_goals_conceded': '3',
 'counter_attack_goals_conceded': '2',
 'penalties_conceded': '2',
 'own_goals': '1',
 'total_goals_for_home': 10,
 'total_goals_against_home': 10,
 'total_goals_for_away': 6,
 'total_goals_against_away': 6,
 'short_passes': '467',
 'long_balls': '49',
 'crosses': '20',
 'through_balls': '2',
 'short_passes_against': '392',
 'long_balls_against': '51',
 'crosses_against': '16',
 'through_balls_against': '2'}

In [77]:
crawling_team_statistics(32)

{'open_play_goals': '8',
 'set_piece_goals': '3',
 'counter_attack_goals': '1',
 'penalties': '1',
 'own_goals_scored': '0',
 'open_play_goals_conceded': '8',
 'set_piece_goals_conceded': '3',
 'counter_attack_goals_conceded': '2',
 'penalties_conceded': '2',
 'own_goals': '1',
 'total_goals_for_home': 10,
 'total_goals_against_home': 10,
 'total_goals_for_away': 6,
 'total_goals_against_away': 6,
 'short_passes': '467',
 'long_balls': '49',
 'crosses': '20',
 'through_balls': '2',
 'short_passes_against': '392',
 'long_balls_against': '51',
 'crosses_against': '16',
 'through_balls_against': '2'}