# Fotmob Scraper

In [None]:
import requests, json, re
from bs4 import BeautifulSoup

import pandas as pd
import numpy as np

In [None]:
# Labels of the per-season stats to extract, exactly as they appear as the
# first element of each `statsArr` entry. Their order defines the order of the
# per-season values in the list returned by get_player_stats.
_SEASON_STAT_LABELS = (
    'Average rating',
    'Matches started',
    'Subbed in',
    'Subbed out',
    'Minutes played',
    'Yellow cards',
    'Goals',
    'Expected goals (xG)',
    'Goals inside box',
    'Goals outside box',
    'Penalty goals',
    'Headed goals',
    'Right footed goals',
    'Left footed goals',
    'Shots on target',
    'Shots off target',
    'Assists',
    'Key passes',
    'Total passes',
    'Attempted dribbles',
    'Successful dribbles',
    'Attempted tackles',
    'Successful tackles',
    'Clearances',
    'Interceptions won',
)


def _first_group(pattern, text):
    """Return capture group 1 of the first match of `pattern` in `text`, or NaN if there is no match."""
    match = re.search(pattern, text)
    return match.group(1) if match is not None else np.nan


def _season_stat_values(stats_arr):
    """Return one value per label in _SEASON_STAT_LABELS for a season's `statsArr`.

    `stats_arr` is a list of `[label, value]` entries. Values are looked up by
    label rather than by position, so extra, missing or re-ordered entries
    cannot shift or blank out the stats that follow them. Labels that are not
    present yield NaN.
    """
    by_label = {}
    for entry in stats_arr:
        if len(entry) >= 2:
            # Keep the first occurrence if a label is ever repeated.
            by_label.setdefault(entry[0], entry[1])

    values = []
    for label in _SEASON_STAT_LABELS:
        value = by_label.get(label, np.nan)
        # The rating is delivered as a dict such as {'num': 6.58, 'bgcolor': ...};
        # only the number is wanted.
        if label == 'Average rating' and isinstance(value, dict):
            value = value['num']
        values.append(value)
    return values


def _previous_season_stats(career):
    """Return the `statsArr` used for the 2020-2021 season, or [] if there is none.

    Uses the second season of the first career entry when it is named
    "2020 - 2021"; otherwise falls back to the first season of the second
    career entry (its season name is not checked). When that fallback entry
    does not exist an empty list is returned, so the season's stats become NaN
    instead of raising.
    """
    seasons = career[0]['seasons']
    if len(seasons) > 1 and seasons[1]['name'] == "2020 - 2021":
        return seasons[1]['stats'][0]['statsArr']
    try:
        return career[1]['seasons'][0]['stats'][0]['statsArr']
    except (IndexError, KeyError):
        return []


def get_player_stats(name, link, verbose=True):
    """Scrape one player's profile page and return a flat row of attributes.

    Args:
        name: Player name; stored unchanged as the first element of the row.
        link: URL of the player's page (the notebook uses fotmob.com player URLs).
        verbose: When True (default), print the raw current-season stats and
            the resulting row, as the function has always done.

    Returns:
        A list of 55 items:
        [name, height, age, value, preferred_foot], followed by the 25 stats in
        _SEASON_STAT_LABELS for the current season (first season of the first
        career entry), followed by the same 25 stats for the previous season.
        Profile fields are the strings captured from the page description;
        anything that cannot be found is NaN.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        IndexError, KeyError, AttributeError: If the page lacks the description
            meta tag, the embedded JSON script, or the expected JSON structure.
    """
    # A timeout prevents the notebook from hanging forever on a stalled
    # connection; an error status is reported immediately instead of surfacing
    # later as a confusing parsing failure.
    page = requests.get(link, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, "html.parser")

    # get height, age, market value, preferred_foot from the description meta tag
    description = soup.find_all(attrs={"name": "description"})[0]['content']

    height = _first_group('Height: (.+?) cm', description)
    # NOTE(review): the '.' before the trailing space is unescaped and so
    # matches any character; left unchanged because the exact description
    # format is not visible here.
    age = _first_group('Age: (.+?). ', description)
    value = _first_group('€(.+?)M', description)
    preferred_foot = _first_group('foot: (.+?). ', description)

    # get career stats data from the embedded JSON script (id must not contain 'tb-djs')
    script = soup.find('script', {'type': 'application/json', 'id': re.compile(r'^((?!tb-djs).)*$')})
    page_state = json.loads(script.get_text(strip=True))
    career = page_state['props']['pageProps']['initialState']['player']['careerStatistics']

    # current season: first season of the first career entry
    stats_2021 = career[0]['seasons'][0]['stats'][0]['statsArr']
    if verbose:
        print(stats_2021)

    # previous season, possibly taken from a different career entry
    stats_2020 = _previous_season_stats(career)

    attribute_list = (
        [name, height, age, value, preferred_foot]
        + _season_stat_values(stats_2021)
        + _season_stat_values(stats_2020)
    )
    if verbose:
        print(attribute_list)
    return attribute_list

In [None]:
# Accumulates one row (list of attributes) per scraped player.
combined_list = list()

# Run the following 3 cells once for each player you want to add

In [None]:
# Replace the name and link below with the player to scrape
player_stats = get_player_stats(
    name="Andrea La Mantia",
    link="https://www.fotmob.com/players/195485/andrea-la-mantia",
)

[['Average rating', {'num': 6.58, 'bgcolor': '#f08022'}], ['Matches started', 1], ['Subbed in', 12], ['Subbed out', 1], ['Minutes played', 306], ['Goals', 2], ['Expected goals (xG)', '0.9'], ['Goals inside box', 2], ['Headed goals', 1], ['Left footed goals', 1], ['Shots on target', 2], ['Shots off target', 4], ['Assists', 1], ['Key passes', 6], ['Total passes', 74], ['Attempted tackles', 2], ['Clearances', 3], ['Interceptions won', 1]]
['Andrea La Mantia', '190', '30', '1.6', 'Right', 6.58, 1, 12, 1, 306, nan, 2, '0.9', 2, nan, nan, 1, nan, 1, 2, 4, 1, 6, 74, nan, nan, 2, nan, 3, 1, 7.08, 23, 11, 19, 2027, 3, 11, nan, 11, nan, nan, 5, 5, 1, 36, 42, 3, 30, 455, 6, 2, 10, 7, 20, 8]


In [None]:
# Add the scraped row to the collection. This cell is meant to be re-run, so
# a player whose name (first field) is already present is replaced in place
# rather than appended again; otherwise re-running the cell creates duplicate rows.
existing_names = [row[0] for row in combined_list]
if player_stats[0] in existing_names:
    combined_list[existing_names.index(player_stats[0])] = player_stats
else:
    combined_list.append(player_stats)

In [None]:
# Show the name (first field) of every player collected so far
for player_row in combined_list:
    print(player_row[0])

Brahim Diaz
Daniel Maldini
Rafael Leao
Ante Rebic
Alexis Saelemaekers
Junior Messias
Samuel Castillejo
Zlatan Ibrahimovic
Olivier Giroud
Lorenzo Insigne
Hirving Lozano
Matteo Politano
Adam Ounas
Victor Osimhen
Andrea Petagna
Dries Mertens
Ivan Perisic
Joaquin Correa
Lautaro Martinez
Edin Dzeko
Alexis Sanchez
Felipe Caicedo
Federico Chiesa
Federico Bernardeschi
Paulo Dybala
Dusan Vlahovic
Alvaro Morata
Moise Kean
Lorenzo Pellegrini
Henrikh Mkhitaryan
Stephan El Shaarawy
Nicolo Zaniolo
Carles Perez
Carles Perez
Tammy Abraham
Eldor Shomurodov
Felix Afena-Gyan
Ruslan Malinovsky
Matteo Pessina
Aleksey Miranchuk
Jeremie Boga
Josip Ilicic
Duvan Zapata
Luis Muriel
Mattia Zaccagni
Jovane Cabral
Raul Moro
Felipe Anderson
Pedro Rodriguez
Ciro Immobile
Sergej Milinkovic-Savic
Luis Alberto
Nicolas Gonzalez
Riccardo Sottil
Jose Callejon
Aleksandr Kokorin
Hamed Junior Traore
Filip Djuricic
Emil Konradsen Ceide
Nicolas Schiappacasse
Domenico Berardi
Gianluca Scamacca
Giacomo Raspadori
Gregoire Defrel


# Create Dataframe

In [None]:
# convert the combined list to dataframe
# Per-season stat column stems, in the same order as the per-season values in
# each row of combined_list.
season_stat_columns = [
    "average_rating", "matches_started", "subbed_in", "subbed_out",
    "minutes_played", "yellow_cards", "goals", "expected_goals",
    "goals_in_box", "goals_out_box", "penalty_goals", "headed_goals",
    "right_foot_goals", "left_foot_goals", "shots_on_target",
    "shots_off_target", "assists", "key_passes", "total_passes",
    "attempted_dribbles", "successful_dribbles", "attempted_tackles",
    "successful_tackles", "clearances", "interceptions_won",
]

# The season-suffixed names are generated from the single list above so that
# both seasons always use the same stems and every column label is unique
# (in particular, the 2020 headed-goals column is "headed_goals_2020").
df_columns = (
    ["name", "height", "age", "value", "preferred_foot"]
    + [stem + "_2021" for stem in season_stat_columns]
    + [stem + "_2020" for stem in season_stat_columns]
)

df = pd.DataFrame(combined_list, columns=df_columns)
df

Unnamed: 0,name,height,age,value,preferred_foot,average_rating_2021,matches_started_2021,subbed_in_2021,subbed_out_2021,minutes_played_2021,...,shots_off_target_2020,assists_2020,key_passes_2020,total_passes_2020,attempted_dribbles_2020,successful_dribbles_2020,attempted_tackles_2020,successful_tackles_2020,clearances_2020,interceptions_won_2020
0,Brahim Diaz,170,22,23,Left,6.85,21.0,4.0,20.0,1565,...,11.0,3.0,20.0,466.0,50.0,25.0,18.0,10.0,2.0,2.0
1,Daniel Maldini,181,20,2.2,Right,6.93,2.0,6.0,2.0,148,...,,,,13.0,2.0,2.0,1.0,,,
2,Rafael Leao,188,22,55,Right,7.39,23.0,3.0,15.0,1902,...,16.0,6.0,27.0,460.0,65.0,40.0,13.0,6.0,12.0,4.0
3,Ante Rebic,185,28,28,Right,6.59,6.0,12.0,3.0,692,...,11.0,4.0,36.0,399.0,41.0,20.0,26.0,9.0,15.0,7.0
4,Alexis Saelemaekers,180,22,19,,6.90,17.0,13.0,12.0,1516,...,15.0,4.0,30.0,803.0,61.0,37.0,43.0,26.0,2.0,25.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,Simone Zaza,186,30,2.2,Left,6.12,,10.0,,129,...,29.0,,13.0,312.0,18.0,7.0,15.0,9.0,5.0,3.0
76,Nedim Bajrami,179,23,11,Right,6.95,20.0,9.0,16.0,1731,...,18.0,8.0,56.0,705.0,114.0,49.0,13.0,6.0,2.0,9.0
77,Andrea Pinamonti,186,22,17,Right,6.71,26.0,4.0,7.0,2329,...,2.0,,2.0,51.0,3.0,2.0,,,2.0,1.0
78,Patrick Cutrone,184,24,4.5,Right,6.39,12.0,11.0,9.0,1181,...,2.0,,,50.0,,,4.0,3.0,,1.0


In [None]:
# Write the table to a CSV file, then hand it to the browser for download.
# The google.colab import is only available when running inside Colab.
csv_filename = "fotmob.csv"
df.to_csv(csv_filename)
from google.colab import files
files.download(csv_filename)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>