In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import re
import requests

In [2]:
def convert_num_nam(string):
    """Split a concatenated jersey number and player name into two strings.

    Each character is classified on its own with ``str.isdigit``: digit
    characters are collected (in order) into the number, everything else
    (letters, spaces, punctuation) into the name. No whitespace is stripped.

    Parameters
    ----------
    string : str
        Text such as ``"10Messi"``.

    Returns
    -------
    tuple of (str, str)
        ``(number, name)``; either part is ``""`` when no matching
        characters are present.
    """
    digit_chars = []
    other_chars = []
    # Single pass: route every character to the bucket it belongs to.
    for char in string:
        bucket = digit_chars if char.isdigit() else other_chars
        bucket.append(char)

    return "".join(digit_chars), "".join(other_chars)
    
def get_field_df(soup):
    """Collect number, name and rating of the lineup players found in ``soup``.

    Every element whose CSS class matches ``LineupPlayerContainer`` is
    treated as one player. The number and name are split out of the text of
    its ``LineupPlayerText`` child with ``convert_num_nam``; the rating is
    the text of the first ``<span>`` inside its ``PlayerRatingStyled`` child.

    Parameters
    ----------
    soup : bs4.BeautifulSoup or bs4.Tag
        Parsed page (any object offering a bs4-style ``find_all``).

    Returns
    -------
    pandas.DataFrame
        Columns ``number``, ``name`` and ``rating``, one row per player.
        ``rating`` holds the literal string ``"nan"`` when no rating could
        be found for that player.
    """
    field_ratings = []
    field_players = []
    field_numbers = []
    for field_player in soup.find_all(class_=re.compile("LineupPlayerContainer")):
        player_text = field_player.find(class_=re.compile("LineupPlayerText"))
        if player_text is None:
            # ``find`` returns None when nothing matches; without a text
            # element there is no number/name to record, so skip the
            # container instead of failing with AttributeError.
            continue

        # ``find`` yields the first match or None, so a missing rating
        # element and a rating element without a <span> both fall back to
        # the "nan" placeholder instead of raising IndexError.
        rating_element = field_player.find(class_=re.compile("PlayerRatingStyled"))
        rating_span = rating_element.find("span") if rating_element is not None else None
        field_ratings.append("nan" if rating_span is None else rating_span.get_text())

        num, name = convert_num_nam(player_text.get_text())
        field_players.append(name)
        field_numbers.append(num)

    return pd.DataFrame({"number": field_numbers, "name": field_players, "rating": field_ratings})
    
def get_bench_df(soup):
    """Collect number, name and rating of the bench players found in ``soup``.

    Only bench items whose markup contains the text ``PlayerRating`` are
    included; all other bench items are ignored.

    Parameters
    ----------
    soup : bs4.BeautifulSoup or bs4.Tag
        Parsed page (any object offering a bs4-style ``find_all``).

    Returns
    -------
    pandas.DataFrame
        Columns ``number``, ``name`` and ``rating``, one row per included
        bench player.
    """
    numbers, names, ratings = [], [], []

    # The trailing space in the pattern is significant -- presumably it keeps
    # the regex from matching other classes that merely start with
    # "LeftBenchItem" (e.g. "LeftBenchItemOuter"); confirm against the page.
    bench_item_pattern = re.compile("LeftBenchItem ")

    for bench_item in soup.find_all(class_=bench_item_pattern):
        if 'PlayerRating' not in str(bench_item):
            continue

        # Number and name share the second-to-last <span> of the item.
        spans = bench_item.find_all('span')
        number, player_name = convert_num_nam(spans[-2].get_text())
        names.append(player_name)
        numbers.append(number)

        # The rating is the text of the first <span> in the rating element.
        rating_box = bench_item.find(class_=re.compile("PlayerRatingStyled"))
        rating_span = rating_box.find('span')
        ratings.append(rating_span.get_text())

    return pd.DataFrame({"number": numbers, "name": names, "rating": ratings})

In [28]:
# Download and parse the match-facts page.
page = requests.get(
    "https://www.fotmob.com/match/3602683/matchfacts/fc-utrecht-vs-go-ahead-eagles",
    timeout=30,  # requests waits indefinitely unless a timeout is given
)
page.raise_for_status()  # fail loudly instead of silently scraping an error page
soup = BeautifulSoup(page.content, 'html.parser')

# Build the player-match-card URL for every bench item whose markup mentions
# 'PlayerRating' (presumably the substitutes that received a rating).
bench_matchcard_urls = []

for bench_item in soup.find_all(class_=re.compile("LeftBenchItemOuter")):
    if 'PlayerRating' not in str(bench_item):
        continue
    link = bench_item.find('a')
    if link is None or not link.get('href'):
        # No usable anchor in this item; skip it rather than fail with a
        # TypeError/KeyError on the missing link.
        continue
    bench_matchcard_urls.append(f"https://www.fotmob.com{link['href']}")

In [33]:
# Display the collected player-match-card URLs.
bench_matchcard_urls

['https://www.fotmob.com/match/3602683/matchfacts/player-match-card/594983',
 'https://www.fotmob.com/match/3602683/matchfacts/player-match-card/304668',
 'https://www.fotmob.com/match/3602683/matchfacts/player-match-card/562846',
 'https://www.fotmob.com/match/3602683/matchfacts/player-match-card/902105',
 'https://www.fotmob.com/match/3602683/matchfacts/player-match-card/17761',
 'https://www.fotmob.com/match/3602683/matchfacts/player-match-card/957556',
 'https://www.fotmob.com/match/3602683/matchfacts/player-match-card/467480']

In [30]:
# Download and parse the match card of the first collected bench player.
matchcard = requests.get(bench_matchcard_urls[0], timeout=30)  # no timeout means waiting forever
matchcard.raise_for_status()  # stop on an HTTP error instead of parsing an error page
mc_soup = BeautifulSoup(matchcard.content, 'html.parser')

In [51]:
# Player name shown on the match card.
# NOTE(review): "jalymf" looks like a generated CSS hash fragment and may
# change when the site is rebuilt -- confirm before relying on it.
name_element = mc_soup.find(class_=re.compile("jalymf-PlayerName"))
name = name_element.get_text()

# Stat rows of the card, displayed without the first two matches.
# NOTE(review): the reason for dropping two rows is not visible here --
# presumably they are header rows; confirm against the page.
stat_rows = mc_soup.find_all(class_=re.compile("RowContainer e1fnykti"))
stat_rows[2:]

[<li class="css-6pfmj0-RowContainer e1fnykti7"><span>Minutes played</span><span>30</span></li>,
 <li class="css-6pfmj0-RowContainer e1fnykti7"><span>Goals</span><span>0</span></li>,
 <li class="css-6pfmj0-RowContainer e1fnykti7"><span>Assists</span><span>0</span></li>,
 <li class="css-6pfmj0-RowContainer e1fnykti7"><span>Total shots</span><span>2</span></li>,
 <li class="css-6pfmj0-RowContainer e1fnykti7"><span>Accurate passes</span><span>6/7 (86%)</span></li>,
 <li class="css-6pfmj0-RowContainer e1fnykti7"><span>Key passes</span><span>2</span></li>,
 <li class="css-6pfmj0-RowContainer e1fnykti7"><span>Expected goals (xG)</span><span>0.25</span></li>,
 <li class="css-6pfmj0-RowContainer e1fnykti7"><span>Expected assists (xA)</span><span>0.05</span></li>,
 <li class="css-6pfmj0-RowContainer e1fnykti7"><span>Attack</span></li>,
 <li class="css-6pfmj0-RowContainer e1fnykti7"><span>Shot accuracy</span><span>0/2 (0%)</span></li>,
 <li class="css-6pfmj0-RowContainer e1fnykti7"><span>Big chan