# Sofascore

Sofascore is a website that provides live scores, results, and fixtures for a variety of sports. It also provides statistics and player ratings for football matches.

It also exposes advanced statistics and event data that we can use, such as shots and expected goals (xG).

Sofascore's pages are set up terribly for scraping, but we can still get the data through the JSON API endpoints the site itself uses to load it.

In [None]:
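# asyncio ships with Python, so it must not be pip-installed (the PyPI package of that name is an obsolete backport).
# !pip install playwright beautifulsoup4 pandas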
# !playwright install

In [1]:
# We'll scrape all of the shot data from the Atlético Madrid vs Barcelona match
# that this Sofascore URL points to (see the slug in the URL path).
url = 'https://www.sofascore.com/football/match/atletico-madrid-barcelona/rgbsLgb#id:12437550'

# The match id is whatever follows 'id:' in the URL fragment (12437550 here).
# Anything after a comma (extra fragment parameters, if present) is dropped so
# that only the id itself is kept.
match_id = url.rpartition('id:')[2].split(',')[0]

In [2]:
import asyncio
import json
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup

In [11]:
async def main(event_id=None):
    """Fetch the shot map for a Sofascore match from the site's JSON API.

    Args:
        event_id: Sofascore event (match) id to query. When omitted, the
            module-level ``match_id`` is used, so existing ``main()`` calls
            behave as before.

    Returns:
        The value stored under the ``'shotmap'`` key of the API response.

    Raises:
        ValueError: If the loaded page contains no ``<pre>`` element to read
            the JSON payload from (``json.JSONDecodeError``, a ``ValueError``
            subclass, is raised if the payload is not valid JSON).
        KeyError: If the decoded payload has no ``'shotmap'`` key.
    """
    if event_id is None:
        event_id = match_id

    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            page = await browser.new_page()
            await page.goto(f'https://www.sofascore.com/api/v1/event/{event_id}/shotmap')
            content = await page.content()
        finally:
            # Close the browser even when navigation fails, so no browser
            # process is left running after an error.
            await browser.close()

    # The JSON payload is read from the <pre> element of the page the
    # browser produced for the API response.
    soup = BeautifulSoup(content, 'html.parser')
    payload = soup.select_one('pre')
    if payload is None:
        raise ValueError(
            f'No <pre> element in the shotmap response for event {event_id}; '
            'the request may have been blocked or the id may be invalid.'
        )

    return json.loads(payload.text)['shotmap']

In [13]:
# Jupyter/IPython allows `await` at the top level of a cell, so the coroutine
# can be awaited directly; in a plain script use `asyncio.run(main())` instead.
result = await main()

In [15]:
# Inspect the first shot record to see which fields the API returns.
print(result[0])

{'player': {'name': 'Ferran Torres', 'slug': 'ferran-torres', 'shortName': 'F. Torres', 'position': 'F', 'jerseyNumber': '7', 'userCount': 73150, 'id': 855833, 'fieldTranslations': {'nameTranslation': {'ar': 'فيرران توريس', 'hi': 'फ़ेरैन टोरेस', 'bn': 'ফেরান টরেস'}, 'shortNameTranslation': {'ar': 'ف. توريس', 'hi': 'एफ. टोरेस', 'bn': 'এফ. টরেস'}}}, 'isHome': False, 'shotType': 'goal', 'goalType': 'regular', 'situation': 'regular', 'playerCoordinates': {'x': 18.1, 'y': 54.3, 'z': 0}, 'bodyPart': 'right-foot', 'goalMouthLocation': 'low-right', 'goalMouthCoordinates': {'x': 0, 'y': 47.1, 'z': 12}, 'xg': 0.092332102358341, 'xgot': 0.31475684046745, 'id': 4600503, 'time': 90, 'addedTime': 8, 'timeSeconds': 5837, 'draw': {'start': {'x': 54.3, 'y': 18.1}, 'end': {'x': 52.9, 'y': 0}, 'goal': {'x': 52.9, 'y': 88}}, 'reversedPeriodTime': 1, 'reversedPeriodTimeSeconds': 463, 'periodTimeSeconds': 437, 'incidentType': 'shot'}


In [16]:
import pandas as pd

# One row per shot record; nested dict fields (e.g. 'player', 'draw') are kept
# as-is in single columns rather than being flattened.
df = pd.DataFrame(result)

In [17]:
# Preview the first 10 rows of the shot table.
df.head(10)

Unnamed: 0,player,isHome,shotType,goalType,situation,playerCoordinates,bodyPart,goalMouthLocation,goalMouthCoordinates,xg,...,id,time,addedTime,timeSeconds,draw,reversedPeriodTime,reversedPeriodTimeSeconds,periodTimeSeconds,incidentType,blockCoordinates
0,"{'name': 'Ferran Torres', 'slug': 'ferran-torr...",False,goal,regular,regular,"{'x': 18.1, 'y': 54.3, 'z': 0}",right-foot,low-right,"{'x': 0, 'y': 47.1, 'z': 12}",0.092332,...,4600503,90,8.0,5837,"{'start': {'x': 54.3, 'y': 18.1}, 'end': {'x':...",1,463,437,shot,
1,"{'name': 'Lamine Yamal', 'slug': 'lamine-yamal...",False,goal,regular,assisted,"{'x': 21.3, 'y': 68.6, 'z': 0}",left-foot,low-centre,"{'x': 0, 'y': 51.3, 'z': 14.6}",0.021289,...,4600462,90,2.0,5486,"{'start': {'x': 68.6, 'y': 21.3}, 'end': {'x':...",1,814,86,shot,
2,"{'name': 'Alexander Sørloth', 'slug': 'alexand...",True,miss,,assisted,"{'x': 21.5, 'y': 37.2, 'z': 0}",left-foot,left,"{'x': 0, 'y': 59.7, 'z': 36.1}",0.055024,...,4600458,90,1.0,5401,"{'start': {'x': 37.2, 'y': 21.5}, 'end': {'x':...",1,899,1,shot,
3,"{'name': 'Robert Lewandowski', 'slug': 'robert...",False,miss,,assisted,"{'x': 18.9, 'y': 47.2, 'z': 0}",left-foot,high-left,"{'x': 0, 'y': 59.5, 'z': 43.1}",0.044712,...,4600423,87,,5203,"{'start': {'x': 47.2, 'y': 18.9}, 'end': {'x':...",4,197,2503,shot,
4,"{'name': 'Ferran Torres', 'slug': 'ferran-torr...",False,goal,regular,assisted,"{'x': 7.6, 'y': 54.2, 'z': 0}",head,high-right,"{'x': 0, 'y': 48, 'z': 20.3}",0.294084,...,4600398,78,,4637,"{'start': {'x': 54.2, 'y': 7.6}, 'end': {'x': ...",13,763,1937,shot,
5,"{'name': 'Lamine Yamal', 'slug': 'lamine-yamal...",False,save,,assisted,"{'x': 14.4, 'y': 72.2, 'z': 0}",left-foot,low-centre,"{'x': 0, 'y': 48.9, 'z': 19.6}",0.025378,...,4600342,76,,4556,"{'start': {'x': 72.2, 'y': 14.4}, 'block': {'x...",15,844,1856,shot,"{'x': 1.8, 'y': 53.6, 'z': 0}"
6,"{'name': 'Robert Lewandowski', 'slug': 'robert...",False,goal,regular,assisted,"{'x': 13.8, 'y': 47.4, 'z': 0}",left-foot,low-right,"{'x': 0, 'y': 46.4, 'z': 6.3}",0.079959,...,4600335,72,,4302,"{'start': {'x': 47.4, 'y': 13.8}, 'end': {'x':...",19,1098,1602,shot,
7,"{'name': 'Alexander Sørloth', 'slug': 'alexand...",True,goal,regular,assisted,"{'x': 9.4, 'y': 50, 'z': 0}",left-foot,low-right,"{'x': 0, 'y': 46.1, 'z': 13.3}",0.467966,...,4600325,70,,4166,"{'start': {'x': 50, 'y': 9.4}, 'end': {'x': 53...",21,1234,1466,shot,
8,"{'name': 'Clément Lenglet', 'slug': 'clement-l...",True,miss,,corner,"{'x': 7.4, 'y': 47.2, 'z': 0}",head,high-right,"{'x': 0, 'y': 39.7, 'z': 62.5}",0.100995,...,4600315,68,,4041,"{'start': {'x': 47.2, 'y': 7.4}, 'end': {'x': ...",23,1359,1341,shot,
9,"{'name': 'Robert Lewandowski', 'slug': 'robert...",False,miss,,assisted,"{'x': 4.9, 'y': 50.1, 'z': 0}",head,high,"{'x': 0, 'y': 54.3, 'z': 68.1}",0.256534,...,4600295,59,,3505,"{'start': {'x': 50.1, 'y': 4.9}, 'end': {'x': ...",32,1895,805,shot,
