In [7]:
import requests

from bs4 import BeautifulSoup

In [8]:
# Download the match page. Sofascore blocks requests that carry no User-Agent,
# so send a browser-like one. requests never times out on its own, so cap the
# wait to keep this cell from hanging if the server does not answer.
response = requests.get(
    'https://www.sofascore.com/arsenal-manchester-city/rsR#10385636',
    headers={'User-Agent': 'Mozilla/5.0'},
    timeout=10,  # seconds
)

In [9]:
# Parse the downloaded page with the 'html.parser' backend
soup = BeautifulSoup(markup=response.text, features='html.parser')

If we wanted to scrape all of the shots using only plain `requests` and Beautiful Soup,
we'd have to do some pretty complicated work to get the data we want.

In [13]:
# The shot markers are loaded dynamically, so they are missing from the HTML
# we downloaded and this selector matches nothing.
soup.select(selector='g[cursor="pointer"]')

[]

Instead, we'll call the same APIs the page uses to load that data and fetch it directly.

If we look in the `Network` tab of the browser's developer tools, we can see the API calls the page makes.
We can then copy one of those calls and reproduce it in our own code.

# Steps:
1. Find the API call `shotmap` in the network tab
2. Right-click the request and copy it as cURL
3. Go to curlconverter.com and paste the cURL command
4. Copy the generated Python code

In [14]:
# Headers the browser sent with its `shotmap` API call; we replay them so the
# API accepts our request.
headers = {
    'authority': 'api.sofascore.com',
    'accept': '*/*',
    'accept-language': 'en-US,en;q=0.9',
    'cache-control': 'max-age=0',
    'dnt': '1',
    # Cached ETag copied from the browser: it makes this a conditional request
    'if-none-match': 'W/"4bebed6144"',
    'origin': 'https://www.sofascore.com',
    'referer': 'https://www.sofascore.com/',
    'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
}

# Change the URL to match the one you copied. The number in the path is the
# match id that identifies the game --> 10385636
# requests never times out on its own, so cap the wait to avoid a hung cell.
response = requests.get(
    'https://api.sofascore.com/api/v1/event/10385636/shotmap',
    headers=headers,
    timeout=10,  # seconds
)

In [17]:
# Sent exactly as copied, the request comes back with status 304
# ("Not Modified") instead of 200
response

<Response [304]>

In [19]:
# The 304 is triggered by the 'if-none-match' header copied from the browser:
# it carries a cached ETag, which turns the call into a conditional request
# that the server may answer with "Not Modified" instead of the data.
# Dropping that validator makes the request unconditional, so the server has
# to send the full payload.
# (Sending an 'If-Modified-Since' date instead is fragile: servers must ignore
# it while 'if-none-match' is present, and a value such as 'Tues, 18 Jul 2023'
# is not a valid HTTP-date anyway -- the day abbreviation is 'Tue'.)
# A new dict is built rather than mutating in place so the cell can be re-run.
headers = {
    name: value
    for name, value in headers.items()
    if name.lower() != 'if-none-match'  # header names are case-insensitive
}

In [20]:
# Repeat the call with the updated headers; this time the API returns 200.
# requests never times out on its own, so cap the wait to avoid a hung cell.
response = requests.get(
    'https://api.sofascore.com/api/v1/event/10385636/shotmap',
    headers=headers,
    timeout=10,  # seconds
)
response

<Response [200]>

In [21]:
# Decode the JSON body of the successful response into Python objects
shots = response.json()

In [22]:
# Show the parsed payload; the individual shots are in the list under 'shotmap'
shots

{'shotmap': [{'player': {'name': 'Erling Haaland',
    'firstName': '',
    'lastName': '',
    'slug': 'erling-haaland',
    'shortName': 'E. Haaland',
    'position': 'F',
    'userCount': 252657,
    'id': 839956},
   'isHome': True,
   'shotType': 'goal',
   'goalType': 'regular',
   'situation': 'assisted',
   'playerCoordinates': {'x': 13.9, 'y': 50.4, 'z': 0},
   'bodyPart': 'left-foot',
   'goalMouthLocation': 'low-centre',
   'goalMouthCoordinates': {'x': 0, 'y': 51.7, 'z': 3.8},
   'xg': 0.32696941494942,
   'xgot': 0.3211,
   'id': 1961580,
   'time': 90,
   'addedTime': 5,
   'timeSeconds': 5671,
   'draw': {'start': {'x': 50.4, 'y': 13.9},
    'end': {'x': 48.3, 'y': 0},
    'goal': {'x': 48.3, 'y': 96.2}},
   'reversedPeriodTime': 1,
   'reversedPeriodTimeSeconds': 629,
   'incidentType': 'shot'},
  {'player': {'name': 'Rob Holding',
    'slug': 'rob-holding',
    'shortName': 'R. Holding',
    'position': 'D',
    'userCount': 2093,
    'id': 793228},
   'isHome': False,

### This is just one of the many ways to get data from Sofascore; you can also look through their other API endpoints and see what you can find