In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [2]:
# Page to scrape: the URL points at BBC Sport's Premier League top-scorers page.
url = "https://www.bbc.com/sport/football/premier-league/top-scorers"

# Always pass a timeout: requests does not time out by default, so a stalled
# connection would otherwise leave this cell running forever.
response = requests.get(url, timeout=10)

In [3]:
# Check for HTTP errors: raise_for_status() raises if the server answered with
# an error status and otherwise returns None, so a clean run shows no output.

response.raise_for_status()

In [4]:
# raise_for_status() returns None when there is no error, so this prints "None".
print(response.raise_for_status())

None


In [5]:
# HTTP status code of the response (200 here, i.e. the request succeeded)

response.status_code

200

In [6]:
# Response body as a decoded str; slice to preview only the first 200 characters

response.text[:200]

'<!DOCTYPE html><html lang="en-GB" class="no-js"><head><meta charSet="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" /><title data-rh="true">Premier League Top Scorers - B'

In [7]:
# Confirm that .text is a str
type(response.text)

str

In [8]:
# Response body as raw bytes; slice to preview only the first 200 bytes
response.content[:200]

b'<!DOCTYPE html><html lang="en-GB" class="no-js"><head><meta charSet="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" /><title data-rh="true">Premier League Top Scorers - B'

In [9]:
# Confirm that .content is a bytes object
type(response.content)

bytes

In [10]:
# Parse the raw response bytes into a navigable tree using Python's
# built-in HTML parser.

soup = BeautifulSoup(markup=response.content, features="html.parser")

In [11]:
# Preview only the start of the prettified document: printing the full page
# markup floods the notebook output and buries the cells that follow.
print(soup.prettify()[:1000])

<!DOCTYPE html>
<html class="no-js" lang="en-GB">
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <title data-rh="true">
   Premier League Top Scorers - BBC Sport
  </title>
  <meta content="Premier League top scorers. Showing assists, time on pitch and the shots on and off target." data-rh="true" name="description"/>
  <meta content="#FFFFFF" data-rh="true" name="theme-color"/>
  <meta content="Premier League top scorers. Showing assists, time on pitch and the shots on and off target." data-rh="true" property="og:description"/>
  <meta content="https://static.files.bbci.co.uk/core/website/assets/static/sport/bbc-sport-logo.0da9386782.png" data-rh="true" property="og:image"/>
  <meta content="BBC Sport" data-rh="true" property="og:site_name"/>
  <meta content="Premier League Top Scorers - BBC Sport" data-rh="true" property="og:title"/>
  <meta content="article" data-rh="true" property="og:type"/>
  <meta content="https://www.b

In [12]:
# Column accumulators for the scraped table: one independent list per field.
player_names, team_names, goals = [], [], []
assists, num_matches, shots = [], [], []

In [22]:
# Fetch the page, parse the table rows and build `df_players`.
# Only the network call is guarded: a failed request is printed and the cell
# stops there, while parsing problems surface with a full traceback.
try:
    # The timeout keeps a stalled connection from hanging the cell indefinitely.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
except requests.exceptions.RequestException as e:
    # Covers connection failures, timeouts and the HTTPError raised by
    # raise_for_status(); unrelated bugs are no longer swallowed here.
    print(e)
else:
    soup = BeautifulSoup(response.content, 'html.parser')

    # Fail with a clear message instead of an AttributeError on None when the
    # page no longer contains a table body.
    table_body = soup.find('tbody')
    if table_body is None:
        raise ValueError("No <tbody> element found; the page layout may have changed")

    # NOTE(review): the class strings below look auto-generated and are matched
    # as exact attribute values; presumably they change when the site is
    # rebuilt, so confirm them against the live page if no rows are found.
    players = table_body.find_all('tr', class_='ssrcss-qqhdqi-TableRowBody e1icz100')

    # Reset the accumulators so re-running the cell does not duplicate rows.
    player_names = []
    team_names = []
    goals = []
    assists = []
    num_matches = []
    shots = []

    for player in players:
        player_name = player.find('div', class_='ssrcss-m6ah29-PlayerName e1n8xy5b1').get_text(strip=True)
        team_name = player.find('div', class_='ssrcss-qvpga1-TeamsSummary e1n8xy5b0').get_text(strip=True)
        goals_scored = int(player.find('div', class_='ssrcss-18ap757-CellWrapper ef9ipf0').get_text(strip=True))

        # Remaining numeric cells of the row share one wrapper class.
        # NOTE(review): positions 0, 2 and -3 are assumed to be the assists,
        # matches-played and shots columns; verify against the table header.
        stats = player.find_all('div', class_='ssrcss-1vo7v3r-CellWrapper ef9ipf0')
        assists_made = int(stats[0].get_text(strip=True))
        matches_played = int(stats[2].get_text(strip=True))
        shots_taken = int(stats[-3].get_text(strip=True))

        player_names.append(player_name)
        team_names.append(team_name)
        goals.append(goals_scored)
        assists.append(assists_made)
        num_matches.append(matches_played)
        shots.append(shots_taken)

    # Assemble the parallel lists into a DataFrame, one column per field.
    data = {
        'player': player_names,
        'team': team_names,
        'matches': num_matches,
        'goals': goals,
        'assists': assists,
        'shots': shots
    }
    df_players = pd.DataFrame(data)

In [23]:
# Display the scraped table built in the previous cell
df_players

Unnamed: 0,player,team,matches,goals,assists,shots
0,Richarlison,Tottenham,1,2,0,5
1,E. Haaland,Man City,1,2,0,6
2,C. Wood,Nottm Forest,1,2,0,2
3,A. Semenyo,Bournemouth,1,2,0,3
4,H. Ekitiké,Liverpool,1,1,1,4
5,T. Reijnders,Man City,1,1,1,3
6,F. Chiesa,Liverpool,1,1,0,1
7,W. Isidor,Sunderland,1,1,0,1
8,R. Cherki,Man City,1,1,0,1
9,Rodrigo Muniz,Fulham,1,1,0,1
