In [1]:
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# First, try to scrape the attendance value from specific match URLs

In [2]:
# Test URLs covering several cases: a 22/23 Belgium 1st div game, a 07/08 English
# League One game, a 20/21 (covid!) English League One game, and a 16/17 Cyprus
# 1st div game.

urls = [
    "https://www.worldfootball.net/report/eerste-klasse-a-2022-2023-krc-genk-sv-zulte-waregem/",
    "https://www.worldfootball.net/report/league-one-2007-2008-millwall-fc-tranmere-rovers/",
    "https://www.worldfootball.net/report/league-one-2020-2021-ipswich-town-fleetwood-town/",
    "https://www.worldfootball.net/report/first-division-2016-2017-apoel-nikosia-anorthosis-famagusta-fc/"
]


def scrape_attendance(match_url, timeout=10):
    """Return the attendance text of a match report page, or None if absent.

    The page is searched for a ``<td class="dunkel">`` cell containing an
    ``<img title="Attendance">`` icon; the text of the *next* ``dunkel`` cell
    is returned verbatim (whitespace-stripped, not converted to a number).

    Args:
        match_url: URL of the match report page to fetch.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The attendance cell's text as a string, or None when the page has no
        ``id="site"`` container, no attendance icon, or no cell after the icon.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(match_url, timeout=timeout)
    response.raise_for_status()  # Will raise an exception if there's an error

    soup = BeautifulSoup(response.content, 'html.parser')

    # find() returns None when the container is missing; guard before chaining.
    site = soup.find(id="site")
    if site is None:
        return None

    cells = site.find_all("td", class_="dunkel")

    # Pair every cell with its successor so the "next cell" lookup can never
    # run past the end of the list (the icon being the last cell yields None).
    for icon_td, value_td in zip(cells, cells[1:]):
        if icon_td.find('img', title='Attendance') is not None:
            return value_td.get_text(strip=True)
    return None


# Loop to scrape attendance figures from URL list:

for URL in urls:
    print(f"Fetching data for: {URL}")

    attendance = scrape_attendance(URL)

    if attendance is not None:
        print(f"Attendance: {attendance}")
    else:
        print("Attendance not found.")
    print("--------------------")  # To separate results for clarity


Fetching data for: https://www.worldfootball.net/report/eerste-klasse-a-2022-2023-krc-genk-sv-zulte-waregem/
Attendance: 14.111
--------------------
Fetching data for: https://www.worldfootball.net/report/league-one-2007-2008-millwall-fc-tranmere-rovers/
Attendance: 8.925
--------------------
Fetching data for: https://www.worldfootball.net/report/league-one-2020-2021-ipswich-town-fleetwood-town/
Attendance: without spectators.
--------------------
Fetching data for: https://www.worldfootball.net/report/first-division-2016-2017-apoel-nikosia-anorthosis-famagusta-fc/
Attendance not found.
--------------------


# Next, try to scrape attendance values from all matches in a specific game week

In [3]:
# Schedule page listing every match of one game week.
SCHEDULE_URL = "https://www.worldfootball.net/schedule/fra-ligue-2-2018-2019-spieltag/10/"

base_url = "https://www.worldfootball.net"

# Seconds to wait for the server; without a timeout a stalled connection
# would hang the notebook indefinitely.
REQUEST_TIMEOUT = 10

# Fetch the schedule page
response = requests.get(SCHEDULE_URL, timeout=REQUEST_TIMEOUT)
response.raise_for_status()  # Will raise an exception if there's an error

# Parse the content using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

# find() returns None when the container is missing; fall back to no cells
# instead of failing with an AttributeError.
site = soup.find(id="site")
results = site.find_all('td', align='center') if site is not None else []

# Collect the match report links found in the centred table cells.
urls = []
for td in results:
    a_tag = td.find('a', href=True)
    if a_tag is not None and 'report' in a_tag['href']:
        # urljoin resolves root-relative, relative and absolute hrefs alike,
        # whereas plain concatenation only works for hrefs starting with "/".
        urls.append(urljoin(base_url, a_tag['href']))

# Scrape the attendance figure from every collected match report.
for URL in urls:
    print(f"Fetching data for: {URL}")

    # Fetch the webpage
    response = requests.get(URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()  # Will raise an exception if there's an error

    # Parse the content using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    site = soup.find(id="site")
    results = site.find_all("td", class_="dunkel") if site is not None else []

    # The attendance value lives in the cell that follows the one holding the
    # "Attendance" icon. Pairing each cell with its successor means the lookup
    # can never index past the end of the list.
    attendance_td = None
    for icon_td, value_td in zip(results, results[1:]):
        if icon_td.find('img', title='Attendance') is not None:
            attendance_td = value_td
            break

    if attendance_td is not None:
        attendance = attendance_td.get_text(strip=True)
        print(f"Attendance: {attendance}")
    else:
        print("Attendance not found.")
    print("--------------------")  # To separate results for clarity

Fetching data for: https://www.worldfootball.net/report/ligue-2-2018-2019-gfc-ajaccio-us-orleans/
Attendance: 2.498
--------------------
Fetching data for: https://www.worldfootball.net/report/ligue-2-2018-2019-lb-chateauroux-estac-troyes/
Attendance: 6.861
--------------------
Fetching data for: https://www.worldfootball.net/report/ligue-2-2018-2019-grenoble-foot-38-clermont-foot/
Attendance: 6.389
--------------------
Fetching data for: https://www.worldfootball.net/report/ligue-2-2018-2019-le-havre-ac-as-beziers/
Attendance: 5.297
--------------------
Fetching data for: https://www.worldfootball.net/report/ligue-2-2018-2019-fc-lorient-as-nancy/
Attendance: 6.952
--------------------
Fetching data for: https://www.worldfootball.net/report/ligue-2-2018-2019-chamois-niortais-paris-fc/
Attendance: 3.397
--------------------
Fetching data for: https://www.worldfootball.net/report/ligue-2-2018-2019-red-star-fc-ac-ajaccio/
Attendance: 1.863
--------------------
Fetching data for: https://w