In [4]:
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup
import pandas as pd

async def main(season):
    """Download the fully expanded results page for one season.

    Opens the Flashscore National League results page for the season
    ``season``-``season+1`` in a Playwright-launched Chromium browser, keeps
    clicking the ".event__more--static" element ("Show more matches") until it
    is no longer found, and returns the resulting page HTML.

    Args:
        season: Starting year of the season as an int (e.g. 2015 for the
            2015-2016 season); it is used in ``season+1`` to build the URL.

    Returns:
        The page HTML as a string. If expanding the list fails several times
        in a row, a message is printed and the HTML loaded so far is returned.
    """
    # Give up after this many failures in a row, so a permanently blocked
    # button cannot keep the loop spinning forever.
    max_consecutive_failures = 3
    url = f"https://www.flashscore.com/football/england/national-league-{season}-{season+1}/results/"

    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            page = await browser.new_page()
            await page.goto(url)

            # Keep clicking "Show more matches" until it disappears
            consecutive_failures = 0
            while True:
                try:
                    show_more_button = await page.query_selector(".event__more--static")
                    if not show_more_button:
                        break
                    await show_more_button.click()
                    await page.wait_for_timeout(5000)  # Wait for new content to load
                    consecutive_failures = 0
                except Exception as exc:
                    # `Exception` rather than a bare `except`, so that
                    # KeyboardInterrupt and task cancellation still propagate.
                    consecutive_failures += 1
                    print(f"Error clicking 'Show more matches' in {season}: {exc}")
                    if consecutive_failures >= max_consecutive_failures:
                        print(f"Giving up expanding results for {season}; returning the matches loaded so far")
                        break

            # Capture the page HTML
            return await page.content()
        finally:
            # Always release the browser, even when navigation or capture fails.
            await browser.close()

In [5]:
def scrape_matches(html, season):
    """Extract one record per match from a results page.

    Every ``div`` carrying the class "event__match" is read for the home and
    away participant, the home and away score, and the time string. Day and
    month are taken from the first two dot-separated parts of the time string.
    Months from August (8) onwards are assigned to ``season``, earlier months
    to ``season + 1``.

    Rows that lack one of the expected elements, or whose time string does not
    start with a numeric day and month, are skipped instead of raising; the
    number of skipped rows is printed when it is non-zero.

    Args:
        html: Page HTML as a string.
        season: Starting year of the season as an int.

    Returns:
        A list of dicts with the keys "home_team", "away_team", "home_score",
        "away_score", "year", "month", "day" and "game_date". "year" is an
        int; all other values are strings taken from the page.
    """
    soup = BeautifulSoup(html, 'lxml')

    def text_of(node, css_class):
        # `find` returns None when the element is absent; pass that through
        # so the caller can skip the row rather than crash on `.text`.
        element = node.find("div", class_=css_class)
        return None if element is None else element.get_text(strip=True)

    match_records = []
    skipped = 0

    for match in soup.find_all("div", class_="event__match"):
        home_team = text_of(match, "event__participant--home")
        away_team = text_of(match, "event__participant--away")
        home_score = text_of(match, "event__score--home")
        away_score = text_of(match, "event__score--away")
        game_date = text_of(match, "event__time")

        if None in (home_team, away_team, home_score, away_score, game_date):
            skipped += 1
            continue

        # Split once; the first two parts are expected to be day and month.
        date_parts = game_date.split(".")
        if len(date_parts) < 2:
            skipped += 1
            continue
        day = date_parts[0].strip()
        month = date_parts[1].strip()
        if not (day.isdecimal() and month.isdecimal()):
            skipped += 1
            continue

        # The season label spans two calendar years: August onwards is the
        # first year, earlier months are the second.
        year = season if int(month) >= 8 else season + 1

        match_records.append({
            "home_team": home_team,
            "away_team": away_team,
            "home_score": home_score,
            "away_score": away_score,
            "year": year,
            "month": month,
            "day": day,
            "game_date": game_date
        })

    if skipped:
        print(f"Skipped {skipped} incomplete match rows in {season}")
    return match_records

In [6]:
for season in range(2015, 2018):
    html = await main(season)
    match_records = scrape_matches(html, season)
    df = pd.DataFrame(match_records)
    df["date"] = pd.to_datetime(df[["year", "month", "day"]])
    df.to_csv(f"national_league_{season}_{season+1}.csv", index=False)
    print(f"Saved {len(df)} matches for season {season}-{season+1}")

Saved 557 matches for season 2015-2016
Saved 557 matches for season 2016-2017
Saved 557 matches for season 2017-2018
