In [1]:
from io import StringIO
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

# HTTP headers sent with every request made by this notebook; only a
# browser-style User-Agent string is set.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/103.0.0.0 Safari/537.36"
    ),
}

def scrape_table(url, timeout=30):
    """Download a page and return its first HTML table as a DataFrame.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (passed straight to
        ``requests.get``). Defaults to 30 so a stalled connection cannot hang
        the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        The first table that ``pandas.read_html`` finds in the page.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException
        On connection problems or when the timeout expires.
    ValueError
        Raised by ``pandas.read_html`` when the page contains no table.
    """
    r = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error response instead of trying to parse an error
    # page as if it were the requested table.
    r.raise_for_status()

    # Round-trip the markup through BeautifulSoup before handing it to pandas;
    # StringIO is used because read_html expects a file-like object for
    # literal HTML.
    doc = BeautifulSoup(r.text, 'html.parser')
    table = pd.read_html(StringIO(str(doc)))[0]
    return table

# Page that is scraped for the list of competitions and their season ranges.
url = "https://www.11v11.com/league-tables/"

# scrape_table returns the first HTML table found on the page.
comp_table = scrape_table(url)

# Last expression of the cell: render the scraped competition index.
comp_table

Unnamed: 0,Competition,First season,Last season,First match date,Last match date
0,Premier League,1993,2024,1992-08-15,2023-12-17
1,League Championship,2005,2024,2004-08-07,2023-12-18
2,League One,2005,2024,2004-08-07,2023-12-16
3,League Two,2005,2024,2004-08-07,2023-12-16
4,League Division 1,1993,2004,1992-08-15,2004-05-09
5,League Division 2,1993,2004,1992-08-15,2004-05-08
6,League Division 3,1993,2004,1992-08-15,2004-05-08
7,League Division One,1889,1992,1888-09-08,1992-05-02
8,League Division Two,1893,1992,1892-09-03,1992-05-02
9,League Division Three,1959,1992,1958-08-23,1992-05-02


In [2]:
def create_url(comp, season):
    """Build the league-table URL for one competition and season.

    The competition name is lower-cased, spaces are turned into hyphens and
    round brackets are removed; the resulting slug and ``season`` are then
    placed into the league-tables path.
    """
    # (old, new) substitutions applied in order to the lower-cased name.
    substitutions = ((" ", "-"), ("(", ""), (")", ""))

    comp = comp.lower()
    for old, new in substitutions:
        comp = comp.replace(old, new)

    return f"https://www.11v11.com/league-tables/{comp}/{season}/"

# One record (a plain dict) per competition, holding its name and the first
# and last season listed for it. Despite the name, this is a list of dicts.
comp_dict = comp_table.loc[:, ["Competition", "First season", "Last season"]].to_dict(orient="records")

# Seasons 1940-1946 are never requested.
# NOTE(review): presumably the wartime seasons with no league tables - confirm.
skip_years = [*range(1940, 1947)]

# Every (competition, season) pair in the listed range becomes one URL, except
# the skipped years above and any season up to and including 1921.
# NOTE(review): the reason for the 1921 cut-off is not visible here - confirm.
urls = [
    create_url(entry["Competition"], year)
    for entry in comp_dict
    for year in range(entry["First season"], entry["Last season"] + 1)
    if year > 1921 and year not in skip_years
]

urls[:5]

['https://www.11v11.com/league-tables/premier-league/1993/',
 'https://www.11v11.com/league-tables/premier-league/1994/',
 'https://www.11v11.com/league-tables/premier-league/1995/',
 'https://www.11v11.com/league-tables/premier-league/1996/',
 'https://www.11v11.com/league-tables/premier-league/1997/']

In [5]:
# Lookup from competition name (as derived from the URL slug in format_table)
# to the numeric league tier stored in the "league_tier" column.
tiers = dict(
    [
        # Current naming scheme
        ("Premier League", 1),
        ("League Championship", 2),
        ("League One", 3),
        ("League Two", 4),
        # "Division <digit>" naming scheme
        ("League Division 1", 2),
        ("League Division 2", 3),
        ("League Division 3", 4),
        # "Division <word>" naming scheme
        ("League Division One", 1),
        ("League Division Two", 2),
        ("League Division Three", 3),
        ("League Division Four", 4),
        # Regional third divisions share a tier
        ("Division Three North", 3),
        ("Division Three South", 3),
    ]
)

def get_season(url):
    """Turn a league-table URL into a ``YYYY/YY`` season label.

    The last path segment of ``url`` is read as the year in which the season
    ends; the label pairs the preceding year with the last two digits of that
    year, e.g. ``.../premier-league/1993/`` -> ``"1992/93"``.

    Parameters
    ----------
    url : str
        League-table URL whose final path segment is the season year. A
        trailing slash is optional.

    Returns
    -------
    str
        Season label such as ``"1999/00"``.

    Raises
    ------
    ValueError
        If the final path segment is not an integer.
    """
    # Strip trailing slashes first so the year is found whether or not the URL
    # ends in "/"; indexing [-2] on the raw split only worked with the slash.
    end_year = int(url.rstrip("/").split("/")[-1])
    start_year = end_year - 1
    return f"{start_year}/{str(end_year)[-2:]}"


def format_table(table, source_url=None):
    """Reshape one scraped league table into the combined-output layout.

    Column names are lower-cased, then season, position, competition, tier and
    source URL columns are added and the frame is reduced to a fixed column
    order.

    Parameters
    ----------
    table : pandas.DataFrame
        Raw table as returned by ``scrape_table``. It is left unmodified; the
        work is done on a copy.
    source_url : str, optional
        URL the table was scraped from. The season label and competition name
        are derived from it. When omitted, the module-level ``url`` variable
        is used, which keeps older ``format_table(table)`` calls working.

    Returns
    -------
    pandas.DataFrame
        Columns: season, competition, league_tier, pos, team, pld, w, d, l,
        gf, ga, gd, pts, url.

    Raises
    ------
    KeyError
        If the scraped table lacks one of the expected columns.
    """
    if source_url is None:
        # Legacy fallback: earlier versions read the loop variable `url`
        # implicitly. Prefer passing the URL explicitly.
        source_url = url

    # Work on a copy so the caller's frame keeps its original columns.
    formatted = table.copy()
    formatted.columns = formatted.columns.str.lower()

    formatted["season"] = get_season(source_url)
    # Position is taken from row order (index + 1), not from a scraped column.
    formatted["pos"] = formatted.index + 1
    # Path segment 4 of the URL is the competition slug, e.g. "premier-league".
    formatted["competition"] = source_url.split("/")[4].replace("-", " ").title()
    # Competitions missing from `tiers` end up with a missing league_tier.
    formatted["league_tier"] = formatted["competition"].map(tiers)
    formatted["url"] = source_url

    return formatted[['season', 'competition', 'league_tier', 'pos', 'team', 'pld', 'w', 'd', 'l', 'gf', 'ga', 'gd', 'pts', 'url']]

# Scrape every season page, normalise it and collect the results.
tables = []
total = len(urls)
for counter, url in enumerate(urls, start=1):
    table = scrape_table(url)
    # Pass the URL explicitly so formatting does not depend on global state.
    table = format_table(table, url)

    tables.append(table)
    print(f"Scraped {counter} of {total}: {url}")

# Stack all season tables into one frame with a fresh 0..n-1 index.
tabs_df = pd.concat(tables, ignore_index=True)

Scraped 1 of 384: https://www.11v11.com/league-tables/premier-league/1993/
Scraped 2 of 384: https://www.11v11.com/league-tables/premier-league/1994/
Scraped 3 of 384: https://www.11v11.com/league-tables/premier-league/1995/
Scraped 4 of 384: https://www.11v11.com/league-tables/premier-league/1996/
Scraped 5 of 384: https://www.11v11.com/league-tables/premier-league/1997/
Scraped 6 of 384: https://www.11v11.com/league-tables/premier-league/1998/
Scraped 7 of 384: https://www.11v11.com/league-tables/premier-league/1999/
Scraped 8 of 384: https://www.11v11.com/league-tables/premier-league/2000/
Scraped 9 of 384: https://www.11v11.com/league-tables/premier-league/2001/
Scraped 10 of 384: https://www.11v11.com/league-tables/premier-league/2002/
Scraped 11 of 384: https://www.11v11.com/league-tables/premier-league/2003/
Scraped 12 of 384: https://www.11v11.com/league-tables/premier-league/2004/
Scraped 13 of 384: https://www.11v11.com/league-tables/premier-league/2005/
Scraped 14 of 384: ht

In [6]:
# Preview the first five rows of the combined table (head() defaults to 5).
tabs_df.head()

Unnamed: 0,season,competition,league_tier,pos,team,pld,w,d,l,gf,ga,gd,pts,url
0,1992/93,Premier League,1,1,Manchester United,42,24,12,6,67,31,36,84,https://www.11v11.com/league-tables/premier-le...
1,1992/93,Premier League,1,2,Aston Villa,42,21,11,10,57,40,17,74,https://www.11v11.com/league-tables/premier-le...
2,1992/93,Premier League,1,3,Norwich City,42,21,9,12,61,65,-4,72,https://www.11v11.com/league-tables/premier-le...
3,1992/93,Premier League,1,4,Blackburn Rovers,42,20,11,11,68,46,22,71,https://www.11v11.com/league-tables/premier-le...
4,1992/93,Premier League,1,5,Queens Park Rangers,42,17,12,13,63,55,8,63,https://www.11v11.com/league-tables/premier-le...


In [7]:
# Write the combined league tables to CSV without the index column.
# The target directory is created first: to_csv does not create missing
# folders, and failing here would throw away the whole scrape.
output_path = Path("./data/league_tables.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
tabs_df.to_csv(output_path, index=False)