# National League archives

#### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np
import altair as alt
import requests
from bs4 import BeautifulSoup

In [3]:
# Let pandas show up to 100 columns and 100 rows when a frame is displayed.
for option_name in ("display.max_columns", "display.max_rows"):
    pd.set_option(option_name, 100)

In [4]:
# Current local date rendered as MM/DD/YYYY text.
today = pd.Timestamp.today().strftime("%m/%d/%Y")

---

## Get data

#### Harvest links to historical tables from [archives page](https://www.thenationalleague.org.uk/match-info/archives)

In [5]:
# Fetch the archives index page and parse it.
# - timeout: without one, requests waits indefinitely on an unresponsive server
#   and the cell never finishes.
# - raise_for_status(): stop here on a 4xx/5xx response instead of parsing an
#   error page and silently harvesting zero links below.
html = requests.get(
    "https://www.thenationalleague.org.uk/match-info/archives", timeout=30
)
html.raise_for_status()
soup = BeautifulSoup(html.text, "html.parser")

In [6]:
# Build one {"season", "url"} record per archive link on the page.
# - find_all is the current Beautiful Soup name; findAll is a deprecated alias.
# - href=True restricts the match to anchors that actually carry an href, so
#   link["href"] cannot raise KeyError on a bare <a class="u-palm-block">.
# - The season label is the link text with a trailing " Season"/" season" removed.
archive_list = [
    {
        "season": link.text.replace(" Season", "").replace(" season", "").strip(),
        "url": link["href"],
    }
    for link in soup.find_all("a", attrs={"class": "u-palm-block"}, href=True)
]

In [7]:
# One row per archived season. Pinning the columns keeps `season` and `url`
# present even when the scrape returns nothing.
archive_df = pd.DataFrame(archive_list, columns=["season", "url"])

# Fail fast with a clear message: an empty result means the page markup no
# longer matches the selector, and every later cell would be working on nothing.
assert not archive_df.empty, "No archive links found - has the archives page markup changed?"

archive_df.head()

Unnamed: 0,season,url
0,2021/22,https://www.thenationalleague.org.uk/archive10...
1,2020/21,https://www.thenationalleague.org.uk/archive99...
2,2019/20,https://www.thenationalleague.org.uk/archive88...
3,2018/19,https://www.thenationalleague.org.uk/archive77...
4,2017/18,https://www.thenationalleague.org.uk/archive65...


---

In [8]:
# Download the first HTML table from each archive URL and tag it with its season.
# A comprehension is used so no loop variables leak into the kernel namespace:
# the previous loop variable was named `df`, the same name as the final cleaned
# frame, so re-running this cell out of order silently replaced that frame with
# the last scraped table.
dfs = [
    pd.read_html(url, header=0)[0].assign(season=season)
    for season, url in zip(archive_df["season"], archive_df["url"])
]

In [9]:
# Stack every season's table into one frame with a fresh 0..n-1 index.
src = pd.concat(dfs, ignore_index=True)

In [12]:
# Split off 2019/20, whose rows take games played and points from the
# "Played"/"Points" columns rather than "Pl"/"Pts".
# NOTE(review): presumably that season's page uses a different table layout
# (it also carries "PPG" and "Adj") - confirm against the source page.
season_2019 = src.query('season == "2019/20"')
src_19 = season_2019.drop(columns=["PPG", "Adj", "Pts", "Pl"]).rename(
    columns={"Points": "Pts", "Played": "Pl"}
)

# Every other season keeps "Pl"/"Pts" and sheds the 2019/20-only columns.
src_rest = src.query('season != "2019/20"').drop(
    columns=["Played", "Points", "PPG", "Adj"]
)

In [18]:
# Recombine the two slices now that they share the same column names.
# The original row labels are kept (no index reset).
season_frames = [src_rest, src_19]
src_df = pd.concat(season_frames)

In [31]:
# Source column header -> descriptive snake_case name used in the exports.
column_names = {
    "#": "place",
    "Team": "team",
    "Pl": "games_played",
    "HW": "home_wins",
    "HD": "home_draw",
    "HL": "home_losses",
    "AW": "away_wins",
    "AD": "away_draws",
    "AL": "away_losses",
    "W": "wins",
    "D": "draws",
    "L": "losses",
    "F": "goals_for",
    "A": "goals_against",
    "Diff": "diff",
    "Pts": "points",
    "season": "season",
}

# Apply the readable names, then order rows by season and by place within it.
renamed = src_df.rename(columns=column_names)
df = renamed.sort_values(by=["season", "place"]).copy()

In [32]:
# Spot-check: show every row whose team is exactly "Wrexham".
wrexham_rows = df.query('team == "Wrexham"')
wrexham_rows

Unnamed: 0,place,team,games_played,home_wins,home_draw,home_losses,away_wins,away_draws,away_losses,wins,draws,losses,goals_for,goals_against,diff,points,season
265,4.0,Wrexham,46.0,13,7,3,9,8,6,22,15,9,66,49,17,81.0,2010/11
239,2.0,Wrexham,46.0,16,3,4,14,5,4,30,8,8,85,33,52,98.0,2011/12
218,5.0,Wrexham,46.0,11,9,3,11,5,7,22,14,10,74,45,29,80.0,2012/13
206,17.0,Wrexham,46.0,11,5,7,5,6,12,16,11,19,61,61,0,59.0,2013/14
176,11.0,Wrexham,46.0,9,8,6,8,7,8,17,15,14,56,52,4,66.0,2014/15
149,8.0,Wrexham,45.0,13,4,6,7,5,10,20,9,16,70,53,17,69.0,2015/16
130,13.0,Wrexham,23.0,5,5,2,2,4,5,7,9,7,23,29,-6,30.0,2016/17
103,10.0,Wrexham,46.0,10,10,3,7,9,7,17,19,10,49,39,10,70.0,2017/18
73,4.0,Wrexham,46.0,17,3,3,8,6,9,25,9,12,58,39,19,84.0,2018/19
64,19.0,Wrexham,37.0,7,7,4,4,3,12,11,16,10,46,49,-3,43.0,2019/20


---

## Exports

In [33]:
# Write the combined tables to CSV without the row index.
tables_csv = "data/processed/national_league_tables_2010_2021.csv"
df.to_csv(tables_csv, index=False)

In [34]:
# Write only the rows whose team is exactly "Wrexham" to their own CSV,
# again without the row index.
wrexham_csv = "data/processed/wrexham_tables_2010_2021.csv"
wrexham_df = df.query('team == "Wrexham"')
wrexham_df.to_csv(wrexham_csv, index=False)