# English football: How the table evolved each week of the 2020-21 season

### Import Python tools and Jupyter configuration

In [1]:
# Load the lab_black IPython extension for this notebook session
# (presumably auto-formats code cells with Black — confirm).
%load_ext lab_black

In [2]:
import pandas as pd
import requests
import altair as alt
import matplotlib.pyplot as plt
import pageviewapi
import datetime as dt
import requests
from bs4 import BeautifulSoup

ModuleNotFoundError: No module named 'altair_latimes'

In [None]:
# Notebook display settings: render up to 100 columns and 1000 rows of a
# dataframe, and never truncate the text inside a cell.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Turn off Altair's max-rows check on chart data.
alt.data_transformers.disable_max_rows()

In [None]:
# Current local date as a compact YYYYMMDD string.
today = format(dt.datetime.today(), "%Y%m%d")

### Get all the tables and combine them into a dataframe

In [None]:
# Schedule page for one round of the 2020-21 season; the round number
# (1 through 38) is appended to this prefix.
round_url_prefix = (
    "https://www.worldfootball.net/schedule/eng-premier-league-2020-2021-spieltag/"
)

# Fetch every round's page and keep the table at position 3 of the parsed
# HTML tables (presumably the league standings after that round — confirm).
tables = [
    pd.read_html(round_url_prefix + str(round_number))[3]
    for round_number in range(1, 39)
]

# One dataframe per round, in round order, ready to be concatenated.
dfs = [pd.DataFrame(table) for table in tables]

In [None]:
# Stack the per-round tables into one long dataframe. Each scraped table
# carries its own 0-based index, so renumber the rows instead of keeping
# duplicate index labels, which make label-based selection ambiguous.
df = pd.concat(dfs, ignore_index=True)

### Clean up the dataframe

In [None]:
# Remove the first "Team" column; the club name used later lives in "Team.1".
# (Presumably "Team" only holds the crest image cell — confirm.)
df.drop(columns=["Team"], inplace=True)

In [None]:
# Map the scraped column headers to descriptive snake_case names.
column_names = {
    "#": "place",
    "Team.1": "club",
    "M.": "match_number",
    "W": "wins",
    "D": "draws",
    "L": "losses",
    "goals": "goals",
    "Dif.": "differential",
    "Pt.": "points",
}

df.rename(columns=column_names, inplace=True)

### Split the goals column into two

In [None]:
# "goals" holds a colon-separated pair; break it into one column per side
# and store them as goals_for / goals_against (in that order).
goal_parts = df["goals"].str.split(":", expand=True)
df[["goals_for", "goals_against"]] = goal_parts

In [None]:
# The combined "goals" column is no longer needed once it has been split.
df.drop(columns=["goals"], inplace=True)

In [None]:
# Keep match_number numeric: the charts below encode it as a quantitative
# field ("match_number:Q"), and string values would order lexicographically
# ("10" before "2") when the line is drawn.
df["match_number"] = df["match_number"].astype(int)

In [None]:
# Both halves of the split goals string are counts, so store both as
# integers (str.split leaves them as strings).
df["goals_for"] = df["goals_for"].astype(int)
df["goals_against"] = df["goals_against"].astype(int)

In [None]:
# Preview the first 20 rows of the cleaned table.
df.head(n=20)

In [None]:
# Fill missing "place" values with the value from the row above
# (presumably blank ranks mark clubs tied with the preceding club — confirm).
# Series.ffill() replaces the deprecated fillna(method="ffill") spelling.
df["place"] = df["place"].ffill()

In [None]:
# Quick look at the first five rows after filling "place".
df.head(n=5)

---

### One club

In [None]:
# First five rows for a single club.
is_spurs = df["club"].eq("Tottenham Hotspur")
df[is_spurs].head(n=5)

### Chart them all

In [None]:
# Small multiples: one step-line per club showing its table position
# against match_number, laid out five facets per row.
alt.Chart(df).mark_line(interpolate="step-after", size=2).encode(
    x=alt.X(
        "match_number:Q",
        title="Week",
        scale=alt.Scale(domain=(0, 38), zero=False),
        axis=alt.Axis(tickCount=5, grid=False),
    ),
    y=alt.Y(
        "place:Q",
        title="Place",
        # Reversed domain puts the best position at the top. It has to end at
        # 1 (not 2) so that a club in first place stays inside the axis range.
        scale=alt.Scale(domain=(20, 1), zero=False),
        axis=alt.Axis(
            format="",
            tickSize=0,
            domainOpacity=0,
            tickCount=5,
            offset=4,
            gridWidth=0.6,
            gridColor="#dddddd",
        ),
    ),
    facet=alt.Facet("club", title="", columns=5),
).properties(
    width=150,
    height=100,
    title="2020-21: Club standing in Premier League table, by week",
).configure_view(
    strokeOpacity=0
).configure_axis(
    labelFontSize=13
)

In [None]:
# Single-club version of the standings chart: Manchester City's table
# position against match_number, drawn as a step line.
alt.Chart(df[df["club"] == "Manchester City"]).mark_line(
    interpolate="step-after", size=2
).encode(
    x=alt.X(
        "match_number:Q",
        title="Game",
        scale=alt.Scale(domain=(0, 38), zero=False),
        axis=alt.Axis(tickCount=5, grid=False),
    ),
    y=alt.Y(
        "place:Q",
        title="Place",
        # Reversed domain puts the best position at the top. It has to end at
        # 1 (not 2) so that rows with place 1 stay inside the axis range.
        scale=alt.Scale(domain=(20, 1), zero=False),
        axis=alt.Axis(
            format="",
            tickSize=0,
            domainOpacity=0,
            tickCount=5,
            offset=4,
            gridWidth=0.6,
            gridColor="#dddddd",
        ),
    ),
).properties(
    width=600,
    height=300,
    title="2020-21: Manchester City standing in Premier League table, by week",
).configure_view(
    strokeOpacity=0
).configure_axis(
    labelFontSize=13
)

---

### Exports

In [None]:
# Write the weekly standings to CSV, leaving out the dataframe index.
df.to_csv(
    path_or_buf="output/premier-league-weekly-standings.csv",
    index=False,
)

In [None]:
# Write the same data as a JSON array with one object per row,
# indented by two spaces.
df.to_json(
    path_or_buf="output/premier-league-weekly-standings.json",
    orient="records",
    indent=2,
)