# English football stadiums

#### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_stiles as altstiles
import requests
import json
from bs4 import BeautifulSoup
from dms2dec.dms_convert import dms2dec
import contextily as cx

In [3]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100
pd.set_option("display.max_colwidth", None)

In [4]:
today = pd.Timestamp("today").strftime("%m/%d/%Y")

---

## Get data

#### Read table showing past winners

In [5]:
src = (
    pd.read_html("https://en.wikipedia.org/wiki/List_of_Premier_League_stadiums")[0]
).fillna("")

In [6]:
src.columns = (
    src.columns.str.lower()
    .str.replace(" ", "_")
    .str.replace("#_", "")
    .str.replace("_(m)", "", regex=False)
)

In [7]:
src["coordinates"] = src["coordinates"].str.replace(
    ".mw-parser-output .geo-default,.mw-parser-output .geo-dms,.mw-parser-output .geo-dec{display:inline}.mw-parser-output .geo-nondefault,.mw-parser-output .geo-multi-punct{display:none}.mw-parser-output .longitude,.mw-parser-output .latitude{white-space:nowrap}",
    "",
    regex=False,
)

In [8]:
src = src.drop(["image", "ref.", "pitch_length", "pitch_width"], axis=1).copy()

In [9]:
src["opened"] = src["opened"].astype(str)
src["closed"] = src["closed"].astype(str).str.replace(".0", "", regex=False)

In [10]:
src[["coordinates_dms", "coordinates_other"]] = src["coordinates"].str.split(
    " / ", expand=True
)

In [11]:
src["stadium"] = (
    src["stadium"]
    .str.replace(" Formerly Reebok Stadium and Macron Stadium", "", regex=False)
    .str.replace(" Formerly Britannia Stadium", "", regex=False)
    .str.replace(" (also known as Upton Park)", "", regex=False)
    .str.replace(" Formerly Walkers Stadium", "", regex=False)
    .str.replace(" Formerly JJB Stadium", "", regex=False)
    .str.replace(" Formerly Olympic Stadium", "", regex=False)
    .str.replace(" Formerly KCOM Stadium", "", regex=False)
    .str.replace(" Formerly Liberty Stadium", "", regex=False)
)

In [12]:
df = src[(src["closed"] == "") & (src["club"] != "—[nb 1]")].copy()

In [13]:
df.head()

Unnamed: 0,stadium,club,location,opened,closed,capacity,coordinates,coordinates_dms,coordinates_other
0,Anfield,Liverpool,Liverpool,1884,,61276,".mw-parser-output .geo-default,.mw-parser-output .geo-dms,.mw-parser-output .geo-dec{display:inline}.mw-parser-output .geo-nondefault,.mw-parser-output .geo-multi-punct,.mw-parser-output .geo-inline-hidden{display:none}.mw-parser-output .longitude,.mw-parser-output .latitude{white-space:nowrap}53°25′51″N 002°57′39″W﻿ / ﻿53.43083°N 2.96083°W",".mw-parser-output .geo-default,.mw-parser-output .geo-dms,.mw-parser-output .geo-dec{display:inline}.mw-parser-output .geo-nondefault,.mw-parser-output .geo-multi-punct,.mw-parser-output .geo-inline-hidden{display:none}.mw-parser-output .longitude,.mw-parser-output .latitude{white-space:nowrap}53°25′51″N 002°57′39″W﻿",﻿53.43083°N 2.96083°W
4,bet365 Stadium,Stoke City,Stoke-on-Trent,1997,,30089,52°59′18″N 002°10′32″W﻿ / ﻿52.98833°N 2.17556°W,52°59′18″N 002°10′32″W﻿,﻿52.98833°N 2.17556°W
5,Bloomfield Road,Blackpool,Blackpool,1899,,16220,53°48′17″N 3°2′53″W﻿ / ﻿53.80472°N 3.04806°W,53°48′17″N 3°2′53″W﻿,﻿53.80472°N 3.04806°W
7,Boundary Park,Oldham Athletic,Oldham,1904,,13559,53°33′19″N 002°07′43″W﻿ / ﻿53.55528°N 2.12861°W,53°33′19″N 002°07′43″W﻿,﻿53.55528°N 2.12861°W
8,Bramall Lane,Sheffield United,Sheffield,1855,,32050,53°22′13″N 001°28′15″W﻿ / ﻿53.37028°N 1.47083°W,53°22′13″N 001°28′15″W﻿,﻿53.37028°N 1.47083°W


In [14]:
df["opened"] = (
    df["opened"]
    .str.replace("[16]", "", regex=False)
    .str.replace("[17]", "", regex=False)
    .str.replace("[27]", "", regex=False)
)

In [16]:
df[["lat", "lon"]] = df["coordinates_other"].str.split(" ", expand=True)

In [17]:
df.lat = df.lat.apply(dms2dec)
df.lon = df.lon.apply(dms2dec)

In [18]:
df = df.drop(
    ["closed", "coordinates_dms", "coordinates_other", "coordinates"], axis=1
).copy()

In [19]:
df.sort_values("capacity", ascending=False)

Unnamed: 0,stadium,club,location,opened,capacity,lat,lon
40,Old Trafford,Manchester United,Manchester,1910,74031,824.766667,-487.65
52,Tottenham Hotspur Stadium,Tottenham Hotspur,London,2019,62850,1058.866667,-110.65
34,London Stadium,West Ham United,London,2016,62500,948.683333,-27.316667
0,Anfield,Liverpool,Liverpool,1884,61276,771.05,-1603.383333
21,Emirates Stadium,Arsenal,London,2006,60704,976.0,-181.016667
14,City of Manchester Stadium,Manchester City,Manchester,2003,53400,858.1,-335.8
46,St James' Park,Newcastle United,Newcastle upon Tyne,1880,52257,1679.933333,-1037.116667
49,Stadium of Light,Sunderland,Sunderland,1997,48707,1578.066667,-648.216667
58,Villa Park,Aston Villa,Birmingham,1897,42530,900.616667,-1475.533333
50,Stamford Bridge,Chelsea,London,1877,40173,853.783333,-318.516667


---

## Geography

In [None]:
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat)).set_crs(
    epsg=3857
)

In [None]:
gdf.query('location == "London"').plot()

In [None]:
df.to_csv("data/processed/english_football_stadiums.csv", index=False)
gdf.to_file("data/processed/english_football_stadiums_geo.geojson", driver="GeoJSON")
gdf.query('location == "London"').to_file(
    "data/processed/english_football_stadiums_geo_london.geojson", driver="GeoJSON"
)