# Barclay's Premiere League: Shooting Stats

#### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt
import altair_stiles as altstiles
import requests
import json
from bs4 import BeautifulSoup

In [3]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100
pd.set_option("display.max_colwidth", None)

In [4]:
today = pd.Timestamp("today").strftime("%m/%d/%Y")

---

## Get data

#### Get all the urls for past seasons

In [5]:
past_urls = []

for f, l in zip(range(2000, 2023), range(2001, 2024)):
    past_urls.append(
        f"https://fbref.com/en/comps/9/{str(f)}-{str(l)}/{str(f)}-{str(l)}-Premier-League-Stats"
    )

In [6]:
past_urls[0:3]

['https://fbref.com/en/comps/9/2000-2001/2000-2001-Premier-League-Stats',
 'https://fbref.com/en/comps/9/2001-2002/2001-2002-Premier-League-Stats',
 'https://fbref.com/en/comps/9/2002-2003/2002-2003-Premier-League-Stats']

#### Get teams for each season

In [7]:
links_list = []

for past_url in past_urls:
    response = requests.get(past_url)
    html = BeautifulSoup(response.text, "html.parser")
    table = html.findAll("table")[0]
    links = table.select("a[href*='squads']")

    for l in links:
        links_dict = {
            # "id": l["href"],
            "url": "https://fbref.com" + l["href"],
            "season": past_url,
        }
        links_list.append(links_dict)

In [8]:
teams_df = pd.DataFrame(links_list)

In [9]:
teams_df["squad_id"] = teams_df["url"].str.split("/", expand=True)[5]

In [10]:
teams_df[["test1", "test2"]] = teams_df["url"].str.split("/", expand=True)[[6, 7]]

In [11]:
# Replace values of columns by using DataFrame.loc[] property.
teams_df.loc[teams_df["test2"].isnull(), "test2"] = teams_df["test1"]

In [12]:
teams_df["squad"] = (
    teams_df["test2"]
    .str.replace("-Stats", "", regex=False)
    .str.replace("-", " ", regex=False)
)

#### Loop through the squads and past seaons to snag stats

In [13]:
squads_list = []

for s, l in teams_df.iterrows():
    players = (pd.read_html(l["url"], header=1)[0]).assign(
        squad_id=l["url"], squad=l["squad"], season=l["season"], url=l["url"]
    )
    squads_list.append(players)

In [14]:
players_src = pd.concat(squads_list)

In [15]:
players_df = players_src[~players_src["Player"].str.contains("Total")].fillna(0).copy()

In [16]:
players_df.sort_values("Gls", ascending=False).head()

Unnamed: 0,Player,Nation,Pos,Age,MP,Starts,Min,90s,Gls,Ast,G-PK,PK,PKatt,CrdY,CrdR,Gls.1,Ast.1,G+A,G-PK.1,G+A-PK,Matches,squad_id,squad,season,url,xG,npxG,xAG,npxG+xAG,xG.1,xAG.1,xG+xAG,npxG.1,npxG+xAG.1
0,Mohamed Salah,eg EGY,FW,25.0,36,34,2907.0,32.3,32.0,10.0,31.0,1.0,2.0,1.0,0.0,0.99,0.31,1.3,0.96,1.27,Matches,https://fbref.com/en/squads/822bd0ba/2017-2018/Liverpool-Stats,Liverpool,https://fbref.com/en/comps/9/2017-2018/2017-2018-Premier-League-Stats,https://fbref.com/en/squads/822bd0ba/2017-2018/Liverpool-Stats,24.1,22.6,7.5,30.1,0.75,0.23,0.98,0.7,0.93
4,Cristiano Ronaldo,pt POR,"FW,MF",22.0,34,31,2747.0,30.5,31.0,6.0,27.0,4.0,5.0,5.0,1.0,1.02,0.2,1.21,0.88,1.08,Matches,https://fbref.com/en/squads/19538871/2007-2008/Manchester-United-Stats,Manchester United,https://fbref.com/en/comps/9/2007-2008/2007-2008-Premier-League-Stats,https://fbref.com/en/squads/19538871/2007-2008/Manchester-United-Stats,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Luis Suárez,uy URU,FW,26.0,33,33,2962.0,32.9,31.0,12.0,31.0,0.0,0.0,6.0,0.0,0.94,0.36,1.31,0.94,1.31,Matches,https://fbref.com/en/squads/822bd0ba/2013-2014/Liverpool-Stats,Liverpool,https://fbref.com/en/comps/9/2013-2014/2013-2014-Premier-League-Stats,https://fbref.com/en/squads/822bd0ba/2013-2014/Liverpool-Stats,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Thierry Henry,fr FRA,FW,25.0,37,37,3330.0,37.0,30.0,6.0,23.0,7.0,7.0,3.0,0.0,0.81,0.16,0.97,0.62,0.78,Matches,https://fbref.com/en/squads/18bb7c10/2003-2004/Arsenal-Stats,Arsenal,https://fbref.com/en/comps/9/2003-2004/2003-2004-Premier-League-Stats,https://fbref.com/en/squads/18bb7c10/2003-2004/Arsenal-Stats,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Harry Kane,eng ENG,FW,24.0,37,35,3076.0,34.2,30.0,2.0,28.0,2.0,3.0,5.0,0.0,0.88,0.06,0.94,0.82,0.88,Matches,https://fbref.com/en/squads/361ca564/2017-2018/Tottenham-Hotspur-Stats,Tottenham Hotspur,https://fbref.com/en/comps/9/2017-2018/2017-2018-Premier-League-Stats,https://fbref.com/en/squads/361ca564/2017-2018/Tottenham-Hotspur-Stats,24.8,22.4,2.7,25.1,0.72,0.08,0.8,0.66,0.73


In [17]:
len(players_df)

13360

In [18]:
players_df["season"] = players_df["season"].str.split(pat="/", expand=True)[6]

In [19]:
players_df

Unnamed: 0,Player,Nation,Pos,Age,MP,Starts,Min,90s,Gls,Ast,G-PK,PK,PKatt,CrdY,CrdR,Gls.1,Ast.1,G+A,G-PK.1,G+A-PK,Matches,squad_id,squad,season,url,xG,npxG,xAG,npxG+xAG,xG.1,xAG.1,xG+xAG,npxG.1,npxG+xAG.1
0,Gary Neville,eng ENG,DF,25.0,32,32,2849.0,31.7,1.0,1.0,1.0,0.0,0.0,4.0,0.0,0.03,0.03,0.06,0.03,0.06,Matches,https://fbref.com/en/squads/19538871/2000-2001/Manchester-United-Stats,Manchester United,2000-2001,https://fbref.com/en/squads/19538871/2000-2001/Manchester-United-Stats,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Fabien Barthez,fr FRA,GK,29.0,30,30,2675.0,29.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,Matches,https://fbref.com/en/squads/19538871/2000-2001/Manchester-United-Stats,Manchester United,2000-2001,https://fbref.com/en/squads/19538871/2000-2001/Manchester-United-Stats,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,David Beckham,eng ENG,MF,25.0,31,29,2648.0,29.4,9.0,12.0,8.0,1.0,1.0,3.0,0.0,0.31,0.41,0.71,0.27,0.68,Matches,https://fbref.com/en/squads/19538871/2000-2001/Manchester-United-Stats,Manchester United,2000-2001,https://fbref.com/en/squads/19538871/2000-2001/Manchester-United-Stats,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Paul Scholes,eng ENG,MF,25.0,32,28,2450.0,27.2,6.0,5.0,6.0,0.0,1.0,3.0,0.0,0.22,0.18,0.40,0.22,0.40,Matches,https://fbref.com/en/squads/19538871/2000-2001/Manchester-United-Stats,Manchester United,2000-2001,https://fbref.com/en/squads/19538871/2000-2001/Manchester-United-Stats,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Roy Keane,ie IRL,MF,28.0,28,28,2380.0,26.4,2.0,7.0,2.0,0.0,0.0,2.0,1.0,0.08,0.26,0.34,0.08,0.34,Matches,https://fbref.com/en/squads/19538871/2000-2001/Manchester-United-Stats,Manchester United,2000-2001,https://fbref.com/en/squads/19538871/2000-2001/Manchester-United-Stats,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28,Ryan Finnigan,eng ENG,MF,19-115,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,Matches,https://fbref.com/en/squads/33c895d4/Southampton-Stats,Southampton,2022-2023,https://fbref.com/en/squads/33c895d4/Southampton-Stats,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
29,Alex McCarthy,eng ENG,GK,33-044,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,Matches,https://fbref.com/en/squads/33c895d4/Southampton-Stats,Southampton,2022-2023,https://fbref.com/en/squads/33c895d4/Southampton-Stats,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
30,Jimmy Morgan,eng ENG,FW,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,Matches,https://fbref.com/en/squads/33c895d4/Southampton-Stats,Southampton,2022-2023,https://fbref.com/en/squads/33c895d4/Southampton-Stats,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
31,Mislav Oršić,hr CRO,"FW,MF",30-018,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,Matches,https://fbref.com/en/squads/33c895d4/Southampton-Stats,Southampton,2022-2023,https://fbref.com/en/squads/33c895d4/Southampton-Stats,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Export

#### Just the archive

In [22]:
players_df.to_csv("data/processed/all_players_stats_archive_current.csv", index=False)