## Import Libraries

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json

## Set up
 - Enter the name of the football team of interest
 - Replace any spaces in the team name with underscores
 - E.g. football_team = "Manchester_city"

In [None]:
# Name of the team to scrape. It is substituted into the Understat team URL
# built below, so write it with underscores in place of spaces.
football_team = "Chelsea"

In [None]:
# URL link
# Season segment of the Understat team URL, kept as a named value so it can be
# changed without editing the URL template.
season = 2019

# The set-up note asks for underscores instead of spaces; normalise here so a
# team name typed with spaces still produces a valid URL path.
team_slug = football_team.strip().replace(" ", "_")

base_url = "https://understat.com/team/{}/{}".format(team_slug, season)

## Web Scraping

Data variables of interest:
 - Date and time of fixture
 - Team side: whether the team is the home/away side in each fixture
 - Home team: who is the home team in each fixture
 - Away team: who is the away team in each fixture
 - Home team goals: how many goals scored by the home team
 - Away team goals: how many goals scored by the away team
 - Home team xG: home team's expected goals
 - Away team xG: away team's expected goals

In [None]:
#### Web scraping procedures ####

# Http request. The timeout stops the cell from hanging forever on a stalled
# connection, and raise_for_status() turns an HTTP error response (for example
# a 404 caused by a misspelt team name) into an immediate, descriptive error.
response = requests.get(base_url, timeout=30)
response.raise_for_status()

# Html content
html = response.content

# Create BeautifulSoup object
soup = BeautifulSoup(html, "lxml")

# Locating the place with the data of interest
scripts = soup.find_all("script")

#### Processing the json file ####

string_with_json_obj = ''

# Find data for fixtures: keep the text of the <script> element that mentions
# 'datesData'. If several elements match, the last one wins.
for el in scripts:
    if 'datesData' in el.text:
        string_with_json_obj = el.text.strip()

# Fail with a clear message instead of an opaque "substring not found" error
# from the index() calls below.
if not string_with_json_obj:
    raise ValueError(
        "No <script> element containing 'datesData' was found at {}".format(base_url)
    )

# Strip unnecessary symbols and get only JSON data: the payload is the text
# between the opening "('" and the first "')" that follows it.
ind_start = string_with_json_obj.index("('") + 2
ind_end = string_with_json_obj.index("')", ind_start)
json_data = string_with_json_obj[ind_start:ind_end]

# Resolve the backslash escape sequences in the extracted text so that it
# becomes plain JSON.
json_data = json_data.encode('utf8').decode('unicode_escape')

# Parse the JSON text into Python objects
data = json.loads(json_data)

#### Extracting and organising each of the relevant data variables in respective lists ####

# Each entry of `data` describes one fixture. Every list below holds one value
# per fixture, in the same order as `data`.

# Datetime for each fixture
datetime_list = [fixture["datetime"] for fixture in data]

# Recording whether the team plays home or away in each fixture
team_side = [fixture["side"] for fixture in data]

# Name of Home side in each fixture
home_teams = [fixture["h"]["title"] for fixture in data]

# Name of Away side in each fixture
away_teams = [fixture["a"]["title"] for fixture in data]

# Goals scored by home side in each fixture
home_goals = [fixture["goals"]["h"] for fixture in data]

# Goals scored by away side in each fixture
away_goals = [fixture["goals"]["a"] for fixture in data]

# Home side's expected goals in each fixture
home_xG = [fixture["xG"]["h"] for fixture in data]

# Away side's expected goals in each fixture
away_xG = [fixture["xG"]["a"] for fixture in data]

## Put all the extracted data into a nice dataframe 

In [None]:
# Assemble the per-fixture lists into one table, one row per fixture.
# Fixture numbers run from 1 to the number of scraped fixtures, so the table
# builds correctly whatever the fixture count is (a fixed 1..38 range raises a
# length-mismatch error for any other count).
team_df = pd.DataFrame(
    {
        "Fixture": range(1, len(datetime_list) + 1),
        "Date": datetime_list,
        "Home/Away": team_side,
        "Home Team": home_teams,
        "Away Team": away_teams,
        "Home goals": home_goals,
        "Away goals": away_goals,
        "Home xG": home_xG,
        "Away xG": away_xG,
    }
)

## Additional Data Pre-processing 

 - Compute the goals scored by the team of interest (Team_Goals)
 - Compute the goals conceded by the team of interest (Team_Against)
 - Compute the expected goals created by the team of interest (Team_xG)
 - Compute the expected goals conceded by the team of interest (Team_xGA)

In [None]:
# Team name: taken from the "Home Team" column of the first fixture in which
# the team of interest is flagged as the home side ("h").
home_rows = team_df[team_df["Home/Away"] == "h"]
if home_rows.empty:
    raise ValueError("No home fixture found, so the team name cannot be determined")
team_name = home_rows["Home Team"].iloc[0]

# True for the rows where the team of interest is the home side.
is_home = team_df["Home Team"] == team_name

# Pick the home or away figure per row in one vectorised step. Whole-column
# assignment avoids chained indexing (team_df[col][i] = ...), which triggers
# SettingWithCopyWarning and has no effect under pandas Copy-on-Write.
# Series.where keeps the caller's value where the mask is True and takes the
# second argument elsewhere.
team_df["Team_Goals"] = team_df["Home goals"].where(is_home, team_df["Away goals"])
team_df["Team_Against"] = team_df["Away goals"].where(is_home, team_df["Home goals"])
team_df["Team_xG"] = team_df["Home xG"].where(is_home, team_df["Away xG"])
team_df["Team_xGA"] = team_df["Away xG"].where(is_home, team_df["Home xG"])

## Export dataframe into a csv file 

In [None]:
# Write the finished table to "<team name>_fixtures.csv" in the working directory.
csv_filename = team_name + "_fixtures.csv"
team_df.to_csv(csv_filename)