In [1]:
!pip3 install apscheduler
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import ast
from apscheduler.schedulers.background import BackgroundScheduler



## Fbref Data

### Obtain Season and Match URLs

In [2]:
season_urls_dic = {
    '2021-2022': 'https://fbref.com/en/comps/9/2021-2022/schedule/2021-2022-Premier-League-Scores-and-Fixtures',
    '2020-2021': 'https://fbref.com/en/comps/9/2020-2021/schedule/2020-2021-Premier-League-Scores-and-Fixtures',
    '2019-2020': 'https://fbref.com/en/comps/9/2019-2020/schedule/2019-2020-Premier-League-Scores-and-Fixtures',
    '2018-2019': 'https://fbref.com/en/comps/9/2018-2019/schedule/2018-2019-Premier-League-Scores-and-Fixtures',
    '2017-2018': 'https://fbref.com/en/comps/9/2017-2018/schedule/2017-2018-Premier-League-Scores-and-Fixtures',
    '2016-2017': 'https://fbref.com/en/comps/9/2016-2017/schedule/2016-2017-Premier-League-Scores-and-Fixtures',
    '2015-2016': 'https://fbref.com/en/comps/9/2015-2016/schedule/2015-2016-Premier-League-Scores-and-Fixtures',
    '2014-2015': 'https://fbref.com/en/comps/9/2014-2015/schedule/2014-2015-Premier-League-Scores-and-Fixtures',
    '2013-2014': 'https://fbref.com/en/comps/9/2013-2014/schedule/2013-2014-Premier-League-Scores-and-Fixtures',
    '2012-2013': 'https://fbref.com/en/comps/9/2012-2013/schedule/2012-2013-Premier-League-Scores-and-Fixtures',
    '2011-2012': 'https://fbref.com/en/comps/9/2011-2012/schedule/2011-2012-Premier-League-Scores-and-Fixtures'
}


In [3]:
def write_season_urls(season_urls):
    df = pd.DataFrame(columns=["Season", "Season URL"])
    for season, url in season_urls.items():
        df = df.append({"Season": season, "Season URL": url}, ignore_index = True)
    
    df.to_csv('season_url.csv', index = False)
    return

# write_season_urls(season_urls_dic)

In [6]:
def find_match_paths(season_url):
    data = requests.get(season_url)
    soup = BeautifulSoup(data.content, "html.parser")
    match_rows = soup.find_all("td", class_="left", attrs={"data-stat": "match_report"})
#     match_dates = soup.find_all("td", class_="left sort_show", attrs={"data-stat": "date"})

    match_url_paths = []
    
    for row in match_rows:
        if (row.find("a", href=re.compile('/en/matches/'))):
            match_url_paths.append(row.find("a", href=re.compile('/en/matches/'))['href'])
    
    return match_url_paths

# match_url_paths = find_match_paths('https://fbref.com/en/comps/9/2021-2022/schedule/2021-2022-Premier-League-Scores-and-Fixtures')
# match_url_paths

In [220]:
def write_match_urls(source_file_path, dest_file_path):
    source_df = pd.read_csv(source_file_path)
    
    df = pd.DataFrame(columns = ["Season", "Match URL"])
    for index, row in source_df.iterrows():
        season = row['Season']
        season_url = row['Season URL']
        
        full_match_path_array = []
        
        match_paths = find_match_paths(season_url)
        base_url = "https://fbref.com"
        for match_path in match_paths:
            full_match_path = base_url + match_path
            full_match_path_array.append({"link": full_match_path})
#         print(full_match_path_array)
        df = df.append({"Season": season, "Match URL": full_match_path_array}, ignore_index = True)
    
    df.to_csv(dest_file_path, index = False)
    return

# write_match_urls('season_url.csv', 'match_url_v2.csv')

### Helper functions to obtain match data from specific match url

In [3]:
df = pd.DataFrame(columns=["Season", 
                      "Home Team", 
                      "Away Team", 
                      "Home Team Score", 
                      "Away Team Score", 
                      "Home Starting Lineup",
                      "Home Sub Lineup",
                      "Away Starting Lineup",
                      "Away Sub Lineup",
                      "Home Goal Timings",
                      "Away Goal Timings",
                      "Home Sub Timings",
                      "Away Sub Timings",
                      ])
df

Unnamed: 0,Season,Home Team,Away Team,Home Team Score,Away Team Score,Home Starting Lineup,Home Sub Lineup,Away Starting Lineup,Away Sub Lineup,Home Goal Timings,Away Goal Timings,Home Sub Timings,Away Sub Timings


In [2]:
# Find team names
def find_teams(soup):
    scorebox = soup.find("div", class_="scorebox").find_all("strong")
    count = 0
    
    for s in scorebox:
        if (s.find("a", href = re.compile("/en/squads/"))):
            count+=1

            if count==1:
                home_team = s.find("a", href = re.compile("/en/squads/")).text
            if count==2:
                away_team = s.find("a", href = re.compile("/en/squads/")).text
    
    return home_team, away_team


# Find scores for each team
def find_score(soup):
    scorebox = soup.find_all("div", class_="scores")
    home_team_score = scorebox[0].find("div", class_="score").text
    away_team_score = scorebox[1].find("div", class_="score").text
    return home_team_score, away_team_score

# Find home and away lineups
def find_lineup(soup):
    field_wrap = soup.find("div", id="field_wrap")
    lineup = field_wrap.find_all("div", class_="lineup")

    if (len(lineup) != 2):
      raise Exception ("Find Lineup Error: Can't find tag in soup")

    home_lineup = lineup[0].find_all("tr")
    away_lineup = lineup[1].find_all("tr")
    
    home_starting_lineup = []
    home_bench_lineup = []

    for row in home_lineup:
        if ((row.find("a") and len(home_starting_lineup)<11)):
            home_starting_lineup.append(row.find("a").text)
        elif ((row.find("a") and len(home_starting_lineup)>=11)):
            home_bench_lineup.append(row.find("a").text)
    
    away_starting_lineup = []
    away_bench_lineup = []

    for row in away_lineup:
        if ((row.find("a") and len(away_starting_lineup)<11)):
            away_starting_lineup.append(row.find("a").text)
        elif ((row.find("a") and len(away_starting_lineup)>=11)):
            away_bench_lineup.append(row.find("a").text)
    
    return home_starting_lineup, home_bench_lineup, away_starting_lineup, away_bench_lineup

# Find goal, substituion and red card timings
# Substitutions: Array of "sub-timing", "sub-out-player", "sub-in-player"
def find_event_timings(soup):
    events_wrap = soup.find("div", id="events_wrap")
    home_team_events = events_wrap.find_all("div", class_="event a")
    away_team_events = events_wrap.find_all("div", class_="event b")
    
    home_team_goal_timings = []
    away_team_goal_timings = []
    home_team_sub_timings = []
    away_team_sub_timings = []
    home_team_red_card_timings = []
    away_team_red_card_timings = []
    
  # Home Team
    for event in home_team_events:
      # Goals
        if (event.find("div", class_="event_icon goal") or event.find("div", class_="event_icon penalty_goal")):
            text = event.find_all("div")[0].text
            text = re.findall('[0-9]+', text)[0]
            home_team_goal_timings.append(int(text))
      # Substitutes
        elif (event.find("div", class_="event_icon substitute_in")):
            sub_timing = event.find_all("div")[0].text
            sub_timing = int(re.findall('[0-9]+', sub_timing)[0])
            sub_players = event.find_all("a", href = True)
            # Only sub out
            if (len(sub_players) == 1):
              sub_in_player = "-"
              sub_out_player = sub_players[0].text
              home_team_sub_timings.append([sub_timing, sub_out_player, sub_in_player])
            # Sub out and sub in
            elif (len(sub_players) == 2):
              sub_in_player = sub_players[0].text
              sub_out_player = sub_players[1].text
              home_team_sub_timings.append([sub_timing, sub_out_player, sub_in_player])
            else:
              raise Exception("Substitutes: Invalid number of players")
      # Red Card
        elif (event.find("div", class_="event_icon yellow_red_card")):
            timing = event.find_all("div")[0].text
            timing = int(re.findall('[0-9]+', timing)[0])
            player = event.find_all("a", href = True)
            if (len(player) == 1):
              player = player[0].text
              home_team_red_card_timings.append([timing, player])
            elif (len(player) > 1):
              raise Exception("Red Card: Invalid number of players")

  # Away Team
    for event in away_team_events:
      # Goals
        if (event.find("div", class_="event_icon goal") or event.find("div", class_="event_icon penalty_goal")):
            text = event.find_all("div")[0].text
            text = re.findall('[0-9]+', text)[0]
            away_team_goal_timings.append(int(text))
      # Substitutes
        elif (event.find("div", class_="event_icon substitute_in")):
            sub_timing = event.find_all("div")[0].text
            sub_timing = int(re.findall('[0-9]+', sub_timing)[0])
            sub_players = event.find_all("a", href = True)
            # Only sub out
            if (len(sub_players) == 1):
              sub_in_player = "-"
              sub_out_player = sub_players[0].text
              away_team_sub_timings.append([sub_timing, sub_out_player, sub_in_player])
            # Sub out and sub in
            elif (len(sub_players) == 2):
              sub_in_player = sub_players[0].text
              sub_out_player = sub_players[1].text
              away_team_sub_timings.append([sub_timing, sub_out_player, sub_in_player])
            else:
              raise Exception("Substitutes: Invalid number of players")
      # Red Card
        elif (event.find("div", class_="event_icon yellow_red_card") or event.find("div", class_="event_icon red_card")):
            timing = event.find_all("div")[0].text
            timing = int(re.findall('[0-9]+', timing)[0])
            player = event.find_all("a", href = True)
            if (len(player) == 1):
              player = player[0].text
              away_team_red_card_timings.append([timing, player])
            elif (len(player) > 1):
              raise Exception("Red Card: Invalid number of players")
    
    return home_team_goal_timings, away_team_goal_timings, home_team_sub_timings, away_team_sub_timings, home_team_red_card_timings, away_team_red_card_timings

# Find match data
def find_match_data(url, df):
    data = requests.get(url)
    soup = BeautifulSoup(data.content, "html.parser")
    home_team, away_team = find_teams(soup)
    home_team_score, away_team_score = find_score(soup)
    home_starting_lineup, home_bench_lineup, away_starting_lineup, away_bench_lineup = find_lineup(soup)
    home_team_goal_timings, away_team_goal_timings, home_team_sub_timings, away_team_sub_timings, home_team_red_card_timings, away_team_red_card_timings = find_event_timings(soup)

    df = df.append({
        "Season": "test",
        "Home Team": home_team,
        "Away Team": away_team,
        "Home Team Score": home_team_score,
        "Away Team Score": away_team_score,
        "Home Starting Lineup": home_starting_lineup,
        "Home Bench Lineup": home_bench_lineup,
        "Away Starting Lineup": away_starting_lineup,
        "Away Bench Lineup": away_bench_lineup,
        "Home Goal Timings": home_team_goal_timings,
        "Away Goal Timings": away_team_goal_timings,
        "Home Sub Timings": home_team_sub_timings,
        "Away Sub Timings": away_team_sub_timings,
        "Home Red Card Timings": home_team_red_card_timings,
        "Away Red Card Timings": away_team_red_card_timings
    }, ignore_index = True)

    return df

In [3]:
# Write match data
def write_match_data(source_file_path, dest_file_path, max_count, scheduler):
    print("Start of Function")
    df = pd.DataFrame(columns=["Season", "Home Team", "Away Team", "Home Team Score", "Away Team Score", "Home Starting Lineup", 
                               "Home Bench Lineup", "Away Starting Lineup", "Away Bench Lineup", "Home Goal Timings", 
                               "Away Goal Timings", "Home Sub Timings", "Away Sub Timings", "Home Red Card Timings",
                               "Away Red Card Timings"])
    
    source_df = pd.read_csv(source_file_path)
    season = source_df.at[0, 'Season']
    match_urls = ast.literal_eval(source_df.at[0, 'Match URL'])
    print("Season:", season)
    print("Original Length:", len(match_urls))

    # Shutdown scheduler
    if season == None:
        scheduler.shutdown()

    count = 0
    while (len(match_urls) > 0):        
        count+=1
        if count > max_count:
            break
        popped_url = match_urls.pop(0)
        print(popped_url)
        data = requests.get(popped_url)
        soup = BeautifulSoup(data.content, "html.parser")
        home_team, away_team = find_teams(soup)
        home_team_score, away_team_score = find_score(soup)
        home_starting_lineup, home_bench_lineup, away_starting_lineup, away_bench_lineup = find_lineup(soup)
        home_team_goal_timings, away_team_goal_timings, home_team_sub_timings, away_team_sub_timings, home_team_red_card_timings, away_team_red_card_timings = find_event_timings(soup)

        df = pd.concat([df, pd.DataFrame({
          "Season": [season],
          "Home Team": [home_team],
          "Away Team": [away_team],
          "Home Team Score": [home_team_score],
          "Away Team Score": [away_team_score],
          "Home Starting Lineup": [home_starting_lineup],
          "Home Bench Lineup": [home_bench_lineup],
          "Away Starting Lineup": [away_starting_lineup],
          "Away Bench Lineup": [away_bench_lineup],
          "Home Goal Timings": [home_team_goal_timings],
          "Away Goal Timings": [away_team_goal_timings],
          "Home Sub Timings": [home_team_sub_timings],
          "Away Sub Timings": [away_team_sub_timings],
          "Home Red Card Timings": [home_team_red_card_timings],
          "Away Red Card Timings": [away_team_red_card_timings]
        })], ignore_index = True)
        
        # Edit the source_df to reflect the new ma
        source_df.at[0, 'Match URL'] = match_urls

        # Drop first row if empty
        if (len(match_urls) == 0):
            source_df = source_df.drop(0)
            source_df = source_df.reset_index(drop=True)
            try:
              season = source_df.at[0, 'Season']
              match_urls = ast.literal_eval(source_df.at[0, 'Match URL'])
            except:
                print("Last Season")
            
#     Edit the initial files
    source_df.to_csv(source_file_path, index = False)
    df.to_csv(dest_file_path, mode = "a", index = False)

    print("Season:", season)
    print("Final Length:", len(match_urls))
    print("End of Function")
    return df


In [4]:
if __name__ == "__main__":
    print("Start of Scraping")
    scheduler = BackgroundScheduler(daemon=True)
#     write_match_data("match_url_test.csv", "match_data_test.csv", 2, scheduler)
    scheduler.add_job(write_match_data, 'interval', args=["match_url_test.csv", "match_data_test.csv", 9, scheduler], minutes=1)
    scheduler.start()
    print("Scheduler running: ", scheduler.get_jobs())

Start of Scraping
Scheduler running:  [<Job (id=4b07658933df43878de791c0489bd90a name=write_match_data)>]
Start of Function
Season: 2016-2017
Original Length: 275
https://fbref.com/en/matches/61ae4b5a/North-London-Derby-Arsenal-Tottenham-Hotspur-November-6-2016-Premier-League
https://fbref.com/en/matches/88e5f133/Liverpool-Watford-November-6-2016-Premier-League
https://fbref.com/en/matches/f1ef05e1/Hull-City-Southampton-November-6-2016-Premier-League
https://fbref.com/en/matches/23a108bb/Swansea-City-Manchester-United-November-6-2016-Premier-League
https://fbref.com/en/matches/a73d5c6b/Leicester-City-West-Bromwich-Albion-November-6-2016-Premier-League
https://fbref.com/en/matches/abb1ca5c/Manchester-United-Arsenal-November-19-2016-Premier-League
https://fbref.com/en/matches/0713dea0/Crystal-Palace-Manchester-City-November-19-2016-Premier-League
https://fbref.com/en/matches/568c83cf/Everton-Swansea-City-November-19-2016-Premier-League
https://fbref.com/en/matches/93cbea2e/Sunderland-Hul

https://fbref.com/en/matches/05c6c17b/Hull-City-Everton-December-30-2016-Premier-League
https://fbref.com/en/matches/189c4fc2/Chelsea-Stoke-City-December-31-2016-Premier-League
https://fbref.com/en/matches/1b6ab29d/Leicester-City-West-Ham-United-December-31-2016-Premier-League
https://fbref.com/en/matches/604d7487/Southampton-West-Bromwich-Albion-December-31-2016-Premier-League
https://fbref.com/en/matches/89339d67/Burnley-Sunderland-December-31-2016-Premier-League
https://fbref.com/en/matches/a2feddf1/Swansea-City-Bournemouth-December-31-2016-Premier-League
Season: 2016-2017
Final Length: 194
End of Function
Start of Function
Season: 2016-2017
Original Length: 194
https://fbref.com/en/matches/db4797ac/Manchester-United-Middlesbrough-December-31-2016-Premier-League
https://fbref.com/en/matches/894a485d/Liverpool-Manchester-City-December-31-2016-Premier-League
https://fbref.com/en/matches/3a95bef3/Watford-Tottenham-Hotspur-January-1-2017-Premier-League
https://fbref.com/en/matches/b0f63

Season: 2016-2017
Final Length: 122
End of Function
Start of Function
Season: 2016-2017
Original Length: 122
https://fbref.com/en/matches/11c30a9e/Manchester-United-Bournemouth-March-4-2017-Premier-League
https://fbref.com/en/matches/80c3d431/West-Bromwich-Albion-Crystal-Palace-March-4-2017-Premier-League
https://fbref.com/en/matches/92c4eebc/Swansea-City-Burnley-March-4-2017-Premier-League
https://fbref.com/en/matches/989477be/Stoke-City-Middlesbrough-March-4-2017-Premier-League
https://fbref.com/en/matches/b02df7fd/Leicester-City-Hull-City-March-4-2017-Premier-League
https://fbref.com/en/matches/e0eca321/Watford-Southampton-March-4-2017-Premier-League
https://fbref.com/en/matches/f06bb82f/Liverpool-Arsenal-March-4-2017-Premier-League
https://fbref.com/en/matches/f3357cd6/Tottenham-Hotspur-Everton-March-5-2017-Premier-League
https://fbref.com/en/matches/5677c7ef/Sunderland-Manchester-City-March-5-2017-Premier-League
Season: 2016-2017
Final Length: 113
End of Function
Start of Function

https://fbref.com/en/matches/97ae959a/Stoke-City-West-Ham-United-April-29-2017-Premier-League
https://fbref.com/en/matches/ccf227b0/Crystal-Palace-Burnley-April-29-2017-Premier-League
Season: 2016-2017
Final Length: 41
End of Function
Start of Function
Season: 2016-2017
Original Length: 41
https://fbref.com/en/matches/9f85335f/Manchester-United-Swansea-City-April-30-2017-Premier-League
https://fbref.com/en/matches/03fb9728/Middlesbrough-Manchester-City-April-30-2017-Premier-League
https://fbref.com/en/matches/c0ed0645/Everton-Chelsea-April-30-2017-Premier-League
https://fbref.com/en/matches/cdec2a32/North-London-Derby-Tottenham-Hotspur-Arsenal-April-30-2017-Premier-League
https://fbref.com/en/matches/17af420d/Watford-Liverpool-May-1-2017-Premier-League
https://fbref.com/en/matches/ba9c15dd/West-Ham-United-Tottenham-Hotspur-May-5-2017-Premier-League
https://fbref.com/en/matches/b8c8a428/Manchester-City-Crystal-Palace-May-6-2017-Premier-League
https://fbref.com/en/matches/2250900c/Burnle

https://fbref.com/en/matches/f25a110f/Manchester-City-Watford-August-29-2015-Premier-League
https://fbref.com/en/matches/9fef9371/Tottenham-Hotspur-Everton-August-29-2015-Premier-League
https://fbref.com/en/matches/8049f81a/Southampton-Norwich-City-August-30-2015-Premier-League
https://fbref.com/en/matches/513236ec/Swansea-City-Manchester-United-August-30-2015-Premier-League
Season: 2015-2016
Final Length: 340
End of Function
Start of Function
Season: 2015-2016
Original Length: 340
https://fbref.com/en/matches/4e2b0c5e/Everton-Chelsea-September-12-2015-Premier-League
https://fbref.com/en/matches/602d69bd/Norwich-City-Bournemouth-September-12-2015-Premier-League
https://fbref.com/en/matches/886b884e/Arsenal-Stoke-City-September-12-2015-Premier-League
https://fbref.com/en/matches/8c27d5d1/Watford-Swansea-City-September-12-2015-Premier-League
https://fbref.com/en/matches/9954343e/West-Bromwich-Albion-Southampton-September-12-2015-Premier-League
https://fbref.com/en/matches/dad43a5a/Crysta

Season: 2015-2016
Final Length: 268
End of Function
Start of Function
Season: 2015-2016
Original Length: 268
https://fbref.com/en/matches/285d483a/Sunderland-Southampton-November-7-2015-Premier-League
https://fbref.com/en/matches/664143dd/Leicester-City-Watford-November-7-2015-Premier-League
https://fbref.com/en/matches/9351699f/Norwich-City-Swansea-City-November-7-2015-Premier-League
https://fbref.com/en/matches/b9fb22c9/Manchester-United-West-Bromwich-Albion-November-7-2015-Premier-League
https://fbref.com/en/matches/1fd87946/Stoke-City-Chelsea-November-7-2015-Premier-League
https://fbref.com/en/matches/1eda4c4b/Aston-Villa-Manchester-City-November-8-2015-Premier-League
https://fbref.com/en/matches/89ea5895/Liverpool-Crystal-Palace-November-8-2015-Premier-League
https://fbref.com/en/matches/a933a6f4/North-London-Derby-Arsenal-Tottenham-Hotspur-November-8-2015-Premier-League
https://fbref.com/en/matches/7e046c61/Watford-Manchester-United-November-21-2015-Premier-League
Season: 2015-20

https://fbref.com/en/matches/ee5182b1/Manchester-United-Chelsea-December-28-2015-Premier-League
https://fbref.com/en/matches/02a152cf/Leicester-City-Manchester-City-December-29-2015-Premier-League
https://fbref.com/en/matches/034b8c78/Sunderland-Liverpool-December-30-2015-Premier-League
https://fbref.com/en/matches/22bd65d5/West-Ham-United-Liverpool-January-2-2016-Premier-League
https://fbref.com/en/matches/151c29cc/Norwich-City-Southampton-January-2-2016-Premier-League
https://fbref.com/en/matches/361c2104/Manchester-United-Swansea-City-January-2-2016-Premier-League
Season: 2015-2016
Final Length: 187
End of Function
Start of Function
Season: 2015-2016
Original Length: 187
https://fbref.com/en/matches/994b8926/Arsenal-Newcastle-United-January-2-2016-Premier-League
https://fbref.com/en/matches/a5cf696a/West-Bromwich-Albion-Stoke-City-January-2-2016-Premier-League
https://fbref.com/en/matches/a9494dac/Sunderland-Aston-Villa-January-2-2016-Premier-League
https://fbref.com/en/matches/d62b

https://fbref.com/en/matches/bf4ae13b/Southampton-Chelsea-February-27-2016-Premier-League
Season: 2015-2016
Final Length: 115
End of Function
Start of Function
Season: 2015-2016
Original Length: 115
https://fbref.com/en/matches/38358fdf/West-Bromwich-Albion-Crystal-Palace-February-27-2016-Premier-League
https://fbref.com/en/matches/5d8bf07d/Manchester-United-Arsenal-February-28-2016-Premier-League
https://fbref.com/en/matches/a30cc6cb/Tottenham-Hotspur-Swansea-City-February-28-2016-Premier-League
https://fbref.com/en/matches/24f5b225/Leicester-City-West-Bromwich-Albion-March-1-2016-Premier-League
https://fbref.com/en/matches/268c182d/Norwich-City-Chelsea-March-1-2016-Premier-League
https://fbref.com/en/matches/6c691212/Aston-Villa-Everton-March-1-2016-Premier-League
https://fbref.com/en/matches/b9214ee0/Bournemouth-Southampton-March-1-2016-Premier-League
https://fbref.com/en/matches/fd8e6f5d/Sunderland-Crystal-Palace-March-1-2016-Premier-League
https://fbref.com/en/matches/3087d86f/Sto

https://fbref.com/en/matches/f234ece6/Liverpool-Newcastle-United-April-23-2016-Premier-League
https://fbref.com/en/matches/2f86d664/Sunderland-Arsenal-April-24-2016-Premier-League
https://fbref.com/en/matches/98683761/Leicester-City-Swansea-City-April-24-2016-Premier-League
https://fbref.com/en/matches/bd22b7ce/Tottenham-Hotspur-West-Bromwich-Albion-April-25-2016-Premier-League
Season: 2015-2016
Final Length: 34
End of Function
Start of Function
Season: 2015-2016
Original Length: 34
https://fbref.com/en/matches/4f898f0c/Everton-Bournemouth-April-30-2016-Premier-League
https://fbref.com/en/matches/61031e0b/Watford-Aston-Villa-April-30-2016-Premier-League
https://fbref.com/en/matches/632fda2d/West-Bromwich-Albion-West-Ham-United-April-30-2016-Premier-League
https://fbref.com/en/matches/90eab69c/Stoke-City-Sunderland-April-30-2016-Premier-League
https://fbref.com/en/matches/b3b21078/Newcastle-United-Crystal-Palace-April-30-2016-Premier-League
https://fbref.com/en/matches/ab4dc164/Arsenal-

https://fbref.com/en/matches/e8592bc3/Queens-Park-Rangers-Stoke-City-September-20-2014-Premier-League
https://fbref.com/en/matches/2aa55b80/Burnley-Sunderland-September-20-2014-Premier-League
https://fbref.com/en/matches/2bfda5d1/Newcastle-United-Hull-City-September-20-2014-Premier-League
https://fbref.com/en/matches/9e4958a0/Swansea-City-Southampton-September-20-2014-Premier-League
https://fbref.com/en/matches/d9045ddc/Aston-Villa-Arsenal-September-20-2014-Premier-League
https://fbref.com/en/matches/26b5177c/West-Ham-United-Liverpool-September-20-2014-Premier-League
https://fbref.com/en/matches/0d5ba6a7/Tottenham-Hotspur-West-Bromwich-Albion-September-21-2014-Premier-League
Season: 2014-2015
Final Length: 333
End of Function
Start of Function
Season: 2014-2015
Original Length: 333
https://fbref.com/en/matches/252a79bc/Leicester-City-Manchester-United-September-21-2014-Premier-League
https://fbref.com/en/matches/3e81011b/Everton-Crystal-Palace-September-21-2014-Premier-League
https://f

https://fbref.com/en/matches/c8406d8f/Arsenal-Manchester-United-November-22-2014-Premier-League
https://fbref.com/en/matches/e9b0c1d2/Crystal-Palace-Liverpool-November-23-2014-Premier-League
https://fbref.com/en/matches/10500703/Hull-City-Tottenham-Hotspur-November-23-2014-Premier-League
Season: 2014-2015
Final Length: 261
End of Function
Start of Function
Season: 2014-2015
Original Length: 261
https://fbref.com/en/matches/d0ab7eae/Aston-Villa-Southampton-November-24-2014-Premier-League
https://fbref.com/en/matches/19ef953c/West-Bromwich-Albion-Arsenal-November-29-2014-Premier-League
https://fbref.com/en/matches/18161cf8/Manchester-United-Hull-City-November-29-2014-Premier-League
https://fbref.com/en/matches/1923abf0/Liverpool-Stoke-City-November-29-2014-Premier-League
https://fbref.com/en/matches/219bc6c4/Swansea-City-Crystal-Palace-November-29-2014-Premier-League
https://fbref.com/en/matches/39d8741c/Burnley-Aston-Villa-November-29-2014-Premier-League
https://fbref.com/en/matches/6a2

Execution of job "write_match_data (trigger: interval[0:01:00], next run at: 2023-04-20 14:02:15 +08)" skipped: maximum number of running instances reached (1)


https://fbref.com/en/matches/482ab595/Liverpool-Arsenal-December-21-2014-Premier-League
https://fbref.com/en/matches/6d533062/Stoke-City-Chelsea-December-22-2014-Premier-League
https://fbref.com/en/matches/5160cce2/Chelsea-West-Ham-United-December-26-2014-Premier-League
https://fbref.com/en/matches/12d817fa/Leicester-City-Tottenham-Hotspur-December-26-2014-Premier-League
https://fbref.com/en/matches/14328b91/Manchester-United-Newcastle-United-December-26-2014-Premier-League
Season: 2014-2015
Final Length: 207
End of Function
Start of Function
Season: 2014-2015
Original Length: 207
https://fbref.com/en/matches/72f0bbf0/Burnley-Liverpool-December-26-2014-Premier-League
https://fbref.com/en/matches/764a4ae0/West-Bromwich-Albion-Manchester-City-December-26-2014-Premier-League
https://fbref.com/en/matches/85542f1b/Swansea-City-Aston-Villa-December-26-2014-Premier-League
https://fbref.com/en/matches/b3f106fa/Sunderland-Hull-City-December-26-2014-Premier-League
https://fbref.com/en/matches/b8

Season: 2014-2015
Final Length: 135
End of Function
Start of Function
Season: 2014-2015
Original Length: 135
https://fbref.com/en/matches/30b4c5f8/Chelsea-Everton-February-11-2015-Premier-League
https://fbref.com/en/matches/878d33b1/Stoke-City-Manchester-City-February-11-2015-Premier-League
https://fbref.com/en/matches/ba02f26d/Manchester-United-Burnley-February-11-2015-Premier-League
https://fbref.com/en/matches/bd63e5b9/Crystal-Palace-Newcastle-United-February-11-2015-Premier-League
https://fbref.com/en/matches/d1a68304/West-Bromwich-Albion-Swansea-City-February-11-2015-Premier-League
https://fbref.com/en/matches/0090e785/Crystal-Palace-Arsenal-February-21-2015-Premier-League
https://fbref.com/en/matches/425343a3/Swansea-City-Manchester-United-February-21-2015-Premier-League
https://fbref.com/en/matches/6d16a577/Sunderland-West-Bromwich-Albion-February-21-2015-Premier-League
https://fbref.com/en/matches/b7498190/Chelsea-Burnley-February-21-2015-Premier-League
Season: 2014-2015
Final 

https://fbref.com/en/matches/b5a4ce75/Crystal-Palace-West-Bromwich-Albion-April-18-2015-Premier-League
https://fbref.com/en/matches/ec94c29f/Stoke-City-Southampton-April-18-2015-Premier-League
https://fbref.com/en/matches/a8e39fbf/Chelsea-Manchester-United-April-18-2015-Premier-League
https://fbref.com/en/matches/10c8fef2/Manchester-City-West-Ham-United-April-19-2015-Premier-League
Season: 2014-2015
Final Length: 54
End of Function
Start of Function
Season: 2014-2015
Original Length: 54
https://fbref.com/en/matches/9ada192c/Newcastle-United-Tottenham-Hotspur-April-19-2015-Premier-League
https://fbref.com/en/matches/0b1d3c80/Southampton-Tottenham-Hotspur-April-25-2015-Premier-League
https://fbref.com/en/matches/000804a9/Burnley-Leicester-City-April-25-2015-Premier-League
https://fbref.com/en/matches/0ba7b050/Stoke-City-Sunderland-April-25-2015-Premier-League
https://fbref.com/en/matches/3d5347ef/Queens-Park-Rangers-West-Ham-United-April-25-2015-Premier-League
https://fbref.com/en/matche

https://fbref.com/en/matches/2f4fb646/Manchester-United-Chelsea-August-26-2013-Premier-League
https://fbref.com/en/matches/0a96cd99/West-Ham-United-Stoke-City-August-31-2013-Premier-League
https://fbref.com/en/matches/1a9cda6c/Norwich-City-Southampton-August-31-2013-Premier-League
https://fbref.com/en/matches/62acbb63/Crystal-Palace-Sunderland-August-31-2013-Premier-League
https://fbref.com/en/matches/a136a9eb/Cardiff-City-Everton-August-31-2013-Premier-League
https://fbref.com/en/matches/c204a78b/Manchester-City-Hull-City-August-31-2013-Premier-League
https://fbref.com/en/matches/eb25ce01/Newcastle-United-Fulham-August-31-2013-Premier-League
Season: 2013-2014
Final Length: 353
End of Function
Start of Function
Season: 2013-2014
Original Length: 353
https://fbref.com/en/matches/12d5977f/North-London-Derby-Arsenal-Tottenham-Hotspur-September-1-2013-Premier-League
https://fbref.com/en/matches/79139453/North-West-Derby-Liverpool-Manchester-United-September-1-2013-Premier-League
https://fb

https://fbref.com/en/matches/f03ad0bc/West-Bromwich-Albion-Crystal-Palace-November-2-2013-Premier-League
https://fbref.com/en/matches/25580703/Everton-Tottenham-Hotspur-November-3-2013-Premier-League
Season: 2013-2014
Final Length: 281
End of Function
Start of Function
Season: 2013-2014
Original Length: 281
https://fbref.com/en/matches/4bb2f946/Cardiff-City-Swansea-City-November-3-2013-Premier-League
https://fbref.com/en/matches/5023e438/Liverpool-Fulham-November-9-2013-Premier-League
https://fbref.com/en/matches/552e724e/Crystal-Palace-Everton-November-9-2013-Premier-League
https://fbref.com/en/matches/9e4346af/Chelsea-West-Bromwich-Albion-November-9-2013-Premier-League
https://fbref.com/en/matches/a47e8688/Norwich-City-West-Ham-United-November-9-2013-Premier-League
https://fbref.com/en/matches/aba4aa88/Southampton-Hull-City-November-9-2013-Premier-League
https://fbref.com/en/matches/b252c5f1/Aston-Villa-Cardiff-City-November-9-2013-Premier-League
https://fbref.com/en/matches/1c479690

https://fbref.com/en/matches/4c5651eb/Chelsea-Swansea-City-December-26-2013-Premier-League
https://fbref.com/en/matches/6b19615e/Manchester-City-Liverpool-December-26-2013-Premier-League
https://fbref.com/en/matches/774e2e93/Hull-City-Manchester-United-December-26-2013-Premier-League
https://fbref.com/en/matches/7e596e09/Tottenham-Hotspur-West-Bromwich-Albion-December-26-2013-Premier-League
https://fbref.com/en/matches/bdffa3ae/Newcastle-United-Stoke-City-December-26-2013-Premier-League
https://fbref.com/en/matches/ee1e6301/Everton-Sunderland-December-26-2013-Premier-League
https://fbref.com/en/matches/ef9a5676/Cardiff-City-Southampton-December-26-2013-Premier-League
Season: 2013-2014
Final Length: 200
End of Function
Start of Function
Season: 2013-2014
Original Length: 200
https://fbref.com/en/matches/0b5215cb/West-Ham-United-West-Bromwich-Albion-December-28-2013-Premier-League
https://fbref.com/en/matches/24bd9967/Aston-Villa-Swansea-City-December-28-2013-Premier-League
https://fbref

https://fbref.com/en/matches/ba148bbd/Hull-City-Southampton-February-11-2014-Premier-League
https://fbref.com/en/matches/cef0cbd9/West-Bromwich-Albion-Chelsea-February-11-2014-Premier-League
Season: 2013-2014
Final Length: 128
End of Function
Start of Function
Season: 2013-2014
Original Length: 128
https://fbref.com/en/matches/ee2bf112/Cardiff-City-Aston-Villa-February-11-2014-Premier-League
https://fbref.com/en/matches/f758ce2a/West-Ham-United-Norwich-City-February-11-2014-Premier-League
https://fbref.com/en/matches/2407b302/Newcastle-United-Tottenham-Hotspur-February-12-2014-Premier-League
https://fbref.com/en/matches/97e83941/Arsenal-Manchester-United-February-12-2014-Premier-League
https://fbref.com/en/matches/cd9d99f8/Stoke-City-Swansea-City-February-12-2014-Premier-League
https://fbref.com/en/matches/fc273704/Fulham-Liverpool-February-12-2014-Premier-League
https://fbref.com/en/matches/0865283f/Arsenal-Sunderland-February-22-2014-Premier-League
https://fbref.com/en/matches/4585cf

https://fbref.com/en/matches/397540bb/Sunderland-Everton-April-12-2014-Premier-League
https://fbref.com/en/matches/5dbc2c23/Southampton-Cardiff-City-April-12-2014-Premier-League
https://fbref.com/en/matches/b7aee726/West-Bromwich-Albion-Tottenham-Hotspur-April-12-2014-Premier-League
https://fbref.com/en/matches/caed3efd/Crystal-Palace-Aston-Villa-April-12-2014-Premier-League
https://fbref.com/en/matches/e324829e/Stoke-City-Newcastle-United-April-12-2014-Premier-League
https://fbref.com/en/matches/8523eee1/Liverpool-Manchester-City-April-13-2014-Premier-League
Season: 2013-2014
Final Length: 47
End of Function
Start of Function
Season: 2013-2014
Original Length: 47
https://fbref.com/en/matches/da4923dd/Swansea-City-Chelsea-April-13-2014-Premier-League
https://fbref.com/en/matches/9efe3d9a/Arsenal-West-Ham-United-April-15-2014-Premier-League
https://fbref.com/en/matches/1b26a871/Manchester-City-Sunderland-April-16-2014-Premier-League
https://fbref.com/en/matches/b2801d86/Everton-Crystal-

https://fbref.com/en/matches/0b68b804/Liverpool-Arsenal-September-2-2012-Premier-League
https://fbref.com/en/matches/371d6528/Newcastle-United-Aston-Villa-September-2-2012-Premier-League
https://fbref.com/en/matches/d80ac534/Southampton-Manchester-United-September-2-2012-Premier-League
https://fbref.com/en/matches/02581161/Stoke-City-Manchester-City-September-15-2012-Premier-League
https://fbref.com/en/matches/0b68caed/Fulham-West-Bromwich-Albion-September-15-2012-Premier-League
https://fbref.com/en/matches/2000534c/Queens-Park-Rangers-Chelsea-September-15-2012-Premier-League
https://fbref.com/en/matches/5a0b388e/Norwich-City-West-Ham-United-September-15-2012-Premier-League
https://fbref.com/en/matches/5fbcbf51/Sunderland-Liverpool-September-15-2012-Premier-League
Season: 2012-2013
Final Length: 346
End of Function
Start of Function
Season: 2012-2013
Original Length: 346
https://fbref.com/en/matches/97e07563/Arsenal-Southampton-September-15-2012-Premier-League
https://fbref.com/en/matc

### Test scheduling

In [10]:
# def my_task1():
#     print('Task 1\n')
# if __name__=='__main__':
#      scheduler = BackgroundScheduler(daemon=True)
#      scheduler.add_job(my_task1, 'interval', id='my_task1', minutes=1)
#      scheduler.start()

Task 1



In [8]:
scheduler.shutdown()

SchedulerNotRunningError: Scheduler is not running

### Test rate-limited

In [13]:
url = "https://fbref.com/en/matches/3adf2aa7/Brentford-Arsenal-August-13-2021-Premier-League"
data = requests.get(url)
soup = BeautifulSoup(data.content, "html.parser")
# find_teams(soup)
# print(soup)


<!DOCTYPE html>

<html class="no-js" data-root="/home/fb/deploy/www/base" data-version="klecko-" lang="en">
<head>
<meta charset="utf-8"/>
<meta content="ie=edge" http-equiv="x-ua-compatible"/>
<meta content="width=device-width, initial-scale=1.0, maximum-scale=2.0" name="viewport">
<link href="https://cdn.ssref.net/req/202304181" rel="dns-prefetch"/>
<!-- Quantcast Choice. Consent Manager Tag v2.0 (for TCF 2.0) -->
<script async="true" type="text/javascript">
    (function() {
	var host = window.location.hostname;
	var element = document.createElement('script');
	var firstScript = document.getElementsByTagName('script')[0];
	var url = 'https://cmp.quantcast.com'
	    .concat('/choice/', 'XwNYEpNeFfhfr', '/', host, 
		    '/choice.js?tag_version=V2');
	var uspTries = 0;
	var uspTriesLimit = 3;
	element.async = true;
	element.type = 'text/javascript';
	element.src = url;
	
	firstScript.parentNode.insertBefore(element, firstScript);
	
	function makeStub() {
	    var TCF_LOCATOR_NAME = '

Task 1

Task 1



### English Premier League Website

In [14]:
standing_url = 'https://www.premierleague.com/match/75185'
data = requests.get(standing_url)
soup = BeautifulSoup(data.content, "html.parser")
# soup

Task 1

Task 1



In [3]:
# Returned data: 2 element array; first element: home_score; second element: away_score; ie [home_score, away_score]
def obtain_score(soup):
    score = soup.find("div", class_="score fullTime").text.split('-')
    return score
    
home_score, away_score = obtain_score(soup)
# score = soup.find("div", class_="score fullTime")
# score.text

In [4]:
# Obtain team names
def obtain_team_names(soup, side):
    if side == "home":
        return soup.find("div", class_="team home").find("a", class_="teamName").find("span",class_="long").text
    elif side == "away":
        return soup.find("div", class_="team away").find("a", class_="teamName").find("span",class_="long").text

    
home_team = obtain_team_names(soup, "home")
away_team = obtain_team_names(soup, "away")

In [14]:
# Obtain goal timings
# team: "home" or "away"
def obtain_goal_timings(soup, side):
    goal_timing = []
    
    if side == "home":
        # Obtain goal events for home team
        goal_tags = soup.find("div", class_="matchEvents matchEventsContainer").find("div", class_="home").find_all("div", class_="event")
    elif side == "away":
        goal_tags = soup.find("div", class_="matchEvents matchEventsContainer").find("div", class_="away").find_all("div", class_="event")
        
    # Obtain time of goals
    for tags in goal_tags:
    #     print(goal.find("a").text)  # Obtain scorer names
        goal_time = tags.get_text(strip=True)
        goal_time = re.findall(r'\d+', goal_time)[0]
        goal_timing.append(goal_time)
        
    return goal_timing

home_goal_timings = obtain_goal_timings(soup,"home")
away_goal_timings = obtain_goal_timings(soup,"away")

In [10]:
def obtain_playing_home_team(soup):
    data = []
    home_lineup = soup.find_all("ul", class_="startingLineUpContainer squadList home")
    starting_players_tags = home_lineup[0].find_all('div', class_="name")
    sub_players_tags= home_lineup[1].find_all('div', class_="name")

#     Starting players
    for tag in starting_players_tags:
        player_name = tag.get_text(strip=True)
        player_name = re.sub(r'\d+\'', '', player_name).strip()

        if tag.find('span', class_='sub'):
            unformatted_sub_time = tag.find('span', class_='sub').text
            formatted_sub_time = int(re.sub("'", "", unformatted_sub_time))
            data.append([player_name, 0, formatted_sub_time, formatted_sub_time-0])
            
        else:
            formatted_sub_time = 90
            data.append([player_name, 0, formatted_sub_time, formatted_sub_time-0])
    
#     Substituted players
    for tag in sub_players_tags:
        player_name = tag.get_text(strip=True)
        player_name = re.sub(r'\d+\'', '', player_name).strip()
        
        if tag.find('span', class_='sub'):
            unformatted_sub_time = tag.find('span', class_='sub').text
            formatted_sub_time = int(re.sub("'", "", unformatted_sub_time))
            data.append([player_name, formatted_sub_time, 90, 90-formatted_sub_time])
        else:
            data.append([player_name, 0, 0, 0])
    
    return data

obtain_playing_home_team(soup)

[['Kepa Arrizabalaga', 0, 90, 90],
 ['Benoît Badiashile', 0, 90, 90],
 ['Wesley Fofana', 0, 87, 87],
 ['Kalidou Koulibaly', 0, 90, 90],
 ['Ben Chilwell', 0, 90, 90],
 ['Reece James', 0, 90, 90],
 ['Mateo Kovacic', 0, 81, 81],
 ['Enzo Fernández', 0, 90, 90],
 ['Christian Pulisic', 0, 62, 62],
 ['Kai Havertz', 0, 90, 90],
 ['João Félix', 0, 86, 86],
 ['Marcus Bettinelli', 0, 0, 0],
 ['Marc Cucurella', 0, 0, 0],
 ['Trevoh Chalobah', 87, 90, 3],
 ["N'Golo Kanté", 0, 0, 0],
 ['Mykhailo Mudryk', 0, 0, 0],
 ['Carney Chukwuemeka', 86, 90, 4],
 ['Ruben Loftus-Cheek', 81, 90, 9],
 ['Conor Gallagher', 62, 90, 28],
 ['Noni Madueke', 0, 0, 0]]

In [12]:
# Return data: name_of_player, start_time, end_time, playing_time
def obtain_playing_time_away_team(soup):
    data = []
    away_lineup = soup.find("div", class_="teamList mcLineUpContainter awayLineup")
    starting_players_tags = away_lineup.find_all("ul", class_="startingLineUpContainer squadList")[0].find_all('div', class_="name")
    sub_players_tags = away_lineup.find_all("ul", class_="startingLineUpContainer squadList")[1].find_all('div', class_="name")
    
#     Starting players
    for tag in starting_players_tags:
        player_name = tag.get_text(strip=True)
        player_name = re.sub(r'\d+\'', '', player_name).strip()

        if tag.find('span', class_='sub'):
            unformatted_sub_time = tag.find('span', class_='sub').text
            formatted_sub_time = int(re.sub("'", "", unformatted_sub_time))
            data.append([player_name, 0, formatted_sub_time, formatted_sub_time-0])
        else:
            formatted_sub_time = 90
            data.append([player_name, 0, formatted_sub_time, formatted_sub_time-0])
    
#     Substituted players
    for tag in sub_players_tags:
        player_name = tag.get_text(strip=True)
        player_name = re.sub(r'\d+\'', '', player_name).strip()

        if tag.find('span', class_='sub'):
            unformatted_sub_time = tag.find('span', class_='sub').text
            formatted_sub_time = int(re.sub("'", "", unformatted_sub_time))
            data.append([player_name, formatted_sub_time, 90, 90-formatted_sub_time])
        else:
            data.append([player_name, 0, 0, 0])

    return data

obtain_playing_time_away_team(soup)

[['Jordan Pickford', 0, 90, 90],
 ['James Tarkowski', 0, 90, 90],
 ['Michael Keane', 0, 90, 90],
 ['Ben Godfrey', 0, 90, 90],
 ['Séamus Coleman', 0, 84, 84],
 ['Alex Iwobi', 0, 90, 90],
 ['Amadou Onana', 0, 90, 90],
 ['Abdoulaye Doucouré', 0, 90, 90],
 ['Dwight McNeil', 0, 90, 90],
 ['Idrissa Gueye', 0, 79, 79],
 ['Demarai Gray', 0, 90, 90],
 ['Asmir Begovic', 0, 0, 0],
 ['Vitalii Mykolenko', 84, 90, 6],
 ['Yerry Mina', 0, 0, 0],
 ['Conor Coady', 0, 0, 0],
 ['Mason Holgate', 0, 0, 0],
 ['James Garner', 0, 0, 0],
 ['Tom Davies', 0, 0, 0],
 ['Neal Maupay', 0, 0, 0],
 ['Ellis Simms', 79, 90, 11]]

In [17]:
data = {
    "Home Team": [home_team],
    "Away Team": [away_team],
    "Home Score": [home_score],
    "Away Score": [away_score],
    "Home Goal Timings": [home_goal_timings],
    "Away Goal Timings": [away_goal_timings]
}


df = pd.DataFrame(data)
df

Unnamed: 0,Home Team,Away Team,Home Score,Away Score,Home Goal Timings,Away Goal Timings
0,Chelsea,Everton,2,2,"[52, 76]","[69, 89]"
