In [42]:
# Packages used across functions - others are imported in their respective functions.
from bs4 import BeautifulSoup as bsoup
import requests as reqs
import json
import os
from ipywidgets import widgets, Dropdown, interact, interact_manual, Button, Output
from IPython.display import display

In [43]:
# Create team JSON file for parsing for front-end.
# the b variable here is just placed for the button interaction in UI.
def _split_team_href(href):
    """Split a club history link into its squad code and season-URL prefix.

    Links are expected to look like
    ``/en/squads/<code>/history/<Name>-Stats-and-History``; for such a link
    the result is ``('<code>', '<Name>-Stats')``.  Both values are empty
    strings when the link does not contain ``/history/``.
    """
    head, marker, slug = href.partition('/history/')
    if not marker:
        return '', ''
    suffix = '-and-History'
    # Strip only the trailing suffix: a club name may itself contain "-and",
    # and searching for the first "-and" would cut such a name short.
    if slug.endswith(suffix):
        slug = slug[:-len(suffix)]
    # The code is the last path segment in front of "/history/".
    return head.rsplit('/', 1)[-1], slug


def team_capture(b):
    """Scrape the FBref England clubs table and save it as a dated JSON file.

    Downloads the clubs index page, converts every table row that carries
    data cells into a dict and writes the list of dicts to
    ``team_list_<YYYYMMDD>.json`` in the current working directory.  A
    confirmation line is then printed into the module-level ``output``
    widget, which must exist by the time this callback runs.

    Args:
        b: Object handed over by the button click callback; unused.

    Returns:
        None.

    Raises:
        requests.HTTPError: If the clubs page answers with an error status,
            so that an error page is never written out as the team list.
    """
    from datetime import date

    today = date.today().strftime('%Y%m%d')
    clubs_link = 'https://fbref.com/en/country/clubs/ENG/England-Football-Clubs'
    file_name = 'team_list_' + today + '.json'
    # (output key, ``data-stat`` attribute) pairs, in the order they are stored.
    stat_columns = (
        ('Gender', 'gender'),
        ('Competition', 'comp'),
        ('Earliest Season', 'min_season'),
        ('Latest Season', 'max_season'),
        ('Total Seasons', 'num_comps'),
        ('Championships', 'first_place_finishes'),
        ('Other Names', 'other_names'),
    )

    response = reqs.get(clubs_link, timeout=30)
    response.raise_for_status()
    club_page = bsoup(response.content, 'html.parser')

    team_total = []
    for row in club_page.find_all('tr'):
        # ``find`` only looks inside this row.  ``find_next`` would carry on
        # into later rows and silently borrow their cells.
        name_cell = row.find('th')
        if name_cell is None or row.find('td') is None:
            # Header or spacer row: nothing to record.
            continue
        team_dict = {'Team': name_cell.get_text()}
        for key, stat in stat_columns:
            cell = row.find('td', attrs={"data-stat": stat})
            team_dict[key] = cell.get_text() if cell is not None else ''
        # 'Code' and 'Prefix' are used for capturing season data later on.
        link = row.find('a')
        href = link.get('href', '') if link is not None else ''
        team_dict['Code'], team_dict['Prefix'] = _split_team_href(href)
        team_total.append(team_dict)

    # Write JSON file
    with open(file_name, 'w') as outfile:
        json.dump(team_total, outfile)
    with output:
        print("Team DB Updated, " + today)
    return

In [44]:
# Load the team JSON file created above.
def data_set():
    """Load the most recent team list from the current working directory.

    ``team_capture`` writes files named ``team_list_<YYYYMMDD>.json``; the one
    with the latest date stamp is loaded (the stamp sorts correctly as plain
    text).  If no such file exists, the first ``.json`` file in sorted order
    is used instead, so directories holding a differently named data file
    keep working.

    Returns:
        The decoded JSON content of the chosen file.

    Raises:
        FileNotFoundError: If the directory contains no ``.json`` file.
    """
    # ``os.listdir`` returns names in arbitrary order, so sort before picking.
    json_files = sorted(name for name in os.listdir() if name.endswith('.json'))
    team_files = [name for name in json_files if name.startswith('team_list_')]
    if team_files:
        chosen = team_files[-1]
    elif json_files:
        chosen = json_files[0]
    else:
        raise FileNotFoundError(
            "No .json team list found in " + os.getcwd()
            + "; run the team data update first.")
    with open(chosen) as json_file:
        return json.load(json_file)

In [45]:
# Gather league list for selection.
def league_names():
    """Return the distinct competition names found in the team data.

    Names are listed in order of first appearance.  Teams without a
    competition (empty or missing value) are skipped explicitly, rather than
    by dropping whichever entry happens to come first, so a real league at
    the top of the data is no longer lost.

    Returns:
        list: Unique, non-empty competition names.
    """
    league_list = []
    for team in data_set():
        competition = team['Competition']
        # The stored values are strings, so a blank competition is '' and
        # would slip through an ``is not None`` check.
        if not competition:
            continue
        if competition not in league_list:
            league_list.append(competition)
    return league_list

# Gather team list for selection.
def team_names(league_select):
    """Return the distinct team names whose competition is ``league_select``.

    Args:
        league_select: Competition name to match against each record's
            'Competition' value.

    Returns:
        list: Matching 'Team' values, without repeats, in the order they
        appear in the loaded data.
    """
    names = []
    for record in data_set():
        if record['Competition'] != league_select:
            continue
        club = record['Team']
        if club in names:
            continue
        names.append(club)
    return names

# Capture team details for parsing based on above choices.
def team_data(team_select):
    """Return the first record whose 'Team' value equals ``team_select``.

    Args:
        team_select: Team name to look up.

    Returns:
        The matching record from the loaded data, or ``None`` when no record
        matches.
    """
    matching = (record for record in data_set() if record['Team'] == team_select)
    return next(matching, None)

In [46]:
def parse_seasons(code,prefix,min_season):
    """Collect the match-report links of one team season and parse them.

    Args:
        code: Squad code placed after ``/en/squads/`` in the season URL.
        prefix: Trailing URL segment for the team.
        min_season: Season label placed in the URL, e.g. ``'2019-2020'``.
            Despite the name, this is simply the season to fetch.

    Returns:
        Whatever ``game_data`` returns for the collected links.

    Raises:
        requests.HTTPError: If the season page answers with an error status.
    """
    # Season to capture games for:
    season_to_parse = "https://fbref.com/en/squads/" + code + '/' + min_season + '/' + prefix
    response = reqs.get(season_to_parse, timeout=30)
    response.raise_for_status()
    season_page = bsoup(response.content, 'html.parser')

    match_links = []
    for cell in season_page.find_all('td', attrs={"data-stat": "match_report"}):
        # Look for the anchor inside this cell only.  ``find_next`` would fall
        # through to an unrelated link when the cell itself has none.
        anchor = cell.find('a')
        if anchor is None or not anchor.get('href'):
            continue
        match_links.append("https://fbref.com" + str(anchor['href']))
    return game_data(match_links)

In [47]:
def game_data(match_links):
    """Extract date and team names from the first match report link.

    Only the first entry of ``match_links`` is fetched.  The scores are
    located on the page but, for now, only printed rather than stored.

    Args:
        match_links: Sequence of match report URLs.

    Returns:
        dict: Keys 'Date' (``YYYY-MM-DD``), 'Home' and 'Away' for the first
        link, or an empty dict when ``match_links`` is empty.
    """
    from datetime import datetime

    match_dataset = {}
    for report_url in match_links[:1]:
        report_page = bsoup(reqs.get(report_url).content, 'html.parser')

        # Date: drop everything up to the first space, then reformat.
        meta_box = report_page.find("div", {"class": "scorebox_meta"})
        date_text = meta_box.find('a').get_text()
        first_space = date_text.find(' ')
        played_on = datetime.strptime(date_text[first_space + 1:], '%B %d, %Y')
        match_dataset['Date'] = played_on.strftime('%Y-%m-%d')

        # Team names: sliced out of the page title around 'vs.' and 'Match'.
        title_text = report_page.find("title").get_text()
        vs_at = title_text.find('vs.')
        match_at = title_text.find('Match')
        match_dataset['Home'] = title_text[:vs_at - 1]
        match_dataset['Away'] = title_text[vs_at + 4:match_at - 1]

        # Score: found but only echoed for inspection.
        score_divs = report_page.find_all("div", {"class": "score"})
        print(score_divs)
    return match_dataset

In [48]:
### UI for league, team and season select
# Select league to pick team from.
league_box=Dropdown(
        options=league_names(),
        description='Pick League:',
        layout={'width': 'max-content'},
        style = {'description_width': 'initial'},
        disabled=False)

# Select team to parse, and capture data for.
# Options are filled in once a league has been chosen.
team_box=Dropdown(
        description='Pick Team:',
        layout={'width': 'max-content'},
        style = {'description_width': 'initial'},
        disabled=False)

# Select season to parse, and capture data for.
# 2014-2015 season is the earliest instance with match data from the looks of it.
season_box=Dropdown(
        description='Pick Season:',
        options=['2014-2015','2015-2016',
                 '2016-2017','2017-2018',
                 '2018-2019','2019-2020','2020-2021',
                 '2021-2022'],
        value='2019-2020',
        layout={'width': 'max-content'},
        style = {'description_width': 'initial'},
        disabled=False)

# Button that re-scrapes the team list.  The trait is spelled ``disabled``;
# the previous ``disable`` keyword was not a Button trait.
scrape_button=Button(
        description='Update Team Data',
        style = {'description_width': 'initial'},
        disabled=False)

# Output area that ``team_capture`` prints its confirmation into.
output = widgets.Output()
display(scrape_button, output)
scrape_button.on_click(team_capture)

@interact(league = league_box)
def choose_both(league):
    """Refresh the team dropdown for the chosen league.

    Args:
        league: Competition name supplied by ``interact`` from ``league_box``.

    Returns:
        None.
    """
    # Use the value that was passed in rather than re-reading the widget, so
    # the function also honours its argument when called directly.
    team_box.options = team_names(league)
    return

@interact_manual(team = team_box, use_season = season_box)
def choose_team(team, use_season):
    """Look up the picked team and start parsing the picked season.

    Args:
        team: Team name supplied from ``team_box``.
        use_season: Season label supplied from ``season_box``.

    Returns:
        The result of ``team_choice_cap`` for the team's record and season.
    """
    selected_record = team_data(team)
    return team_choice_cap(selected_record, use_season)

def team_choice_cap(data_set, use_season):
    """Hand a team record's 'Code' and 'Prefix' to ``parse_seasons``.

    Args:
        data_set: Team record providing the 'Code' and 'Prefix' keys.  Within
            this function the name shadows the module-level ``data_set``
            loader.
        use_season: Season label forwarded as the season to parse.

    Returns:
        The result of ``parse_seasons``.
    """
    return parse_seasons(data_set['Code'], data_set['Prefix'], use_season)

Button(description='Update Team Data', style=ButtonStyle())

Output()

interactive(children=(Dropdown(description='Pick League:', layout=Layout(width='max-content'), options=('EFL L…

interactive(children=(Dropdown(description='Pick Team:', layout=Layout(width='max-content'), options=('Accring…