In [1]:
from bs4 import BeautifulSoup
import requests


## Get all premier league teams

In [40]:
def get_teams():
    '''Return a dict reppresenting the current league table'''
    html_pl_teams = requests.get('https://fbref.com/en/comps/9/Premier-League-Stats').text
    soup = BeautifulSoup(html_pl_teams, 'lxml')
    teams_soup = soup.find_all('table', class_='stats_table sortable min_width force_mobilize')[1]
    teams_soup_teams = teams_soup.find_all('a')
    teams_soup_img = teams_soup.find_all('img')
    teams = {}

    for team, img, i in zip(teams_soup_teams, teams_soup_img, range(1, len(teams_soup_teams)+1)):
        teams[i] = team.text

        img_data = requests.get(img['src']).content 

        with open(f'img/{team.text}.jpg', 'wb') as handler: 

               handler.write(img_data)
    return teams

print(get_teams())

{1: 'Arsenal', 2: 'Manchester City', 3: 'Newcastle Utd', 4: 'Manchester Utd', 5: 'Aston Villa', 6: 'Tottenham', 7: 'Liverpool', 8: 'Brighton', 9: 'Fulham', 10: 'Brentford', 11: 'Chelsea', 12: 'Crystal Palace', 13: 'Wolves', 14: 'West Ham', 15: 'Bournemouth', 16: 'Leeds United', 17: 'Leicester City', 18: 'Everton', 19: "Nott'ham Forest", 20: 'Southampton'}


## Get all premier league matches

In [35]:
def get_matches():
    html_pl_teams = requests.get('https://fbref.com/en/comps/9/schedule/Premier-League-Scores-and-Fixtures').text
    soup = BeautifulSoup(html_pl_teams, 'lxml')
    row_soup = soup.find_all('tr')
    matches = []
    for tr in row_soup:
        week = tr.find('th', {'data-stat':'gameweek'}).text
        if week and week.isnumeric():
            team1 = tr.find('td', {'data-stat':'home_team'}).text
            team2 = tr.find('td', {'data-stat':'away_team'}).text
            score = tr.find('td', {'data-stat':'score'}).text
            date = tr.find('td', {'data-stat':'date'}).text 
            matches.append((team1, score, team2, week, date))
    return matches
for match in get_matches():
    print(match)

('Crystal Palace', '0–2', 'Arsenal', '1', '2022-08-05')
('Fulham', '2–2', 'Liverpool', '1', '2022-08-06')
('Tottenham', '4–1', 'Southampton', '1', '2022-08-06')
('Newcastle Utd', '2–0', "Nott'ham Forest", '1', '2022-08-06')
('Leeds United', '2–1', 'Wolves', '1', '2022-08-06')
('Bournemouth', '2–0', 'Aston Villa', '1', '2022-08-06')
('Everton', '0–1', 'Chelsea', '1', '2022-08-06')
('Leicester City', '2–2', 'Brentford', '1', '2022-08-07')
('Manchester Utd', '1–2', 'Brighton', '1', '2022-08-07')
('West Ham', '0–2', 'Manchester City', '1', '2022-08-07')
('Aston Villa', '2–1', 'Everton', '2', '2022-08-13')
('Manchester City', '4–0', 'Bournemouth', '2', '2022-08-13')
('Southampton', '2–2', 'Leeds United', '2', '2022-08-13')
('Wolves', '0–0', 'Fulham', '2', '2022-08-13')
('Arsenal', '4–2', 'Leicester City', '2', '2022-08-13')
('Brighton', '0–0', 'Newcastle Utd', '2', '2022-08-13')
('Brentford', '4–0', 'Manchester Utd', '2', '2022-08-13')
("Nott'ham Forest", '1–0', 'West Ham', '2', '2022-08-14

In [71]:
def get_matches(team : str):
    html_pl_teams = requests.get(f'https://fbref.com/en/squads/18bb7c10/{team}-Stats').text
    soup = BeautifulSoup(html_pl_teams, 'lxml')
    table_soup = soup.find('table', id="matchlogs_for")
    row_soup = table_soup.find_all('tr')
    matches = []
    week = 0
    for tr in row_soup:
        league = tr.find('td', {'data-stat':'comp'})
        if league and league.text == "Premier League":
            week += 1
            venue = tr.find('td', {'data-stat':'venue'})
            if venue and venue.text == "Home":
                team1 = team
                team2 = tr.find('td', {'data-stat':'opponent'}).text
                gf = tr.find('th', {'data-stat':'goals_for'})
                ga = tr.find('th', {'data-stat':'goals_against'})
                if gf:
                    score = f'{gf.text}-{ga.text}'
                else:
                    score = ''
                date = tr.find('th', {'data-stat':'date'}).text
                matches.append((team1, score, team2, week, date))
            else:
                team1 = tr.find('td', {'data-stat':'opponent'}).text
                team2 = team
                ga = tr.find('th', {'data-stat':'goals_for'})
                gf = tr.find('th', {'data-stat':'goals_against'})
                if gf:
                    score = f'{gf.text}-{ga.text}'
                else:
                    score = ''
                date = tr.find('th', {'data-stat':'date'}).text
                matches.append((team1, score, team2, week, date))
    return matches
    
get_matches("Arsenal")

[('Crystal Palace', '', 'Arsenal', 1, '2022-08-05'),
 ('Arsenal', '', 'Leicester City', 2, '2022-08-13'),
 ('Bournemouth', '', 'Arsenal', 3, '2022-08-20'),
 ('Arsenal', '', 'Fulham', 4, '2022-08-27'),
 ('Arsenal', '', 'Aston Villa', 5, '2022-08-31'),
 ('Manchester Utd', '', 'Arsenal', 6, '2022-09-04'),
 ('Brentford', '', 'Arsenal', 7, '2022-09-18'),
 ('Arsenal', '', 'Tottenham', 8, '2022-10-01'),
 ('Arsenal', '', 'Liverpool', 9, '2022-10-09'),
 ('Leeds United', '', 'Arsenal', 10, '2022-10-16'),
 ('Southampton', '', 'Arsenal', 11, '2022-10-23'),
 ('Arsenal', '', "Nott'ham Forest", 12, '2022-10-30'),
 ('Chelsea', '', 'Arsenal', 13, '2022-11-06'),
 ('Wolves', '', 'Arsenal', 14, '2022-11-12'),
 ('Arsenal', '', 'West Ham', 15, '2022-12-26'),
 ('Brighton', '', 'Arsenal', 16, '2022-12-31'),
 ('Arsenal', '', 'Newcastle Utd', 17, '2023-01-03'),
 ('Tottenham', '', 'Arsenal', 18, '2023-01-15'),
 ('Arsenal', '', 'Manchester Utd', 19, '2023-01-22'),
 ('Everton', '', 'Arsenal', 20, '2023-02-04'),
 (