In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def generate_teams_id_dataframe(season: str, timeout: float = 30.0) -> list:
    """Scrape the club names listed on a Premier League season's Wikipedia page.

    Despite the name, this returns a plain list of strings, not a DataFrame.

    Args:
        season: Season label used to build the article URL, e.g. ``'2016-17'``.
        timeout: Seconds to wait for Wikipedia before giving up.

    Returns:
        The text of the first ``<td>`` of every non-header row of the selected
        table, lower-cased with ``'.'``, ``'fc'`` and spaces removed. Rows whose
        first cell contains ``'London'`` are left out.

    Raises:
        requests.HTTPError: If Wikipedia answers with an error status.
        ValueError: If the page contains no table matching the expected class.
    """
    url = f'https://en.wikipedia.org/wiki/{season}_Premier_League'
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than parsing an error page as the article.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # The 2019-20 article is looked up with a different class filter than the
    # other seasons.
    table_class = 'wikitable sortable' if season == '2019-20' else 'wikitable'
    table = soup.find('table', {'class': table_class})
    if table is None:
        raise ValueError(f'No table with class {table_class!r} found at {url}')

    team_names = []
    for row in table.find_all('tr')[1:]:  # Skip the header row
        cells = row.find_all('td')
        if not cells:
            continue
        team_name = cells[0].text.strip()
        # NOTE(review): presumably drops rows whose first cell is a location
        # rather than a club name -- confirm against the page layout.
        if 'London' in team_name:
            continue
        # Replacement order matters: dots go first so 'F.C.' collapses to 'fc'
        # before the 'fc' token itself is stripped.
        normalised = team_name.lower().replace('.', '').replace('fc', '').replace(' ', '')
        team_names.append(normalised)

    return team_names

In [2]:
# Premier League seasons to scrape, oldest first.
seasons = [
    '2016-17',
    '2017-18',
    '2018-19',
    '2019-20',
    '2020-21',
    '2021-22',
    '2022-23',
]

In [3]:
def populate_team_dataframe(seasons: list) -> pd.DataFrame:
    """Build one table of clubs covering every requested season.

    Args:
        seasons: Season labels, each passed to ``generate_teams_id_dataframe``.

    Returns:
        A DataFrame with columns ``teams``, ``team_id`` and ``season`` and a
        fresh 0..n-1 index. ``team_id`` restarts at 1 for every season and
        follows the order in which the scraper returned the names, so the same
        club can have different ids in different seasons. An empty ``seasons``
        list yields an empty frame that still has the three columns.
    """
    columns = ['teams', 'team_id', 'season']
    # Collect plain records and build the frame once, instead of re-copying
    # the accumulated rows with pd.concat on every iteration.
    records = []
    for season in seasons:
        for team_id, team in enumerate(generate_teams_id_dataframe(season), start=1):
            records.append({'teams': team, 'team_id': team_id, 'season': season})
    return pd.DataFrame(records, columns=columns)

In [4]:
# Scrape every season in `seasons` and stack the results into one table.
teams = populate_team_dataframe(seasons)

In [5]:
# Display the combined table.
teams

Unnamed: 0,teams,team_id,season
0,arsenal,1,2016-17
1,bournemouth,2,2016-17
2,burnley,3,2016-17
3,chelsea,4,2016-17
4,crystalpalace,5,2016-17
...,...,...,...
135,nottinghamforest,16,2022-23
136,southampton,17,2022-23
137,tottenhamhotspur,18,2022-23
138,westhamunited,19,2022-23


In [6]:
def add_promotion_indicator(teams: pd.DataFrame, seasons: list) -> pd.DataFrame:
    """Flag clubs that were absent from the preceding season.

    Args:
        teams: Frame with at least the columns ``teams`` and ``season``. It is
            modified in place: a ``promotion_indicator`` column is added.
        seasons: Season labels in chronological order. The entry directly
            before a row's season in this list is treated as the previous
            season.

    Returns:
        The same ``teams`` object. ``promotion_indicator`` is 0 for every row
        of the first listed season, 0 when the club also appears in the
        previous listed season, and 1 otherwise.

    Raises:
        ValueError: If a row's season does not occur in ``seasons``.
    """
    # First position of every season label, mirroring what list.index() returns.
    season_position = {}
    for position, season in enumerate(seasons):
        season_position.setdefault(season, position)

    # One set of (club, season) pairs replaces a full-frame scan per row.
    present = set(zip(teams['teams'], teams['season']))

    promotion_indicator = []
    for team_name, season in zip(teams['teams'], teams['season']):
        if season not in season_position:
            raise ValueError(f'{season!r} is not in list')
        position = season_position[season]
        if position == 0:
            # No earlier season to compare against.
            promotion_indicator.append(0)
        elif (team_name, seasons[position - 1]) in present:
            promotion_indicator.append(0)
        else:
            promotion_indicator.append(1)

    teams['promotion_indicator'] = promotion_indicator
    return teams


In [7]:
# Add the promotion flag. add_promotion_indicator writes the new column onto
# the frame it is given and returns that same frame.
teams = add_promotion_indicator(teams, seasons)
# Display the result.
teams

Unnamed: 0,teams,team_id,season,promotion_indicator
0,arsenal,1,2016-17,0
1,bournemouth,2,2016-17,0
2,burnley,3,2016-17,0
3,chelsea,4,2016-17,0
4,crystalpalace,5,2016-17,0
...,...,...,...,...
135,nottinghamforest,16,2022-23,1
136,southampton,17,2022-23,0
137,tottenhamhotspur,18,2022-23,0
138,westhamunited,19,2022-23,0


In [8]:
# Persist the table as CSV.
# NOTE(review): the destination is an absolute path on one user's machine, so
# this cell fails elsewhere unless that directory exists -- consider a
# configurable output directory.
# NOTE(review): index=False is not passed, so the DataFrame index is written as
# an unnamed first column -- confirm downstream readers expect it.
teams.to_csv('/Users/storm/Documents/GitHub/Fantasy-Premier-League/webscraper_output/teams_webscraper.csv')