In [1]:
# Package Imports and CSV Reads

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
import requests
import pandas as pd
import time
import tqdm

import warnings
# Silence every Python warning for the rest of the session. Note that this also
# hides deprecation warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

# Squad lookup tables. The link-building cells read the 'Squad_Code' and 'Text'
# columns of each frame to assemble FBref URLs.
big5_squad_codes = pd.read_csv('big5_squad_codes.csv')
mls_squad_codes = pd.read_csv('mls_squad_codes.csv')

from IPython.display import display, HTML
# Inject a CSS rule so the notebook container spans the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
def scrape_and_save(links, columns, df, file_name, delay=3, timeout=30):
    """Scrape the ``matchlogs_for`` table from every link and save the result as CSV.

    For each URL the page is downloaded, the team name is read from the first
    anchor inside ``div#inner_nav``, and every ``<tr>`` in the body of
    ``table#matchlogs_for`` is turned into one row of stripped cell texts.
    The rows of all pages are appended to ``df`` and written to ``file_name``.

    Parameters
    ----------
    links : iterable of str
        Page URLs to scrape, processed in order.
    columns : list
        Column labels passed straight to ``pd.DataFrame(..., columns=columns)``;
        must match the number of cells per table row.
    df : pandas.DataFrame
        Frame the scraped rows are appended to (not modified in place).
    file_name : str
        Destination CSV path; written without the index.
    delay : float, default 3
        Seconds to sleep before each request.
    timeout : float, default 30
        Per-request timeout in seconds, so a stalled connection cannot hang
        the whole run.

    Returns
    -------
    pandas.DataFrame
        ``df`` followed by all successfully scraped rows (also saved to CSV).

    Notes
    -----
    Scraping is best-effort: a page that fails to download or parse is
    reported and skipped, and the remaining links are still processed.
    """
    scraped_frames = []

    for link in tqdm.tqdm(links):
        time.sleep(delay)

        try:
            # Request the URL of *this* iteration. A separately maintained
            # index would fall out of step with the loop whenever the request
            # itself raised, re-fetching old links and dropping the last ones.
            res = requests.get(link, timeout=timeout)
            soup = BeautifulSoup(res.text, 'html.parser')

            # Team name lives in the page's inner navigation, not in the table.
            inner_nav = soup.find('div', id='inner_nav')
            header_data = inner_nav.find('a').text

            table = soup.find('table', {'id': 'matchlogs_for'})
            rows = table.find('tbody').find_all('tr')

            # One list of stripped cell texts per table row.
            data = [
                [cell.text.strip() for cell in row.find_all(['td', 'th'])]
                for row in rows
            ]

            temp_df = pd.DataFrame(data, columns=columns)
            temp_df['Team'] = header_data
            scraped_frames.append(temp_df)

        except Exception as exc:
            # Keep going, but say which page was skipped and why. `Exception`
            # (rather than a bare except) lets Ctrl-C interrupt the run.
            # tqdm.write is used because warnings are globally silenced and a
            # plain print would break the progress bar.
            tqdm.tqdm.write(f"Skipping {link}: {exc!r}")
            continue

    # DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
    df = pd.concat([df, *scraped_frames], ignore_index=True)

    # Save the DataFrame as a CSV file
    df.to_csv(file_name, index=False)
    return df

In [3]:
# Values looped over when building URLs: stat categories (used as a URL path
# segment) and the season labels for the Big 5 leagues and for MLS.
cats = [
    'schedule', 'shooting', 'keeper', 'passing', 'passing_types',
    'gca', 'defense', 'possession', 'misc',
]
# Big 5 seasons span two calendar years: '2017-2018' ... '2022-2023'.
year_range = [f'{start}-{start + 1}' for start in range(2017, 2023)]
# MLS seasons are single calendar years: '2018' ... '2022'.
mls_year_range = [str(season) for season in range(2018, 2023)]

In [4]:
# FBref column labels used to create the Big 5 dataframes, one list per stat category.
# Each variable is a list wrapping a single inner list of labels; the opponent
# variants below read that inner list via `[0]`.
# NOTE(review): passing a nested list as `columns=` may make pandas build
# MultiIndex columns - confirm that is intended.
# The labels are positional: their order has to match the cell order of the
# scraped table rows they are applied to.

# Scores & fixtures
big_5_score_columns = [['Date','Time','Comp','Round','Day','Venue','Result','GF','GA','Opponent','xG','xGA','Possession%','Attendance','Captain','Formation','Referee','Match Report','Notes']]

# Shooting
big_5_shooting_columns = [['Date','Time','Comp','Round','Day','Venue','Result','GF','GA','Opponent','Gls','Sh','SoT','SoT%','G/Sh','G/SoT','Avg Shot Distance','Free Kick Shots','Penalties Made','PKatt','xG','Non pen xG','npxG/Sh','G minus xG','npxG-xG','Match Report']]

# Goalkeeping. Note that 'GA' appears twice in this list (after 'GF' and after 'SoTA'),
# so the resulting frame has two columns with the same label.
big_5_goalkeeping_columns = [['Date','Time','Comp','Round','Day','Venue','Result','GF','GA','Opponent','SoTA','GA','Saves','Save%','Clean Sheet','Post-ShotxG','PSxGPlusMinus','PKatt','PKAllowed','PKsaved','PKmissed','Passes40ydspluscompleted','Passes40ydsplusAttempted','Passes40ydsplusCmp%','PassesAtt','PassesThrwn','%ofPassesLaunched','AvgPassLen','GoalKicksAtt','GoalKicks%Launched','GoalKicksAvgLen','Crosses Faced','Crosses Stopped','% of Opp Crosses Stopped','# of Sweeper Actions','AvgDist sweeper Actions','Match Report']]

# Passing
big_5_passing_columns = [['Date','Time','Comp','Round','Day','Venue','Result','GF','GA','Opponent','Passes Completed','Passes Attempted','Pass Completion %','Total Pass Dist','Total Pass Dist Twd Goal','Short Pass Comp','Short Pass Att','Shrt Pass Cmp%','Med Pass Comp','Med Pass Att','Med Pass Cmp %','Lng Pass Cmp','Lng Pass Att','Lng Pass Cmp %','Ast','xAssistedGoals','xAssists','KeyPasses','Attacking Third Passes','Passes Into 18 yd box Comp','Completed Cross','Prog Passes past own 40 yd line','Match Report']]

# Pass types
big_5_pass_type_columns = [['Date','Time','Comp','Round','Day','Venue','Result','GF','GA','Opponent','Passes Attempted','Live Passes','Deadball passes','Free Kick Passes','Passes between open defenders','Passes that go 40 plus yds wide','Crosses','Throw Ins','Corner Kicks','Inswinger Corners','Outswinging Corners','Straight Corners','Passes Completed','Offsides','Passes Blocked','Match Report']]

# Goal and shot creation
big_5_goal_shot_creation_columns = [['Date','Time','Comp','Round','Day','Venue','Result','GF','GA','Opponent','Shot Creating Actions','Live Passes Leading to shot','Deadball Passes Leading to shot','Dribbles leading to shot','Shots leading to another shot','Fouls drawn leading to shot','Def Actions Leading to Shot','Goal Creating Actions','Passes leading to goal','Goals from deadball','Goals from dribble','Goals from other shot','Goals from foul drawn','Goals from defensive action','Match Report']]

# Defensive actions
big_5_defensive_action_columns = [['Date','Time','Comp','Round','Day','Venue','Result','GF','GA','Opponent','Tackles','TacklesWon','Tackles in Def 3rd','Tackles in Mid 3rd','Tackles in Att 3rd','Tackles vs Dribble','Tackles vs Dribble Att','% of Dribblers tackled succesful','# of times dribbled past','Blocks','Shots Blocked','Passes Blocked','Interceptions','Tackles + Int','Clearances','Errors','Match Report']]

# Possession
big_5_possession_columns = [['Date','Time','Comp','Round','Day','Venue','Result','GF','GA','Opponent','Possession','Touches','Touches in Def Pen Area','Touches in Def 3rd','Touches in Mid 3rd','Touches in Att 3rd','Touches in Att Pen Area','Live ball touches','Attempted Take ons','Succesful Take Ons','Take on success rate','Number of times tackled during take on','% of take ons tackled','Carries','Total Carry Distance','Progressive Carrying Distance','Progressive Carries','Carries into final third','Carries into penalty area','Miscontrols','Dispossessions','Passes Received','Progressive Passes Received','Match Report']]

# Miscellaneous stats
big_5_misc_stats_columns = [['Date','Time','Comp','Round','Day','Venue','Result','GF','GA','Opponent','CrdY','CrdR','2CrdY','Fls','Fld','Off','Crs','Int','TklW','PKwon','PKcon','OG','Loose Balls Recov','Aerials Won','Aerials Lost','Aerials Won%','Match Report']]

# Opponent-side label sets for the Big 5 tables: every label of the matching
# team-side list (including 'Date', 'Time', ...) gets an 'opp' prefix, and the
# result keeps the same list-inside-a-list shape as the source.
big_5_opp_shooting_columns = [[f'opp{label}' for label in big_5_shooting_columns[0]]]
big_5_opp_goalkeeping_columns = [[f'opp{label}' for label in big_5_goalkeeping_columns[0]]]
big_5_opp_passing_columns = [[f'opp{label}' for label in big_5_passing_columns[0]]]
big_5_opp_pass_type_columns = [[f'opp{label}' for label in big_5_pass_type_columns[0]]]
big_5_opp_goal_shot_creation_columns = [[f'opp{label}' for label in big_5_goal_shot_creation_columns[0]]]
big_5_opp_defensive_action_columns = [[f'opp{label}' for label in big_5_defensive_action_columns[0]]]
big_5_opp_possession_columns = [[f'opp{label}' for label in big_5_possession_columns[0]]]
big_5_opp_misc_stats_columns = [[f'opp{label}' for label in big_5_misc_stats_columns[0]]]

# MLS column labels, one nested list per stat category. Unlike the Big 5 lists
# these contain no 'Comp' entry. As with the Big 5 lists, the order is
# positional and must match the cell order of the scraped table rows.

# Scores & fixtures
mls_score_columns = [['Date','Time','Round','Day','Venue','Result','GF','GA','Opponent','xG','xGA','Possession%','Attendance','Captain','Formation','Referee','Match Report','Notes']]

# Shooting
mls_shooting_columns = [['Date','Time','Round','Day','Venue','Result','GF','GA','Opponent','Gls','Sh','SoT','SoT%','G/Sh','G/SoT','Avg Shot Distance','Free Kick Shots','Penalties Made','PKatt','xG','Non pen xG','npxG/Sh','G minus xG','npxG-xG','Match Report']]

# Goalkeeping. 'GA' appears twice in this list, and the three opponent-cross labels
# ('OppCrossesAtt', 'OppCrossesStp', 'OppCrossesStp%') are spelled differently
# from the Big 5 goalkeeping list.
mls_goalkeeping_columns = [['Date','Time','Round','Day','Venue','Result','GF','GA','Opponent','SoTA','GA','Saves','Save%','Clean Sheet','Post-ShotxG','PSxGPlusMinus','PKatt','PKAllowed','PKsaved','PKmissed','Passes40ydspluscompleted','Passes40ydsplusAttempted','Passes40ydsplusCmp%','PassesAtt','PassesThrwn','%ofPassesLaunched','AvgPassLen','GoalKicksAtt','GoalKicks%Launched','GoalKicksAvgLen','OppCrossesAtt','OppCrossesStp','OppCrossesStp%','# of Sweeper Actions','AvgDist sweeper Actions','Match Report']]

# Passing
mls_passing_columns = [['Date','Time','Round','Day','Venue','Result','GF','GA','Opponent','Passes Completed','Passes Attempted','Pass Completion %','Total Pass Dist','Total Pass Dist Twd Goal','Short Pass Comp','Short Pass Att','Shrt Pass Cmp%','Med Pass Comp','Med Pass Att','Med Pass Cmp %','Lng Pass Cmp','Lng Pass Att','Lng Pass Cmp %','Ast','xAssistedGoals','xAssists','KeyPasses','Attacking Third Passes','Passes Into 18 yd box Comp','Completed Cross','Prog Passes past own 40 yd line','Match Report']]

# Pass types
mls_pass_type_columns = [['Date','Time','Round','Day','Venue','Result','GF','GA','Opponent','Passes Attempted','Live Passes','Deadball passes','Free Kick Passes','Passes between open defenders','Passes that go 40 plus yds wide','Crosses','Throw Ins','Corner Kicks','Inswinger Corners','Outswinging Corners','Straight Corners','Passes Completed','Offsides','Passes Blocked','Match Report']]

# Goal and shot creation
mls_goal_shot_creation_columns = [['Date','Time','Round','Day','Venue','Result','GF','GA','Opponent','Shot Creating Actions','Live Passes Leading to shot','Deadball Passes Leading to shot','Dribbles leading to shot','Shots leading to another shot','Fouls drawn leading to shot','Def Actions Leading to Shot','Goal Creating Actions','Passes leading to goal','Goals from deadball','Goals from dribble','Goals from other shot','Goals from foul drawn','Goals from defensive action','Match Report']]

# Defensive actions
mls_defensive_action_columns = [['Date','Time','Round','Day','Venue','Result','GF','GA','Opponent','Tackles','TacklesWon','Tackles in Def 3rd','Tackles in Mid 3rd','Tackles in Att 3rd','Tackles vs Dribble','Tackles vs Dribble Att','% of Dribblers tackled succesful','# of times dribbled past','Blocks','Shots Blocked','Passes Blocked','Interceptions','Tackles + Int','Clearances','Errors','Match Report']]

# Possession
mls_possession_columns = [['Date','Time','Round','Day','Venue','Result','GF','GA','Opponent','Possession','Touches','Touches in Def Pen Area','Touches in Def 3rd','Touches in Mid 3rd','Touches in Att 3rd','Touches in Att Pen Area','Live ball touches','Attempted Take ons','Succesful Take Ons','Take on success rate','Number of times tackled during take on','% of take ons tackled','Carries','Total Carry Distance','Progressive Carrying Distance','Progressive Carries','Carries into final third','Carries into penalty area','Miscontrols','Dispossessions','Passes Received','Progressive Passes Received','Match Report']]

# Miscellaneous stats
mls_misc_stats_columns = [['Date','Time','Round','Day','Venue','Result','GF','GA','Opponent','CrdY','CrdR','2CrdY','Fls','Fld','Off','Crs','Int','TklW','PKwon','PKcon','OG','Loose Balls Recov','Aerials Won','Aerials Lost','Aerials Won%','Match Report']]

# Opponent-side label sets for the MLS tables: every label of the matching
# team-side list (including 'Date', 'Time', ...) gets an 'opp' prefix, and the
# result keeps the same list-inside-a-list shape as the source.
mls_opp_shooting_columns = [[f'opp{label}' for label in mls_shooting_columns[0]]]
mls_opp_goalkeeping_columns = [[f'opp{label}' for label in mls_goalkeeping_columns[0]]]
mls_opp_passing_columns = [[f'opp{label}' for label in mls_passing_columns[0]]]
mls_opp_pass_type_columns = [[f'opp{label}' for label in mls_pass_type_columns[0]]]
mls_opp_goal_shot_creation_columns = [[f'opp{label}' for label in mls_goal_shot_creation_columns[0]]]
mls_opp_defensive_action_columns = [[f'opp{label}' for label in mls_defensive_action_columns[0]]]
mls_opp_possession_columns = [[f'opp{label}' for label in mls_possession_columns[0]]]
mls_opp_misc_stats_columns = [[f'opp{label}' for label in mls_misc_stats_columns[0]]]

In [5]:
# Build the FBref match-log URL lists for every stat category.
# MLS links are kept separate from the Big 5 links because the MLS tables
# don't include a competition column (FBref hates the Open Cup, apparently),
# so they use the 'c22' competition path instead of 'all_comps'.

def _build_matchlog_links(squad_codes, seasons, comp_path, category):
    """Return one match-log URL per (season, squad) pair.

    Parameters
    ----------
    squad_codes : pandas.DataFrame
        Lookup table with a 'Squad_Code' column (squad id used in the URL)
        and a 'Text' column (trailing URL slug), read row by row in order.
    seasons : list of str
        Season labels; iterated in the outer loop, so all squads of the first
        season come first, then all squads of the second season, and so on.
    comp_path : str
        Competition segment of the URL ('all_comps' or 'c22').
    category : str
        Stat-category segment of the URL (one of `cats`).
    """
    return [
        f"https://fbref.com/en/squads/{code}/{season}/matchlogs/{comp_path}/{category}/{text}"
        for season in seasons
        # zip pairs the two columns row by row, independent of the frame's index.
        for code, text in zip(squad_codes['Squad_Code'], squad_codes['Text'])
    ]


def replace_text(links):
    """Return a copy of `links` with 'Match-Logs' replaced by 'Scores-and-Fixtures'."""
    return [s.replace("Match-Logs", "Scores-and-Fixtures") for s in links]


# Score links. The schedule pages need "Scores-and-Fixtures" where the other
# categories have "Match-Logs", hence the extra replace_text step.
big_5_score_links = replace_text(_build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[0]))
mls_score_links = replace_text(_build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[0]))

# Shooting links
big_5_shooting_links = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[1])
mls_shooting_links = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[1])

# Goalkeeping links
big_5_goalkeeping_links = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[2])
mls_goalkeeping_links = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[2])

# Passing links
big_5_passing_links = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[3])
mls_passing_links = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[3])

# Pass type links
big_5_pass_type_links = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[4])
mls_pass_type_links = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[4])

# Goal / shot creation links
big_5_goal_shot_creation_links = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[5])
mls_goal_shot_creation_links = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[5])

# Defensive action links
big_5_defensive_action_links = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[6])
mls_defensive_action_links = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[6])

# Possession links
big_5_possession_links = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[7])
mls_possession_links = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[7])

# Misc links
big_5_misc_stats_links = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[8])
mls_misc_stats_links = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[8])
        


In [6]:
# Lists to loop through for the 2023 refresh.
# NOTE: this rebinds `year_range` and `mls_year_range` to a single season each,
# so any cell that reads them after this point only sees these values.
cats = ['schedule','shooting','keeper','passing','passing_types','gca','defense','possession','misc']
year_range = ['2022-2023']
mls_year_range = ['2023']


def _build_matchlog_links(squad_codes, seasons, comp_path, category):
    """Return one match-log URL per (season, squad) pair.

    Parameters
    ----------
    squad_codes : pandas.DataFrame
        Lookup table with a 'Squad_Code' column (squad id used in the URL)
        and a 'Text' column (trailing URL slug), read row by row in order.
    seasons : list of str
        Season labels; iterated in the outer loop.
    comp_path : str
        Competition segment of the URL ('all_comps' or 'c22').
    category : str
        Stat-category segment of the URL (one of `cats`).
    """
    return [
        f"https://fbref.com/en/squads/{code}/{season}/matchlogs/{comp_path}/{category}/{text}"
        for season in seasons
        # zip pairs the two columns row by row, independent of the frame's index.
        for code, text in zip(squad_codes['Squad_Code'], squad_codes['Text'])
    ]


def replace_text(links):
    """Return a copy of `links` with 'Match-Logs' replaced by 'Scores-and-Fixtures'."""
    return [s.replace("Match-Logs", "Scores-and-Fixtures") for s in links]


# 2023 link creation: Big 5 links use the 'all_comps' path, MLS links use 'c22'.

# Score links. The schedule pages need "Scores-and-Fixtures" where the other
# categories have "Match-Logs", hence the extra replace_text step.
big_5_score_links_2023 = replace_text(_build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[0]))
mls_score_links_2023 = replace_text(_build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[0]))

# Shooting links
big_5_shooting_links_2023 = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[1])
mls_shooting_links_2023 = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[1])

# Goalkeeping links
big_5_goalkeeping_links_2023 = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[2])
mls_goalkeeping_links_2023 = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[2])

# Passing links
big_5_passing_links_2023 = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[3])
mls_passing_links_2023 = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[3])

# Pass type links
big_5_pass_type_links_2023 = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[4])
mls_pass_type_links_2023 = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[4])

# Goal / shot creation links
big_5_goal_shot_creation_links_2023 = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[5])
mls_goal_shot_creation_links_2023 = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[5])

# Defensive action links
big_5_defensive_action_links_2023 = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[6])
mls_defensive_action_links_2023 = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[6])

# Possession links
big_5_possession_links_2023 = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[7])
mls_possession_links_2023 = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[7])

# Misc links
big_5_misc_stats_links_2023 = _build_matchlog_links(big5_squad_codes, year_range, 'all_comps', cats[8])
mls_misc_stats_links_2023 = _build_matchlog_links(mls_squad_codes, mls_year_range, 'c22', cats[8])

In [None]:
# Inspection cell: displays the full list of 2023 MLS misc-stats links.
mls_misc_stats_links_2023

In [None]:
# Sanity check: how many Big 5 goalkeeping links were built (all seasons, not the _2023 list).
len(big_5_goalkeeping_links)

In [7]:
# Empty accumulator frames for team-side and opponent-side stats.
# Big 5 and MLS are kept separate because their column sets differ.
def _empty_frame(column_labels):
    """Return an empty object-dtype DataFrame with the given column labels."""
    return pd.DataFrame(columns=column_labels, dtype=object)


# Big 5 - team side
big_5_score_df = _empty_frame(big_5_score_columns)
big_5_shooting_df = _empty_frame(big_5_shooting_columns)
big_5_goalkeeping_df = _empty_frame(big_5_goalkeeping_columns)
big_5_passing_df = _empty_frame(big_5_passing_columns)
big_5_pass_type_df = _empty_frame(big_5_pass_type_columns)
big_5_goal_shot_creation_df = _empty_frame(big_5_goal_shot_creation_columns)
big_5_defensive_action_df = _empty_frame(big_5_defensive_action_columns)
big_5_possession_df = _empty_frame(big_5_possession_columns)
big_5_misc_stats_df = _empty_frame(big_5_misc_stats_columns)

# Big 5 - opponent side (there is no opponent score table)
big_5_opp_shooting_df = _empty_frame(big_5_opp_shooting_columns)
big_5_opp_goalkeeping_df = _empty_frame(big_5_opp_goalkeeping_columns)
big_5_opp_passing_df = _empty_frame(big_5_opp_passing_columns)
big_5_opp_pass_type_df = _empty_frame(big_5_opp_pass_type_columns)
big_5_opp_goal_shot_creation_df = _empty_frame(big_5_opp_goal_shot_creation_columns)
big_5_opp_defensive_action_df = _empty_frame(big_5_opp_defensive_action_columns)
big_5_opp_possession_df = _empty_frame(big_5_opp_possession_columns)
big_5_opp_misc_stats_df = _empty_frame(big_5_opp_misc_stats_columns)

# MLS - team side
mls_score_df = _empty_frame(mls_score_columns)
mls_shooting_df = _empty_frame(mls_shooting_columns)
mls_goalkeeping_df = _empty_frame(mls_goalkeeping_columns)
mls_passing_df = _empty_frame(mls_passing_columns)
mls_pass_type_df = _empty_frame(mls_pass_type_columns)
mls_goal_shot_creation_df = _empty_frame(mls_goal_shot_creation_columns)
mls_defensive_action_df = _empty_frame(mls_defensive_action_columns)
mls_possession_df = _empty_frame(mls_possession_columns)
mls_misc_stats_df = _empty_frame(mls_misc_stats_columns)

# MLS - opponent side
mls_opp_shooting_df = _empty_frame(mls_opp_shooting_columns)
mls_opp_goalkeeping_df = _empty_frame(mls_opp_goalkeeping_columns)
mls_opp_passing_df = _empty_frame(mls_opp_passing_columns)
mls_opp_pass_type_df = _empty_frame(mls_opp_pass_type_columns)
mls_opp_goal_shot_creation_df = _empty_frame(mls_opp_goal_shot_creation_columns)
mls_opp_defensive_action_df = _empty_frame(mls_opp_defensive_action_columns)
mls_opp_possession_df = _empty_frame(mls_opp_possession_columns)
mls_opp_misc_stats_df = _empty_frame(mls_opp_misc_stats_columns)



In [None]:
# Big 5 Score Data - collected 3.5
# Downloads every 2023 Big 5 score page, reads the rows of its 'matchlogs_for'
# table and adds them (plus a 'Team' column) to `big_5_score_df`.
# Best-effort: a page that fails to download or parse is reported and skipped.
scraped_frames = []

for link in tqdm.tqdm(big_5_score_links_2023):
    time.sleep(3)  # pause between requests

    try:
        # Request the URL of this iteration directly. A separately maintained
        # index would drift out of step whenever the request itself raised.
        # The timeout keeps one stalled connection from hanging the run.
        res = requests.get(link, timeout=30)
        soup = BeautifulSoup(res.text, 'html.parser')

        # The team header is read from the inner navigation here - this is
        # different than the rest of the stats categories.
        inner_nav = soup.find('div', id='inner_nav')
        header_data = inner_nav.find('a').text

        table = soup.find('table', {'id': 'matchlogs_for'})
        rows = table.find('tbody').find_all('tr')

        # One list of stripped cell texts per table row.
        data = [
            [cell.text.strip() for cell in row.find_all(['td', 'th'])]
            for row in rows
        ]

        temp_df = pd.DataFrame(data, columns=big_5_score_columns)
        temp_df['Team'] = header_data
        scraped_frames.append(temp_df)

    except Exception as exc:
        # Report instead of silently swallowing; `Exception` (not a bare
        # except) lets Ctrl-C stop the loop. tqdm.write keeps the bar intact.
        tqdm.tqdm.write(f"Skipping {link}: {exc!r}")
        continue

# DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
big_5_score_df = pd.concat([big_5_score_df, *scraped_frames], ignore_index=True)

In [None]:
# Write the accumulated Big 5 score rows to CSV, omitting the index column.
big_5_score_df.to_csv('big_5_score_data_2023.csv',index=False)

In [None]:
# mls Score Data - collected 3.5
# Downloads every 2023 MLS score page, reads the rows of its 'matchlogs_for'
# table and adds them (plus a 'Team' column) to `mls_score_df`.
# Best-effort: a page that fails to download or parse is reported and skipped.
scraped_frames = []

for link in tqdm.tqdm(mls_score_links_2023):
    try:
        time.sleep(3)  # pause between requests

        # Request the URL of this iteration directly. A separately maintained
        # index would drift out of step whenever the request itself raised.
        # The timeout keeps one stalled connection from hanging the run.
        res = requests.get(link, timeout=30)
        soup = BeautifulSoup(res.text, 'html.parser')

        # The team header is read from the inner navigation here - this is
        # different than the rest of the stats categories.
        inner_nav = soup.find('div', id='inner_nav')
        header_data = inner_nav.find('a').text

        table = soup.find('table', {'id': 'matchlogs_for'})
        rows = table.find('tbody').find_all('tr')

        # One list of stripped cell texts per table row.
        data = [
            [cell.text.strip() for cell in row.find_all(['td', 'th'])]
            for row in rows
        ]

        temp_df = pd.DataFrame(data, columns=mls_score_columns)
        temp_df['Team'] = header_data
        scraped_frames.append(temp_df)

    except Exception as exc:
        # Report instead of silently swallowing; `Exception` (not a bare
        # except) lets Ctrl-C stop the loop. tqdm.write keeps the bar intact.
        tqdm.tqdm.write(f"Skipping {link}: {exc!r}")
        continue

# DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
mls_score_df = pd.concat([mls_score_df, *scraped_frames], ignore_index=True)

In [None]:
# Inspection cell: displays the accumulated MLS score frame.
mls_score_df

In [None]:
# Write the accumulated MLS score rows to CSV, omitting the index column.
mls_score_df.to_csv('mls_score_data_2023.csv',index=False)

In [None]:
# Big 5 Team Shooting Info - collected 3.5
# Downloads every 2023 Big 5 shooting page, reads the rows of its
# 'matchlogs_for' table and adds them (plus a 'Team' column) to
# `big_5_shooting_df`.
# Best-effort: a page that fails to download or parse is reported and skipped.
scraped_frames = []

for link in tqdm.tqdm(big_5_shooting_links_2023):
    time.sleep(3)  # pause between requests

    try:
        # Request the URL of this iteration directly. A separately maintained
        # index would drift out of step whenever the request itself raised.
        # The timeout keeps one stalled connection from hanging the run.
        res = requests.get(link, timeout=30)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Team info comes from the table's 'header_for_against' header cell.
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        table = soup.find('table', {'id': 'matchlogs_for'})
        rows = table.find('tbody').find_all('tr')

        # One list of stripped cell texts per table row.
        data = [
            [cell.text.strip() for cell in row.find_all(['td', 'th'])]
            for row in rows
        ]

        temp_df = pd.DataFrame(data, columns=big_5_shooting_columns)
        temp_df['Team'] = header_data
        scraped_frames.append(temp_df)

    except Exception as exc:
        # Report instead of silently swallowing; `Exception` (not a bare
        # except) lets Ctrl-C stop the loop. tqdm.write keeps the bar intact.
        tqdm.tqdm.write(f"Skipping {link}: {exc!r}")
        continue

# DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
big_5_shooting_df = pd.concat([big_5_shooting_df, *scraped_frames], ignore_index=True)

In [None]:
# Write the accumulated Big 5 team shooting rows to CSV, omitting the index column.
big_5_shooting_df.to_csv('big_5_shooting_2023.csv',index=False)

In [None]:
# Big 5 Opponent Shooting Info - collected 3.5
# Downloads every 2023 Big 5 shooting page (the same links as the team-side
# cell), reads the rows of its 'matchlogs_against' table and adds them
# (plus a 'Team' column) to `big_5_opp_shooting_df`.
# Best-effort: a page that fails to download or parse is reported and skipped.
scraped_frames = []

for link in tqdm.tqdm(big_5_shooting_links_2023):
    time.sleep(3)  # pause between requests

    try:
        # Request the URL of this iteration directly. A separately maintained
        # index would drift out of step whenever the request itself raised.
        # The timeout keeps one stalled connection from hanging the run.
        res = requests.get(link, timeout=30)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Team info comes from the first 'header_for_against' header cell on the page.
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Opponent rows live in the 'against' table rather than the 'for' table.
        table = soup.find('table', {'id': 'matchlogs_against'})
        rows = table.find('tbody').find_all('tr')

        # One list of stripped cell texts per table row.
        data = [
            [cell.text.strip() for cell in row.find_all(['td', 'th'])]
            for row in rows
        ]

        temp_df = pd.DataFrame(data, columns=big_5_opp_shooting_columns)
        temp_df['Team'] = header_data
        scraped_frames.append(temp_df)

    except Exception as exc:
        # Report instead of silently swallowing; `Exception` (not a bare
        # except) lets Ctrl-C stop the loop. tqdm.write keeps the bar intact.
        tqdm.tqdm.write(f"Skipping {link}: {exc!r}")
        continue

# DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
big_5_opp_shooting_df = pd.concat([big_5_opp_shooting_df, *scraped_frames], ignore_index=True)

In [None]:
# Save big_5_opp_shooting_df to CSV without the row index.
big_5_opp_shooting_df.to_csv(path_or_buf='big_5_opp_shooting_2023.csv', index=False)

In [None]:
# MLS Team Shooting Info - collected 3.5
# For each link: fetch the page, read the 'matchlogs_for' table body and append
# its rows (plus a Team column) to mls_shooting_df. Failing pages are skipped.
for link in tqdm.tqdm(mls_shooting_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_for'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=mls_shooting_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        mls_shooting_df = pd.concat([mls_shooting_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save mls_shooting_df to CSV without the row index.
mls_shooting_df.to_csv(path_or_buf='mls_shooting_2023.csv', index=False)

In [None]:
# MLS Opponent Shooting Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_against' table body and append
# its rows (plus a Team column) to mls_opp_shooting_df. Failing pages are skipped.
for link in tqdm.tqdm(mls_shooting_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_against'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=mls_opp_shooting_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        mls_opp_shooting_df = pd.concat([mls_opp_shooting_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save mls_opp_shooting_df to CSV without the row index.
mls_opp_shooting_df.to_csv(path_or_buf='mls_opp_shooting_2023.csv', index=False)

In [None]:
# Big 5 Goalkeeping Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_for' table body and append
# its rows to big_5_goalkeeping_df (no Team column). Failing pages are skipped.
for link in tqdm.tqdm(big_5_goalkeeping_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Looked up only so that pages without this header cell are skipped;
        # the value itself is not stored by this cell.
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_for'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=big_5_goalkeeping_columns)
        # The Team column is intentionally not added: the original author noted that
        # `temp_df['Team'] = header_data` broke this cell (cause unknown - TODO investigate).
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        big_5_goalkeeping_df = pd.concat([big_5_goalkeeping_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save big_5_goalkeeping_df to CSV without the row index.
big_5_goalkeeping_df.to_csv(path_or_buf='big_5_goalkeeping_2023.csv', index=False)

In [None]:
# Big 5 Opponent Goalkeeping Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_against' table body and append
# its rows to big_5_opp_goalkeeping_df (no Team column). Failing pages are skipped.
for link in tqdm.tqdm(big_5_goalkeeping_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Looked up only so that pages without this header cell are skipped;
        # the value itself is not stored by this cell.
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_against'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=big_5_opp_goalkeeping_columns)
        # The Team column is intentionally not added: the original author noted that
        # `temp_df['Team'] = header_data` broke this cell (cause unknown - TODO investigate).
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        big_5_opp_goalkeeping_df = pd.concat([big_5_opp_goalkeeping_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save big_5_opp_goalkeeping_df to CSV without the row index.
big_5_opp_goalkeeping_df.to_csv(path_or_buf='big_5_opp_goalkeeping_2023.csv', index=False)

In [None]:
# MLS Goalkeeping Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_for' table body and append
# its rows to mls_goalkeeping_df (no Team column). Failing pages are skipped.
for link in tqdm.tqdm(mls_goalkeeping_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Looked up only so that pages without this header cell are skipped;
        # the value itself is not stored by this cell.
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_for'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=mls_goalkeeping_columns)
        # The Team column is intentionally not added: the original author noted that
        # `temp_df['Team'] = header_data` broke this cell (cause unknown - TODO investigate).
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        mls_goalkeeping_df = pd.concat([mls_goalkeeping_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save mls_goalkeeping_df to CSV without the row index.
mls_goalkeeping_df.to_csv(path_or_buf='mls_goalkeeping_2023.csv', index=False)

In [None]:
# MLS Opponent Goalkeeping Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_against' table body and append
# its rows to mls_opp_goalkeeping_df (no Team column). Failing pages are skipped.
for link in tqdm.tqdm(mls_goalkeeping_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Looked up only so that pages without this header cell are skipped;
        # the value itself is not stored by this cell.
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_against'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=mls_opp_goalkeeping_columns)
        # The Team column is intentionally not added here: the assignment
        # `temp_df['Team'] = header_data` was disabled by the original author
        # (reason not recorded in this cell - TODO investigate).
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        mls_opp_goalkeeping_df = pd.concat([mls_opp_goalkeeping_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save mls_opp_goalkeeping_df to CSV without the row index.
mls_opp_goalkeeping_df.to_csv(path_or_buf='mls_opp_goalkeeping_2023.csv', index=False)

In [None]:
# Big 5 Team Passing Info - collected 3.5
# For each link: fetch the page, read the 'matchlogs_for' table body and append
# its rows (plus a Team column) to big_5_passing_df. Failing pages are skipped.
for link in tqdm.tqdm(big_5_passing_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_for'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=big_5_passing_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        big_5_passing_df = pd.concat([big_5_passing_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save big_5_passing_df to CSV without the row index.
big_5_passing_df.to_csv(path_or_buf='big_5_passing_data_2023.csv', index=False)

In [None]:
# Big 5 Opponent Passing Info - collected 3.5
# For each link: fetch the page, read the 'matchlogs_against' table body and append
# its rows (plus a Team column) to big_5_opp_passing_df. Failing pages are skipped.
for link in tqdm.tqdm(big_5_passing_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_against'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=big_5_opp_passing_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        big_5_opp_passing_df = pd.concat([big_5_opp_passing_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save big_5_opp_passing_df to CSV without the row index.
big_5_opp_passing_df.to_csv(path_or_buf='big_5_opp_passing_data_2023.csv', index=False)

In [None]:
# MLS Passing Info - collected 3.5
# For each link: fetch the page, read the 'matchlogs_for' table body and append
# its rows (plus a Team column) to mls_passing_df. Failing pages are skipped.
for link in tqdm.tqdm(mls_passing_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_for'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=mls_passing_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        mls_passing_df = pd.concat([mls_passing_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save mls_passing_df to CSV without the row index.
mls_passing_df.to_csv(path_or_buf="mls_passing_2023.csv", index=False)

In [None]:
# MLS Opponent Passing Info - collected 3.5
# For each link: fetch the page, read the 'matchlogs_against' table body and append
# its rows (plus a Team column) to mls_opp_passing_df. Failing pages are skipped.
for link in tqdm.tqdm(mls_passing_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_against'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=mls_opp_passing_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        mls_opp_passing_df = pd.concat([mls_opp_passing_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save mls_opp_passing_df to CSV without the row index.
mls_opp_passing_df.to_csv(path_or_buf="mls_opp_passing_2023.csv", index=False)

In [None]:
# Big 5 Team Pass Type Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_for' table body and append
# its rows (plus a Team column) to big_5_pass_type_df. Failing pages are skipped.
for link in tqdm.tqdm(big_5_pass_type_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_for'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=big_5_pass_type_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        big_5_pass_type_df = pd.concat([big_5_pass_type_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save big_5_pass_type_df to CSV without the row index.
big_5_pass_type_df.to_csv(path_or_buf='big_5_pass_type_2023.csv', index=False)

In [None]:
# Big 5 Opponent Pass Type Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_against' table body and append
# its rows (plus a Team column) to big_5_opp_pass_type_df. Failing pages are skipped.
for link in tqdm.tqdm(big_5_pass_type_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_against'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=big_5_opp_pass_type_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        big_5_opp_pass_type_df = pd.concat([big_5_opp_pass_type_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save big_5_opp_pass_type_df to CSV without the row index.
big_5_opp_pass_type_df.to_csv(path_or_buf='big_5_opp_pass_type_2023.csv', index=False)

In [None]:
# MLS Pass Type Info Already collected 3.5
# For each link: fetch the page, read the 'matchlogs_for' table body and append
# its rows (plus a Team column) to mls_pass_type_df. Failing pages are skipped.
for link in tqdm.tqdm(mls_pass_type_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_for'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=mls_pass_type_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        mls_pass_type_df = pd.concat([mls_pass_type_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save mls_pass_type_df to CSV without the row index.
mls_pass_type_df.to_csv(path_or_buf="mls_team_pass_type_2023.csv", index=False)

In [None]:
# MLS Opponent Pass Type Info Already collected 3.5
# For each link: fetch the page, read the 'matchlogs_against' table body and append
# its rows (plus a Team column) to mls_opp_pass_type_df. Failing pages are skipped.
for link in tqdm.tqdm(mls_pass_type_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_against'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=mls_opp_pass_type_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        mls_opp_pass_type_df = pd.concat([mls_opp_pass_type_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save mls_opp_pass_type_df to CSV without the row index.
mls_opp_pass_type_df.to_csv(path_or_buf="mls_opp_pass_type_2023.csv", index=False)

In [None]:
# Big 5 Team Goal Shot Creation Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_for' table body and append
# its rows (plus a Team column) to big_5_goal_shot_creation_df. Failing pages are skipped.
for link in tqdm.tqdm(big_5_goal_shot_creation_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_for'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=big_5_goal_shot_creation_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        big_5_goal_shot_creation_df = pd.concat([big_5_goal_shot_creation_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save big_5_goal_shot_creation_df to CSV without the row index.
big_5_goal_shot_creation_df.to_csv(path_or_buf='big_5_sca_data_2023.csv', index=False)

In [None]:
# Big 5 Opponent Goal Shot Creation Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_against' table body and append
# its rows (plus a Team column) to big_5_opp_goal_shot_creation_df. Failing pages are skipped.
for link in tqdm.tqdm(big_5_goal_shot_creation_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_against'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=big_5_opp_goal_shot_creation_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        big_5_opp_goal_shot_creation_df = pd.concat([big_5_opp_goal_shot_creation_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save big_5_opp_goal_shot_creation_df to CSV without the row index.
big_5_opp_goal_shot_creation_df.to_csv(path_or_buf='big_5_opp_gca_2023.csv', index=False)

In [None]:
# MLS Goal Shot Creation Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_for' table body and append
# its rows (plus a Team column) to mls_goal_shot_creation_df. Failing pages are skipped.
for link in tqdm.tqdm(mls_goal_shot_creation_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_for'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=mls_goal_shot_creation_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        mls_goal_shot_creation_df = pd.concat([mls_goal_shot_creation_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save mls_goal_shot_creation_df to CSV without the row index.
mls_goal_shot_creation_df.to_csv(path_or_buf="mls_gca_2023.csv", index=False)

In [None]:
# MLS Opponent Goal Shot Creation Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_against' table body and append
# its rows (plus a Team column) to mls_opp_goal_shot_creation_df. Failing pages are skipped.
for link in tqdm.tqdm(mls_goal_shot_creation_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_against'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=mls_opp_goal_shot_creation_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        mls_opp_goal_shot_creation_df = pd.concat([mls_opp_goal_shot_creation_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save mls_opp_goal_shot_creation_df to CSV without the row index.
mls_opp_goal_shot_creation_df.to_csv(path_or_buf="mls_opp_gca_2023.csv", index=False)

In [None]:
# Big 5 Team Defensive Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_for' table body and append
# its rows (plus a Team column) to big_5_defensive_action_df. Failing pages are skipped.
for link in tqdm.tqdm(big_5_defensive_action_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_for'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=big_5_defensive_action_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        big_5_defensive_action_df = pd.concat([big_5_defensive_action_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save big_5_defensive_action_df to CSV without the row index.
big_5_defensive_action_df.to_csv(path_or_buf='big_5_defense_2023.csv', index=False)

In [None]:
# Big 5 Opponent Defense Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_against' table body and append
# its rows (plus a Team column) to big_5_opp_defensive_action_df. Failing pages are skipped.
for link in tqdm.tqdm(big_5_defensive_action_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_against'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=big_5_opp_defensive_action_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        big_5_opp_defensive_action_df = pd.concat([big_5_opp_defensive_action_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save big_5_opp_defensive_action_df to CSV without the row index.
big_5_opp_defensive_action_df.to_csv(path_or_buf='big_5_opp_defense_2023.csv', index=False)

In [None]:
# MLS Defense Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_for' table body and append
# its rows (plus a Team column) to mls_defensive_action_df. Failing pages are skipped.
for link in tqdm.tqdm(mls_defensive_action_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_for'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=mls_defensive_action_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        mls_defensive_action_df = pd.concat([mls_defensive_action_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save mls_defensive_action_df to CSV without the row index.
mls_defensive_action_df.to_csv(path_or_buf='mls_team_defense_2023.csv', index=False)

In [None]:
# MLS Opponent Defense Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_against' table body and append
# its rows (plus a Team column) to mls_opp_defensive_action_df. Failing pages are skipped.
for link in tqdm.tqdm(mls_defensive_action_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_against'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=mls_opp_defensive_action_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        mls_opp_defensive_action_df = pd.concat([mls_opp_defensive_action_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save mls_opp_defensive_action_df to CSV without the row index.
mls_opp_defensive_action_df.to_csv(path_or_buf='mls_opp_defense_2023.csv', index=False)

In [None]:
# Big 5 Team Possession Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_for' table body and append
# its rows (plus a Team column) to big_5_possession_df. Failing pages are skipped.
for link in tqdm.tqdm(big_5_possession_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_for'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=big_5_possession_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        big_5_possession_df = pd.concat([big_5_possession_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Save big_5_possession_df to CSV without the row index.
big_5_possession_df.to_csv(path_or_buf='big_5_team_possession_2023.csv', index=False)

In [None]:
# Big 5 Opponent Possession Info collected 3.5
# For each link: fetch the page, read the 'matchlogs_against' table body and append
# its rows (plus a Team column) to big_5_opp_possession_df. Failing pages are skipped.
for link in tqdm.tqdm(big_5_possession_links_2023):
    # Wait 3 seconds before every request (presumably to be polite to the site).
    time.sleep(3)

    try:
        # Use the loop variable directly; indexing the list with a separate
        # counter falls out of sync with the loop as soon as one request raises.
        res = requests.get(link)
        soup = BeautifulSoup(res.text, 'html.parser')

        # Text of the 'header_for_against' header cell, stored as the Team value
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Every <tr> in the table body becomes one list of stripped cell texts
        table = soup.find('table', {'id': 'matchlogs_against'})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        data = [[cell.text.strip() for cell in row.find_all(['td', 'th'])] for row in rows]

        temp_df = pd.DataFrame(data, columns=big_5_opp_possession_columns)
        temp_df['Team'] = header_data
        # pd.concat is used because DataFrame.append no longer exists in pandas 2.0+.
        big_5_opp_possession_df = pd.concat([big_5_opp_possession_df, temp_df], ignore_index=True)

    except Exception as err:
        # Best-effort: report and skip pages that fail to download or parse.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        tqdm.tqdm.write(f"Skipping {link}: {err!r}")
        continue

In [None]:
# Write the accumulated rows to CSV, leaving the DataFrame index out of the file
big_5_opp_possession_df.to_csv(
    path_or_buf='big_5_opp_possession_2023.csv',
    index=False,
)

In [None]:
# MLS Possession Info collected 3.5
#
# For every URL in mls_possession_links_2023, read the rows of the table with
# id 'matchlogs_for' and add them (plus a 'Team' column) to mls_possession_df.

scraped_frames = []
for link in tqdm.tqdm(mls_possession_links_2023):
    # Pause between consecutive requests
    time.sleep(3)

    try:
        # Fetch the loop's own link: a separately maintained index falls out of
        # step with the list as soon as a single request raises.
        # The timeout stops one unresponsive page from hanging the whole cell.
        res = requests.get(link, timeout=30)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')

        # Header cell whose text is stored in the 'Team' column
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Body rows of the table with id 'matchlogs_for'
        table = soup.find('table', {'id': 'matchlogs_for'})
        rows = table.find('tbody').find_all('tr')

        # One list of stripped cell texts per table row
        data = [
            [cell.text.strip() for cell in row.find_all(['td', 'th'])]
            for row in rows
        ]

        temp_df = pd.DataFrame(data, columns=mls_possession_columns)
        temp_df['Team'] = header_data
        scraped_frames.append(temp_df)

    except Exception as exc:
        # Best effort: report the page that failed and carry on with the next
        # one. Catching Exception (rather than a bare except) leaves
        # KeyboardInterrupt free to stop the cell.
        tqdm.tqdm.write(f"Skipping {link}: {exc!r}")

# DataFrame.append was removed in pandas 2.0; concatenate everything once instead
mls_possession_df = pd.concat(
    [mls_possession_df, *scraped_frames], ignore_index=True
)

In [None]:
# Write the accumulated rows to CSV, leaving the DataFrame index out of the file
mls_possession_df.to_csv(
    path_or_buf='mls_poss_2023.csv',
    index=False,
)

In [None]:
# MLS Opponent Possession Info collected 3.5
#
# For every URL in mls_possession_links_2023, read the rows of the table with
# id 'matchlogs_against' and add them (plus a 'Team' column) to
# mls_opp_possession_df.

scraped_frames = []
for link in tqdm.tqdm(mls_possession_links_2023):
    # Pause between consecutive requests
    time.sleep(3)

    try:
        # Fetch the loop's own link: a separately maintained index falls out of
        # step with the list as soon as a single request raises.
        # The timeout stops one unresponsive page from hanging the whole cell.
        res = requests.get(link, timeout=30)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')

        # Header cell whose text is stored in the 'Team' column
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Body rows of the table with id 'matchlogs_against'
        table = soup.find('table', {'id': 'matchlogs_against'})
        rows = table.find('tbody').find_all('tr')

        # One list of stripped cell texts per table row
        data = [
            [cell.text.strip() for cell in row.find_all(['td', 'th'])]
            for row in rows
        ]

        temp_df = pd.DataFrame(data, columns=mls_opp_possession_columns)
        temp_df['Team'] = header_data
        scraped_frames.append(temp_df)

    except Exception as exc:
        # Best effort: report the page that failed and carry on with the next
        # one. Catching Exception (rather than a bare except) leaves
        # KeyboardInterrupt free to stop the cell.
        tqdm.tqdm.write(f"Skipping {link}: {exc!r}")

# DataFrame.append was removed in pandas 2.0; concatenate everything once instead
mls_opp_possession_df = pd.concat(
    [mls_opp_possession_df, *scraped_frames], ignore_index=True
)

In [None]:
# Write the accumulated rows to CSV, leaving the DataFrame index out of the file
mls_opp_possession_df.to_csv(
    path_or_buf='mls_opp_poss_2023.csv',
    index=False,
)

In [None]:
# Big 5 Team Misc Info collected 3.5
#
# For every URL in big_5_misc_stats_links_2023, read the rows of the table with
# id 'matchlogs_for' and add them (plus a 'Team' column) to big_5_misc_stats_df.

scraped_frames = []
for link in tqdm.tqdm(big_5_misc_stats_links_2023):
    # Pause between consecutive requests
    time.sleep(3)

    try:
        # Fetch the loop's own link: a separately maintained index falls out of
        # step with the list as soon as a single request raises.
        # The timeout stops one unresponsive page from hanging the whole cell.
        res = requests.get(link, timeout=30)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')

        # Header cell whose text is stored in the 'Team' column
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Body rows of the table with id 'matchlogs_for'
        table = soup.find('table', {'id': 'matchlogs_for'})
        rows = table.find('tbody').find_all('tr')

        # One list of stripped cell texts per table row
        data = [
            [cell.text.strip() for cell in row.find_all(['td', 'th'])]
            for row in rows
        ]

        temp_df = pd.DataFrame(data, columns=big_5_misc_stats_columns)
        temp_df['Team'] = header_data
        scraped_frames.append(temp_df)

    except Exception as exc:
        # Best effort: report the page that failed and carry on with the next
        # one. Catching Exception (rather than a bare except) leaves
        # KeyboardInterrupt free to stop the cell.
        tqdm.tqdm.write(f"Skipping {link}: {exc!r}")

# DataFrame.append was removed in pandas 2.0; concatenate everything once instead
big_5_misc_stats_df = pd.concat(
    [big_5_misc_stats_df, *scraped_frames], ignore_index=True
)

In [None]:
# Write the accumulated rows to CSV, leaving the DataFrame index out of the file
big_5_misc_stats_df.to_csv(
    path_or_buf='big_5_misc_2023.csv',
    index=False,
)

In [None]:
# Big 5 Opponent Misc Info collected 3.5
#
# For every URL in big_5_misc_stats_links_2023, read the rows of the table with
# id 'matchlogs_against' and add them (plus a 'Team' column) to
# big_5_opp_misc_stats_df.

scraped_frames = []
for link in tqdm.tqdm(big_5_misc_stats_links_2023):
    # Pause between consecutive requests
    time.sleep(3)

    try:
        # Fetch the loop's own link: a separately maintained index falls out of
        # step with the list as soon as a single request raises.
        # The timeout stops one unresponsive page from hanging the whole cell.
        res = requests.get(link, timeout=30)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')

        # Header cell whose text is stored in the 'Team' column
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Body rows of the table with id 'matchlogs_against'
        table = soup.find('table', {'id': 'matchlogs_against'})
        rows = table.find('tbody').find_all('tr')

        # One list of stripped cell texts per table row
        data = [
            [cell.text.strip() for cell in row.find_all(['td', 'th'])]
            for row in rows
        ]

        temp_df = pd.DataFrame(data, columns=big_5_opp_misc_stats_columns)
        temp_df['Team'] = header_data
        scraped_frames.append(temp_df)

    except Exception as exc:
        # Best effort: report the page that failed and carry on with the next
        # one. Catching Exception (rather than a bare except) leaves
        # KeyboardInterrupt free to stop the cell.
        tqdm.tqdm.write(f"Skipping {link}: {exc!r}")

# DataFrame.append was removed in pandas 2.0; concatenate everything once instead
big_5_opp_misc_stats_df = pd.concat(
    [big_5_opp_misc_stats_df, *scraped_frames], ignore_index=True
)

In [None]:
# Write the accumulated rows to CSV, leaving the DataFrame index out of the file
big_5_opp_misc_stats_df.to_csv(
    path_or_buf='big_5_opp_misc_2023.csv',
    index=False,
)

In [None]:
# MLS Misc Info collected 3.5
#
# For every URL in mls_misc_stats_links_2023, read the rows of the table with
# id 'matchlogs_for' and add them (plus a 'Team' column) to mls_misc_stats_df.

scraped_frames = []
for link in tqdm.tqdm(mls_misc_stats_links_2023):
    # Pause between consecutive requests
    time.sleep(3)

    try:
        # Fetch the loop's own link: a separately maintained index falls out of
        # step with the list as soon as a single request raises.
        # The timeout stops one unresponsive page from hanging the whole cell.
        res = requests.get(link, timeout=30)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')

        # Header cell whose text is stored in the 'Team' column
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Body rows of the table with id 'matchlogs_for'
        table = soup.find('table', {'id': 'matchlogs_for'})
        rows = table.find('tbody').find_all('tr')

        # One list of stripped cell texts per table row
        data = [
            [cell.text.strip() for cell in row.find_all(['td', 'th'])]
            for row in rows
        ]

        temp_df = pd.DataFrame(data, columns=mls_misc_stats_columns)
        temp_df['Team'] = header_data
        scraped_frames.append(temp_df)

    except Exception as exc:
        # Best effort: report the page that failed and carry on with the next
        # one. Catching Exception (rather than a bare except) leaves
        # KeyboardInterrupt free to stop the cell.
        tqdm.tqdm.write(f"Skipping {link}: {exc!r}")

# DataFrame.append was removed in pandas 2.0; concatenate everything once instead
mls_misc_stats_df = pd.concat(
    [mls_misc_stats_df, *scraped_frames], ignore_index=True
)

In [None]:
# Write the accumulated rows to CSV, leaving the DataFrame index out of the file
mls_misc_stats_df.to_csv(
    path_or_buf='mls_misc_2023.csv',
    index=False,
)

In [None]:
# MLS Opponent Misc Info collected 3.5
#
# For every URL in mls_misc_stats_links_2023, read the rows of the table with
# id 'matchlogs_against' and add them (plus a 'Team' column) to
# mls_opp_misc_stats_df.

scraped_frames = []
for link in tqdm.tqdm(mls_misc_stats_links_2023):
    # Pause between consecutive requests
    time.sleep(3)

    try:
        # Fetch the loop's own link: a separately maintained index falls out of
        # step with the list as soon as a single request raises.
        # The timeout stops one unresponsive page from hanging the whole cell.
        res = requests.get(link, timeout=30)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')

        # Header cell whose text is stored in the 'Team' column
        header_data = soup.find('th', {'data-stat': 'header_for_against'}).text

        # Body rows of the table with id 'matchlogs_against'
        table = soup.find('table', {'id': 'matchlogs_against'})
        rows = table.find('tbody').find_all('tr')

        # One list of stripped cell texts per table row
        data = [
            [cell.text.strip() for cell in row.find_all(['td', 'th'])]
            for row in rows
        ]

        temp_df = pd.DataFrame(data, columns=mls_opp_misc_stats_columns)
        temp_df['Team'] = header_data
        scraped_frames.append(temp_df)

    except Exception as exc:
        # Best effort: report the page that failed and carry on with the next
        # one. Catching Exception (rather than a bare except) leaves
        # KeyboardInterrupt free to stop the cell.
        tqdm.tqdm.write(f"Skipping {link}: {exc!r}")

# DataFrame.append was removed in pandas 2.0; concatenate everything once instead
mls_opp_misc_stats_df = pd.concat(
    [mls_opp_misc_stats_df, *scraped_frames], ignore_index=True
)

In [None]:
# Write the accumulated rows to CSV, leaving the DataFrame index out of the file
mls_opp_misc_stats_df.to_csv(
    path_or_buf='mls_opp_misc_2023.csv',
    index=False,
)