In [4]:
import pandas as pd
import os
import datetime

# Path to the folder where you unzipped the CSVs
from directory import data_dir

In [5]:
# Load the reference CSVs that sit directly inside data_dir.
def _read_table(table_name, **read_kwargs):
    """Read data_dir/<table_name>.csv, forwarding any extra keyword arguments to pd.read_csv."""
    return pd.read_csv(data_dir + "/" + table_name + ".csv", **read_kwargs)


df_leagues = _read_table("leagues")
# low_memory=False makes pandas parse the file in one pass instead of in chunks.
df_players = _read_table("players", low_memory=False)
df_teams = _read_table("teams")
df_fixtures = _read_table("fixtures")
df_teamStats = _read_table("teamStats")
df_teamRoster = _read_table("teamRoster")
df_keyEventDescription = _read_table("keyEventDescription")
df_status = _read_table("status")
df_venues = _read_table("venues")


In [6]:
# --- USER INPUT FOR FILE NAMES ---
# Ask for the two player-stats file names (typed without the ".csv" extension) and
# load both from data_dir into df_playerstats / df_playerstats2.
# Surrounding whitespace is stripped so a stray space or newline does not cause a
# spurious "file not found".
stats_file_1 = input("Enter the first player stats file (e.g., playerstats_2024_ENG.1): ").strip()
stats_file_2 = input("Enter the second player stats file (e.g., playerstats_2025_ENG.1): ").strip()

# The paths get their own names so that df_playerstats / df_playerstats2 only ever
# hold DataFrames, never a path string left over from a failed load.
stats_path_1 = data_dir + "/" + stats_file_1 + ".csv"
stats_path_2 = data_dir + "/" + stats_file_2 + ".csv"

try:
    df_playerstats = pd.read_csv(stats_path_1, low_memory=False)
    print(f" Loaded {stats_file_1}.csv")

    df_playerstats2 = pd.read_csv(stats_path_2, low_memory=False)
    print(f" Loaded {stats_file_2}.csv")

except FileNotFoundError as e:
    print(f"❌ ERROR: File not found. Check your filenames. {e}")
    # Re-raise so execution stops here; otherwise the merge cells below would run
    # against missing data and fail with an unrelated, harder-to-read error.
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    raise



 Loaded playerstats_2024_ITA.1.csv
 Loaded playerstats_2025_ENG.1.csv


In [7]:
# Attach player attributes; the inner join keeps only stat rows whose athleteId exists in df_players.
df_playerstats = df_playerstats.merge(df_players, how="inner", on="athleteId")

In [8]:
# Attach team attributes; the inner join keeps only rows whose teamId exists in df_teams.
df_playerstats = df_playerstats.merge(df_teams, how="inner", on="teamId")


In [9]:
# Attach league/season attributes; the inner join keeps only rows whose seasonType exists in df_leagues.
df_playerstats = df_playerstats.merge(df_leagues, how="inner", on="seasonType")


In [10]:
# Preview the stats frame after the player, team and league merges.
df_playerstats

Unnamed: 0,seasonType,year_x,league,teamId,athleteId,appearances_value,subIns_value,foulsCommitted_value,foulsSuffered_value,yellowCards_value,...,logoURL,venueId,slug_y,year_y,seasonName,seasonSlug,leagueId,midsizeName,leagueName,leagueShortName
0,12750,2024,ITA.1,104,6729,0,0,0,0,0,...,https://a.espncdn.com/i/teamlogos/soccer/500/1...,8,ita.as_roma,2024,2024-25 Italian Serie A,2024-25-italian-serie-a,730,ITA.1,Italian Serie A,Serie A
1,12750,2024,ITA.1,4007,11935,1,1,0,0,0,...,https://a.espncdn.com/i/teamlogos/soccer/500/4...,2470,ita.monza,2024,2024-25 Italian Serie A,2024-25-italian-serie-a,730,ITA.1,Italian Serie A,Serie A
2,12750,2024,ITA.1,2572,12910,12,0,1,1,0,...,https://a.espncdn.com/i/teamlogos/soccer/500/2...,5934,ita.como,2024,2024-25 Italian Serie A,2024-25-italian-serie-a,730,ITA.1,Italian Serie A,Serie A
3,12750,2024,ITA.1,111,24412,7,1,7,10,1,...,https://a.espncdn.com/i/teamlogos/soccer/500/1...,4600,ita.juventus,2024,2024-25 Italian Serie A,2024-25-italian-serie-a,730,ITA.1,Italian Serie A,Serie A
4,12750,2024,ITA.1,111,32855,0,0,0,0,0,...,https://a.espncdn.com/i/teamlogos/soccer/500/1...,4600,ita.juventus,2024,2024-25 Italian Serie A,2024-25-italian-serie-a,730,ITA.1,Italian Serie A,Serie A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
825,12750,2024,ITA.1,239,403148,2,2,0,0,0,...,https://a.espncdn.com/i/teamlogos/soccer/500/2...,2599,ita.torino,2024,2024-25 Italian Serie A,2024-25-italian-serie-a,730,ITA.1,Italian Serie A,Serie A
826,12750,2024,ITA.1,3263,403234,25,9,21,25,2,...,https://a.espncdn.com/i/teamlogos/soccer/500/3...,2590,ita.genoa,2024,2024-25 Italian Serie A,2024-25-italian-serie-a,730,ITA.1,Italian Serie A,Serie A
827,12750,2024,ITA.1,4007,403329,10,5,2,11,2,...,https://a.espncdn.com/i/teamlogos/soccer/500/4...,2470,ita.monza,2024,2024-25 Italian Serie A,2024-25-italian-serie-a,730,ITA.1,Italian Serie A,Serie A
828,12750,2024,ITA.1,105,403445,0,0,0,0,0,...,https://a.espncdn.com/i/teamlogos/soccer/500/1...,4253,ita.atalanta,2024,2024-25 Italian Serie A,2024-25-italian-serie-a,730,ITA.1,Italian Serie A,Serie A


In [11]:

# Shape the merged stats from the first file into a report table and export it to CSV.
# NOTE(review): this cell is nearly identical to In [13]; extracting one function that
# takes the frame as an argument would remove the copy-paste.
# This cell overwrites df_playerstats, so re-running it without first re-running the
# merge cells raises KeyError (the raw column names are gone after the rename).

# Columns to keep, in output order. Selecting these discards every other merged
# column, so no separate drop() of ids / slugs / URLs is needed.
new_order = ['seasonName', 'displayName_y', 'positionName', 'displayName_x', 'jersey', 'age', 'citizenship',
             'displayHeight', 'displayWeight', 'appearances_value', 'subIns_value', 'totalGoals_value',
             'goalAssists_value', 'totalShots_value', 'shotsOnTarget_value', 'offsides_value', 'ownGoals_value',
             'shotsFaced_value', 'saves_value', 'goalsConceded_value', 'foulsCommitted_value', 'foulsSuffered_value',
             'yellowCards_value', 'redCards_value']

# Select first, then map the raw column names to report headers.
df_playerstats = df_playerstats[new_order].rename(
    columns={'displayName_y': "Team", 'positionName': 'Position', "displayName_x": "Player Name", "age": "Age",
             "citizenship": "Country", "displayHeight": "Height", 'displayWeight': 'Weight', 'jersey': 'Shirt #',
             "appearances_value": "Appearances", 'subIns_value': "Substitute Appearances", "totalGoals_value": "Goals",
             'goalAssists_value': 'Assists', "totalShots_value": 'Shots', 'shotsOnTarget_value': 'Shots On Target',
             'shotsFaced_value': 'Shots Faced', 'saves_value': 'Saves', 'offsides_value': 'Offsides',
             'ownGoals_value': 'Own Goals', 'goalsConceded_value': 'Goals Conceded',
             'foulsCommitted_value': 'Fouls Committed', 'foulsSuffered_value': 'Fouls Suffered',
             'yellowCards_value': 'Yellow Cards', 'redCards_value': 'Red Cards'})

# astype(int) raises if either column contains NaN.
df_playerstats = df_playerstats.astype({'Shirt #': int, 'Age': int})

# Ordered categorical so that sorting follows this explicit order instead of
# alphabetical order; any position label not listed here becomes NaN.
position_order = ['Goalkeeper', 'Defender', 'Midfielder', 'Forward']
df_playerstats['Position'] = pd.Categorical(df_playerstats['Position'], categories=position_order, ordered=True)
df_playerstats = df_playerstats.sort_values(by=['Team', 'Position'], ascending=True)

# Exclude these clubs from the export (a no-op when none of them are present).
teams_to_drop = ['Burnley', 'Luton Town', 'Sheffield United']
df_playerstats = df_playerstats[~df_playerstats['Team'].isin(teams_to_drop)]

if not df_playerstats.empty:
    # The file name is derived from the first row's season label, with spaces and
    # slashes replaced so it is safe to use as a file name.
    season_name = df_playerstats['seasonName'].iloc[0].replace(' ', '_').replace('/', '-')
    file_name_1 = f"{season_name}_player_stats.csv"
    df_playerstats.to_csv(file_name_1, index=False)
    print(f"\nSuccessfully saved data to: {file_name_1}")
else:
    print("\nDataFrame 1 was empty. No file was saved.")

df_playerstats


Successfully saved data to: 2024-25_Italian_Serie_A_player_stats.csv


Unnamed: 0,seasonName,Team,Position,Player Name,Shirt #,Age,Country,Height,Weight,Appearances,...,Shots On Target,Offsides,Own Goals,Shots Faced,Saves,Goals Conceded,Fouls Committed,Fouls Suffered,Yellow Cards,Red Cards
109,2024-25 Italian Serie A,AC Milan,Goalkeeper,Marco Sportiello,57,33,Italy,"6' 4""",192 lbs,2,...,0,1,0,9,4,2,0,0,0,0
146,2024-25 Italian Serie A,AC Milan,Goalkeeper,Mike Maignan,16,30,France,"6' 3""",194 lbs,37,...,0,4,0,356,100,38,1,7,2,0
270,2024-25 Italian Serie A,AC Milan,Goalkeeper,Devis Vásquez,32,27,Colombia,"6' 5""",203 lbs,2,...,0,0,0,31,7,1,0,0,0,0
622,2024-25 Italian Serie A,AC Milan,Goalkeeper,Lapo Nava,69,21,Italy,"6' 6""",201 lbs,0,...,0,0,0,0,0,0,0,0,0,0
656,2024-25 Italian Serie A,AC Milan,Goalkeeper,Noah Raveyre,22,20,France,"6' 0""",163 lbs,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
335,2024-25 Italian Serie A,Venezia,Forward,Alessio Zerbin,7,26,Italy,"6' 0""",174 lbs,20,...,6,3,0,0,3,22,34,35,4,0
378,2024-25 Italian Serie A,Venezia,Forward,John Yeboah,-1,25,Ecuador,"5' 7""",146 lbs,33,...,10,2,0,0,0,22,14,21,6,0
582,2024-25 Italian Serie A,Venezia,Forward,Antonio Raimondo,-1,21,Italy,"6' 1""",161 lbs,9,...,1,0,0,0,0,0,2,1,0,0
608,2024-25 Italian Serie A,Venezia,Forward,Daniel Fila,7,23,Czechia,,,10,...,5,0,0,0,0,6,13,7,2,1


In [12]:
# Enrich the second stats file with player, team and league attributes.
# Each step is an inner join, so rows without a match in the lookup table are dropped.
df_playerstats2 = (
    df_playerstats2
    .merge(df_players, how="inner", on="athleteId")
    .merge(df_teams, how="inner", on="teamId")
    .merge(df_leagues, how="inner", on="seasonType")
)

df_playerstats2

Unnamed: 0,seasonType,year_x,league,teamId,athleteId,appearances_value,subIns_value,foulsCommitted_value,foulsSuffered_value,yellowCards_value,...,logoURL,venueId,slug_y,year_y,seasonName,seasonSlug,leagueId,midsizeName,leagueName,leagueShortName
0,13481,2025,ENG.1,368,4946,7,0,4,1,0,...,https://a.espncdn.com/i/teamlogos/soccer/500/3...,10318,eng.everton,2025,2025-26 English Premier League,2025-26-english-premier-league,700,ENG.1,English Premier League,Premier League
1,13481,2025,ENG.1,367,6327,0,0,0,0,0,...,https://a.espncdn.com/i/teamlogos/soccer/500/3...,195,eng.tottenham,2025,2025-26 English Premier League,2025-26-english-premier-league,700,ENG.1,English Premier League,Premier League
2,13481,2025,ENG.1,368,7441,0,0,0,0,0,...,https://a.espncdn.com/i/teamlogos/soccer/500/3...,10318,eng.everton,2025,2025-26 English Premier League,2025-26-english-premier-league,700,ENG.1,English Premier League,Premier League
3,13481,2025,ENG.1,331,26843,5,4,3,2,1,...,https://a.espncdn.com/i/teamlogos/soccer/500/3...,4440,eng.brighton,2025,2025-26 English Premier League,2025-26-english-premier-league,700,ENG.1,English Premier League,Premier League
4,13481,2025,ENG.1,357,30116,4,2,0,0,0,...,https://a.espncdn.com/i/teamlogos/soccer/500/3...,190,eng.leeds,2025,2025-26 English Premier League,2025-26-english-premier-league,700,ENG.1,English Premier League,Premier League
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
527,13481,2025,ENG.1,349,397213,1,1,0,0,0,...,https://a.espncdn.com/i/teamlogos/soccer/500/3...,6020,eng.bournemouth,2025,2025-26 English Premier League,2025-26-english-premier-league,700,ENG.1,English Premier League,Premier League
528,13481,2025,ENG.1,359,397263,2,2,0,2,0,...,https://a.espncdn.com/i/teamlogos/soccer/500/3...,2267,eng.arsenal,2025,2025-26 English Premier League,2025-26-english-premier-league,700,ENG.1,English Premier League,Premier League
529,13481,2025,ENG.1,349,398123,0,0,0,0,0,...,https://a.espncdn.com/i/teamlogos/soccer/500/3...,6020,eng.bournemouth,2025,2025-26 English Premier League,2025-26-english-premier-league,700,ENG.1,English Premier League,Premier League
530,13481,2025,ENG.1,331,403702,0,0,0,0,0,...,https://a.espncdn.com/i/teamlogos/soccer/500/3...,4440,eng.brighton,2025,2025-26 English Premier League,2025-26-english-premier-league,700,ENG.1,English Premier League,Premier League


In [13]:
# Shape the merged stats from the second file into a report table and export it to CSV.
# NOTE(review): this cell is nearly identical to In [11]; extracting one function that
# takes the frame as an argument would remove the copy-paste.
# This cell overwrites df_playerstats2, so re-running it without first re-running the
# merge cell raises KeyError (the raw column names are gone after the rename).

# Columns to keep, in output order. Selecting these discards every other merged
# column, so no separate drop() of ids / slugs / URLs is needed.
new_order = [
    'seasonName', 'displayName_y', 'positionName', 'displayName_x',
    'jersey', 'age', 'citizenship', 'displayHeight', 'displayWeight',
    'appearances_value', 'subIns_value', 'totalGoals_value', 'goalAssists_value',
    'totalShots_value', 'shotsOnTarget_value', 'offsides_value', 'ownGoals_value',
    'shotsFaced_value', 'saves_value', 'goalsConceded_value', 'foulsCommitted_value',
    'foulsSuffered_value', 'yellowCards_value', 'redCards_value'
]

# Select first, then map the raw column names to report headers. There is no entry
# for 'year_x' because that column is not part of new_order and so never reaches the rename.
df_playerstats2 = df_playerstats2[new_order].rename(columns={
    'displayName_y': "Team",
    'positionName': 'Position',
    "displayName_x": "Player Name",
    "age": "Age",
    "citizenship": "Country",
    "displayHeight": "Height",
    'displayWeight': 'Weight',
    'jersey': 'Shirt #',
    "appearances_value": "Appearances",
    'subIns_value': "Substitute Appearances",
    "totalGoals_value": "Goals",
    'goalAssists_value': 'Assists',
    "totalShots_value": 'Shots',
    'shotsOnTarget_value': 'Shots On Target',
    'shotsFaced_value': 'Shots Faced',
    'saves_value': 'Saves',
    'offsides_value': 'Offsides',
    'ownGoals_value': 'Own Goals',
    'goalsConceded_value': 'Goals Conceded',
    'foulsCommitted_value': 'Fouls Committed',
    'foulsSuffered_value': 'Fouls Suffered',
    'yellowCards_value': 'Yellow Cards',
    'redCards_value': 'Red Cards'
})

# astype(int) raises if either column contains NaN.
df_playerstats2 = df_playerstats2.astype({'Shirt #': int, 'Age': int})

# Ordered categorical so that sorting follows this explicit order instead of
# alphabetical order; any position label not listed here becomes NaN.
position_order = ['Goalkeeper', 'Defender', 'Midfielder', 'Forward']
df_playerstats2['Position'] = pd.Categorical(df_playerstats2['Position'], categories=position_order, ordered=True)
df_playerstats2 = df_playerstats2.sort_values(by=['Team', 'Position'], ascending=True)

# Exclude these clubs from the export (a no-op when none of them are present).
teams_to_drop = ['Burnley', 'Luton Town', 'Sheffield United']
df_playerstats2 = df_playerstats2[~df_playerstats2['Team'].isin(teams_to_drop)]

if not df_playerstats2.empty:
    # The file name is derived from the first row's season label, with spaces and
    # slashes replaced so it is safe to use as a file name.
    season_name = df_playerstats2['seasonName'].iloc[0].replace(' ', '_').replace('/', '-')
    file_name_2 = f"{season_name}_player_stats.csv"
    df_playerstats2.to_csv(file_name_2, index=False)
    print(f"\nSuccessfully saved data to: {file_name_2}")
else:
    print("\nDataFrame 2 was empty. No file was saved.")

df_playerstats2


Successfully saved data to: 2025-26_English_Premier_League_player_stats.csv


Unnamed: 0,seasonName,Team,Position,Player Name,Shirt #,Age,Country,Height,Weight,Appearances,...,Shots On Target,Offsides,Own Goals,Shots Faced,Saves,Goals Conceded,Fouls Committed,Fouls Suffered,Yellow Cards,Red Cards
336,2025-26 English Premier League,AFC Bournemouth,Goalkeeper,Djordje Petrovic,1,25,Serbia,"6' 4""",194 lbs,7,...,0,0,0,0,19,8,0,0,1,0
341,2025-26 English Premier League,AFC Bournemouth,Goalkeeper,William Dennis,40,25,England,"6' 2""",183 lbs,0,...,0,0,0,0,0,0,0,0,0,0
19,2025-26 English Premier League,AFC Bournemouth,Defender,Adam Smith,15,34,England,"5' 11""",172 lbs,3,...,0,0,0,0,0,3,3,3,0,0
172,2025-26 English Premier League,AFC Bournemouth,Defender,Marcos Senesi,5,28,Argentina,"6' 1""",174 lbs,7,...,0,0,0,0,0,8,5,7,1,0
300,2025-26 English Premier League,AFC Bournemouth,Defender,James Hill,23,23,England,"6' 0""",161 lbs,6,...,1,0,0,0,0,2,4,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185,2025-26 English Premier League,Wolverhampton Wanderers,Forward,Hwang Hee-Chan,11,29,South Korea,"5' 10""",170 lbs,6,...,2,2,0,0,0,4,5,4,0,0
232,2025-26 English Premier League,Wolverhampton Wanderers,Forward,Jørgen Strand Larsen,-1,25,Norway,"6' 4""",172 lbs,5,...,3,1,0,0,0,5,9,3,0,0
245,2025-26 English Premier League,Wolverhampton Wanderers,Forward,Sasa Kalajdzic,10,28,Austria,"6' 7""",196 lbs,1,...,0,0,0,0,0,0,0,0,0,0
391,2025-26 English Premier League,Wolverhampton Wanderers,Forward,Tolu Arokodare,-1,24,Nigeria,"6' 6""",214 lbs,4,...,1,0,0,0,0,5,2,3,0,0
