In [14]:
import pandas as pd
import os
import datetime
import kaggle
import zipfile


from directory import KAGGLE_DATASET_SLUG, KAGGLE_DATA_ROOT, BASE_DATA_DIR, PLAYS_DATA_DIR


# Fetch the Kaggle dataset only when the local data directory is missing, so
# repeated runs of this cell do not re-download it.
if not os.path.exists(KAGGLE_DATA_ROOT):
    print(f"Data directory '{KAGGLE_DATA_ROOT}' not found. Downloading from Kaggle...")

    print(f"Downloading dataset '{KAGGLE_DATASET_SLUG}'...")
    # Download into KAGGLE_DATA_ROOT -- the same directory the existence check
    # above tests -- so the next run finds it and takes the "skip" branch.
    # The previous `path=data_dir` used a name that is neither defined nor
    # imported in the visible cells and raised NameError on a machine without
    # the data.
    # NOTE(review): confirm BASE_DATA_DIR / PLAYS_DATA_DIR resolve inside this
    # directory once the archive is unzipped.
    kaggle.api.dataset_download_files(
        KAGGLE_DATASET_SLUG,
        path=KAGGLE_DATA_ROOT,
        unzip=True
    )
    print("✅ Download and unzip complete.")
else:
    print(f"Data directory '{KAGGLE_DATA_ROOT}' already exists. Skipping download.")

# Read every base lookup table from BASE_DATA_DIR into its own DataFrame.

# Competition / season level tables
df_leagues = pd.read_csv(f"{BASE_DATA_DIR}/leagues.csv")
df_standings = pd.read_csv(f"{BASE_DATA_DIR}/standings.csv")
df_fixtures = pd.read_csv(f"{BASE_DATA_DIR}/fixtures.csv")
df_status = pd.read_csv(f"{BASE_DATA_DIR}/status.csv")
df_venues = pd.read_csv(f"{BASE_DATA_DIR}/venues.csv")

# Team level tables
df_teams = pd.read_csv(f"{BASE_DATA_DIR}/teams.csv")
df_teamStats = pd.read_csv(f"{BASE_DATA_DIR}/teamStats.csv")
df_teamRoster = pd.read_csv(f"{BASE_DATA_DIR}/teamRoster.csv")

# Player table: low_memory=False makes pandas parse the file in one pass
# rather than in chunks.
df_players = pd.read_csv(f"{BASE_DATA_DIR}/players.csv", low_memory=False)

# Lookup from key-event type id to its description
df_keyEventDescription = pd.read_csv(f"{BASE_DATA_DIR}/keyEventDescription.csv")

Data directory './kaggle_data' already exists. Skipping download.


In [15]:
# Ask the user for the specific 'plays' file they want to analyze.
# Surrounding whitespace is stripped so a stray space does not break the path.
plays_filename = input("Enter the 'plays' file name (e.g., plays_2024_ENG.1): ").strip()

# Accept the name with or without its extension, so typing "x.csv" does not
# turn into "x.csv.csv" below.
if plays_filename.lower().endswith(".csv"):
    plays_filename = plays_filename[:-len(".csv")]

# Construct the full file path
full_plays_path = PLAYS_DATA_DIR + plays_filename + ".csv"

print(f"--- Loading plays data from: {full_plays_path} ---")

# Load the specified CSV file.
# low_memory=False makes pandas parse the file in one pass rather than in chunks.
try:
    df_plays = pd.read_csv(full_plays_path, low_memory=False)
except FileNotFoundError:
    print(f"❌ ERROR: File not found at {full_plays_path}")
    print("Please check the filename and try again.")
    # Re-raise: swallowing the error would leave `df_plays` undefined (or
    # holding a previously loaded file) and let later cells run on it.
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    raise
else:
    print("✅ Plays file loaded successfully!")

# This cell is now separate, so you can inspect df_plays after loading

--- Loading plays data from: ./kaggle_data/plays_Data/plays_2024_ENG.1.csv ---
✅ Plays file loaded successfully!


In [16]:
# Display the plays table read by the previous cell, before any merges.
df_plays

Unnamed: 0,seasonType,eventId,playOrder,playId,typeId,text,shortText,period,clockValue,clockDisplayValue,...,wallclock,goalPositionX,goalPositionY,fieldpositionX,fieldPositionY,fieldPosition2X,fieldPosition2Y,athleteId,participant,updateDateTime
0,12654,704279,1,43556191,80,First Half begins.,,1,0,,...,2024-08-17 06:11:52,0.0,0.0,0.000,0.000,0.000,0.000,,,2024-08-18 05:55:35
1,12654,704279,2,43556204,68,"Offside, Manchester United. Marcus Rashford is...",Casemiro Offside,1,16,1',...,2024-08-17 06:12:08,0.0,0.0,0.770,0.736,0.832,0.047,173666.0,Casemiro,2024-08-18 05:55:35
2,12654,704279,3,43556214,66,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,184,4',...,2024-08-17 06:12:08,0.0,0.0,0.554,0.220,0.000,0.000,328466.0,Kobbie Mainoo,2024-08-18 05:55:35
3,12654,704279,3,43556214,66,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,184,4',...,2024-08-17 06:12:08,0.0,0.0,0.554,0.220,0.000,0.000,,Andreas Pereira,2024-08-18 05:55:35
4,12654,704279,4,43556227,66,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,277,5',...,2024-08-17 06:12:08,0.0,0.0,0.500,0.756,0.000,0.000,328466.0,Kobbie Mainoo,2024-08-18 05:55:35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46676,12654,704658,82,45319504,117,Attempt missed. Chido Obi (Manchester United) ...,Chido Obi Shot Off Target,2,5400,90'+7',...,2025-05-25 16:59:14,0.0,0.0,0.176,0.473,0.000,0.393,375738.0,Chido Obi,2025-05-29 04:48:06
46677,12654,704658,83,45319506,68,"Offside, Manchester United. Kobbie Mainoo is c...",Christian Eriksen Offside,2,5400,90'+8',...,2025-05-25 17:00:19,0.0,0.0,0.328,0.424,0.134,0.768,144130.0,Christian Eriksen,2025-05-29 04:48:06
46678,12654,704658,75,45319434,135,Attempt blocked. Youri Tielemans (Aston Villa)...,Youri Tielemans Shot Blocked,2,5400,90'+1',...,2025-05-25 16:53:23,0.0,0.0,0.278,0.494,0.254,0.493,259481.0,Donyell Malen,2025-05-29 04:48:06
46679,12654,704658,78,45319465,66,Foul by Casemiro (Manchester United).,Casemiro Foul,2,5400,90'+4',...,2025-05-25 16:55:36,0.0,0.0,0.476,0.680,0.000,0.000,,Donyell Malen,2025-05-29 04:48:06


In [17]:
#df_keyevents = pd.merge(df_keyevents, df_players, how = "inner", left_on = ["athleteId"], right_on = ["athleteId"])

#df_keyevents = pd.merge(df_keyevents, df_players, how = "inner", left_on = ["athleteId"], right_on = ["athleteId"])

In [18]:
# Columns kept after the team join; `team` is the teams table's `name` column
# under its new label.
df_plays_columns = [
    "seasonType", "eventId", "playOrder", "playId", "typeId",
    "text", "shortText",
    "period", "clockValue", "clockDisplayValue",
    "team",
    "scoringPlay", "shootout", "wallclock",
    "goalPositionX", "goalPositionY",
    "fieldpositionX", "fieldPositionY",
    "fieldPosition2X", "fieldPosition2Y",
    "athleteId", "participant",
    "updateDateTime",
]

# Inner-join the teams table on teamId (plays without a matching team are
# dropped), relabel the joined name column, then keep only the columns above.
df_plays = (
    df_plays
    .merge(df_teams, how="inner", on="teamId")
    .rename(columns={"name": "team"})
    [df_plays_columns]
)
df_plays

Unnamed: 0,seasonType,eventId,playOrder,playId,typeId,text,shortText,period,clockValue,clockDisplayValue,...,wallclock,goalPositionX,goalPositionY,fieldpositionX,fieldPositionY,fieldPosition2X,fieldPosition2Y,athleteId,participant,updateDateTime
0,12654,704279,2,43556204,68,"Offside, Manchester United. Marcus Rashford is...",Casemiro Offside,1,16,1',...,2024-08-17 06:12:08,0.0,0.0,0.770,0.736,0.832,0.047,173666.0,Casemiro,2024-08-18 05:55:35
1,12654,704279,3,43556214,66,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,184,4',...,2024-08-17 06:12:08,0.0,0.0,0.554,0.220,0.000,0.000,328466.0,Kobbie Mainoo,2024-08-18 05:55:35
2,12654,704279,3,43556214,66,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,184,4',...,2024-08-17 06:12:08,0.0,0.0,0.554,0.220,0.000,0.000,,Andreas Pereira,2024-08-18 05:55:35
3,12654,704279,4,43556227,66,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,277,5',...,2024-08-17 06:12:08,0.0,0.0,0.500,0.756,0.000,0.000,328466.0,Kobbie Mainoo,2024-08-18 05:55:35
4,12654,704279,4,43556227,66,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,277,5',...,2024-08-17 06:12:08,0.0,0.0,0.500,0.756,0.000,0.000,,Andreas Pereira,2024-08-18 05:55:35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45872,12654,704658,82,45319504,117,Attempt missed. Chido Obi (Manchester United) ...,Chido Obi Shot Off Target,2,5400,90'+7',...,2025-05-25 16:59:14,0.0,0.0,0.176,0.473,0.000,0.393,375738.0,Chido Obi,2025-05-29 04:48:06
45873,12654,704658,83,45319506,68,"Offside, Manchester United. Kobbie Mainoo is c...",Christian Eriksen Offside,2,5400,90'+8',...,2025-05-25 17:00:19,0.0,0.0,0.328,0.424,0.134,0.768,144130.0,Christian Eriksen,2025-05-29 04:48:06
45874,12654,704658,75,45319434,135,Attempt blocked. Youri Tielemans (Aston Villa)...,Youri Tielemans Shot Blocked,2,5400,90'+1',...,2025-05-25 16:53:23,0.0,0.0,0.278,0.494,0.254,0.493,259481.0,Donyell Malen,2025-05-29 04:48:06
45875,12654,704658,78,45319465,66,Foul by Casemiro (Manchester United).,Casemiro Foul,2,5400,90'+4',...,2025-05-25 16:55:36,0.0,0.0,0.476,0.680,0.000,0.000,,Donyell Malen,2025-05-29 04:48:06


In [19]:
# Columns kept after the description join; `playDescription` takes the place
# of the numeric `typeId` in the selection.
df_plays_columns = [
    "seasonType", "eventId", "playOrder", "playId",
    "playDescription",
    "text", "shortText",
    "period", "clockValue", "clockDisplayValue",
    "team",
    "scoringPlay", "shootout", "wallclock",
    "goalPositionX", "goalPositionY",
    "fieldpositionX", "fieldPositionY",
    "fieldPosition2X", "fieldPosition2Y",
    "athleteId", "participant",
    "updateDateTime",
]

# Left-join the key-event lookup so every play row is kept even when its
# typeId has no entry, relabel keyEventName, then keep only the columns above.
df_plays = (
    df_plays
    .merge(
        df_keyEventDescription,
        how="left",
        left_on="typeId",
        right_on="keyEventTypeId",
    )
    .rename(columns={"keyEventName": "playDescription"})
    [df_plays_columns]
)
df_plays

Unnamed: 0,seasonType,eventId,playOrder,playId,playDescription,text,shortText,period,clockValue,clockDisplayValue,...,wallclock,goalPositionX,goalPositionY,fieldpositionX,fieldPositionY,fieldPosition2X,fieldPosition2Y,athleteId,participant,updateDateTime
0,12654,704279,2,43556204,Offside,"Offside, Manchester United. Marcus Rashford is...",Casemiro Offside,1,16,1',...,2024-08-17 06:12:08,0.0,0.0,0.770,0.736,0.832,0.047,173666.0,Casemiro,2024-08-18 05:55:35
1,12654,704279,3,43556214,Foul,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,184,4',...,2024-08-17 06:12:08,0.0,0.0,0.554,0.220,0.000,0.000,328466.0,Kobbie Mainoo,2024-08-18 05:55:35
2,12654,704279,3,43556214,Foul,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,184,4',...,2024-08-17 06:12:08,0.0,0.0,0.554,0.220,0.000,0.000,,Andreas Pereira,2024-08-18 05:55:35
3,12654,704279,4,43556227,Foul,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,277,5',...,2024-08-17 06:12:08,0.0,0.0,0.500,0.756,0.000,0.000,328466.0,Kobbie Mainoo,2024-08-18 05:55:35
4,12654,704279,4,43556227,Foul,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,277,5',...,2024-08-17 06:12:08,0.0,0.0,0.500,0.756,0.000,0.000,,Andreas Pereira,2024-08-18 05:55:35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45872,12654,704658,82,45319504,Shot Off Target,Attempt missed. Chido Obi (Manchester United) ...,Chido Obi Shot Off Target,2,5400,90'+7',...,2025-05-25 16:59:14,0.0,0.0,0.176,0.473,0.000,0.393,375738.0,Chido Obi,2025-05-29 04:48:06
45873,12654,704658,83,45319506,Offside,"Offside, Manchester United. Kobbie Mainoo is c...",Christian Eriksen Offside,2,5400,90'+8',...,2025-05-25 17:00:19,0.0,0.0,0.328,0.424,0.134,0.768,144130.0,Christian Eriksen,2025-05-29 04:48:06
45874,12654,704658,75,45319434,Shot Blocked,Attempt blocked. Youri Tielemans (Aston Villa)...,Youri Tielemans Shot Blocked,2,5400,90'+1',...,2025-05-25 16:53:23,0.0,0.0,0.278,0.494,0.254,0.493,259481.0,Donyell Malen,2025-05-29 04:48:06
45875,12654,704658,78,45319465,Foul,Foul by Casemiro (Manchester United).,Casemiro Foul,2,5400,90'+4',...,2025-05-25 16:55:36,0.0,0.0,0.476,0.680,0.000,0.000,,Donyell Malen,2025-05-29 04:48:06


In [20]:
# Attach the league/season information to every play (inner join on
# seasonType), reduce the table to the columns below, and export it to CSV.
df_plays = pd.merge(df_plays, df_leagues, how="inner", on="seasonType")

df_plays_columns = [
    "seasonName",
    "eventId",
    "playId",
    "playDescription",
    "text",
    "shortText",
    "period",
    "clockDisplayValue",
    "team",
    "participant",
]

# Select and drop in one non-inplace chain. The previous
# `dropna(inplace=True)` mutated a column-sliced frame in place, the classic
# trigger for pandas' SettingWithCopyWarning. Rows with a missing value in any
# of the kept columns are removed, exactly as before.
df_plays = df_plays[df_plays_columns].dropna()

if not df_plays.empty:
    # The output file is named after the season of the first remaining row;
    # spaces and slashes are replaced in the file name.
    season_name = df_plays['seasonName'].iloc[0].replace(' ', '_').replace('/', '-')
    file_name = f"{season_name}_plays_data.csv"

    df_plays.to_csv(file_name, index=False)

    print(f"\nSuccessfully saved data to: {file_name}")
else:
    print("\nDataFrame was empty. No file was saved.")
df_plays


Successfully saved data to: 2024-25_English_Premier_League_plays_data.csv


Unnamed: 0,seasonName,eventId,playId,playDescription,text,shortText,period,clockDisplayValue,team,participant
0,2024-25 English Premier League,704279,43556204,Offside,"Offside, Manchester United. Marcus Rashford is...",Casemiro Offside,1,1',Manchester United,Casemiro
1,2024-25 English Premier League,704279,43556214,Foul,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,4',Manchester United,Kobbie Mainoo
2,2024-25 English Premier League,704279,43556214,Foul,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,4',Manchester United,Andreas Pereira
3,2024-25 English Premier League,704279,43556227,Foul,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,5',Manchester United,Kobbie Mainoo
4,2024-25 English Premier League,704279,43556227,Foul,Foul by Kobbie Mainoo (Manchester United).,Kobbie Mainoo Foul,1,5',Manchester United,Andreas Pereira
...,...,...,...,...,...,...,...,...,...,...
45872,2024-25 English Premier League,704658,45319504,Shot Off Target,Attempt missed. Chido Obi (Manchester United) ...,Chido Obi Shot Off Target,2,90'+7',Manchester United,Chido Obi
45873,2024-25 English Premier League,704658,45319506,Offside,"Offside, Manchester United. Kobbie Mainoo is c...",Christian Eriksen Offside,2,90'+8',Manchester United,Christian Eriksen
45874,2024-25 English Premier League,704658,45319434,Shot Blocked,Attempt blocked. Youri Tielemans (Aston Villa)...,Youri Tielemans Shot Blocked,2,90'+1',Aston Villa,Donyell Malen
45875,2024-25 English Premier League,704658,45319465,Foul,Foul by Casemiro (Manchester United).,Casemiro Foul,2,90'+4',Manchester United,Donyell Malen
