In [8]:
import pandas as pd

# Packing analysis for a single match.
#
# Reads an event-level CSV, derives per-player counts from successful passes,
# successful dribbles, "bypass" events and unsuccessful defensive actions,
# combines them into a packing score / rate table, prints it and saves it to
# an Excel workbook.

# Location of the event data to load
file_path = "/Users/marclambertes/Python/Matches/Men/2024-2025/Scotland 2024-2025/Rangers 4-0 Ross County.csv"

# Columns the analysis reads from the event data
required_columns = {'typeId', 'outcome', 'contestantId', 'playerName'}

# Define constants for event types
PASS_TYPE_ID = 1
DRIBBLE_TYPE_ID = 3
BYPASS_TYPE_ID = 45  # Represents when an opponent is bypassed
DEFENSE_TYPE_IDS = {7, 8}  # Type IDs for opponent defense actions
# Assuming 'outcome' column indicates success.
# NOTE(review): if 'outcome' is stored numerically (e.g. 1/0), the equality
# filters below match nothing and the inequality filter matches everything --
# confirm against the CSV.
SUCCESSFUL_OUTCOME = 'successful'


def check_required_columns(events):
    """Raise ValueError if *events* lacks any column in ``required_columns``.

    Args:
        events: DataFrame of event rows loaded from the match CSV.

    Raises:
        ValueError: when one or more required columns are absent.
    """
    missing = required_columns.difference(events.columns)
    if missing:
        raise ValueError(
            f"The CSV file must contain the following columns: {required_columns} "
            f"(missing: {sorted(missing)})"
        )


def count_contestants_per_player(events, column_name):
    """Count distinct ``contestantId`` values per ``playerName``.

    Args:
        events: DataFrame with at least 'playerName' and 'contestantId'.
        column_name: name given to the resulting count column.

    Returns:
        DataFrame with columns ['playerName', column_name], one row per player
        present in *events*.
    """
    # NOTE(review): this counts distinct contestantId values among the
    # player's own rows; whether that equals "opponents bypassed" depends on
    # what contestantId holds -- TODO confirm the intended metric.
    counts = events.groupby('playerName')['contestantId'].nunique().reset_index()
    return counts.rename(columns={'contestantId': column_name})


def add_packing_metrics(counts):
    """Return a copy of *counts* with the packing score, total and rate added.

    Args:
        counts: DataFrame holding the four per-player count columns
            ('Opponents Bypassed by Passes', 'Opponents Bypassed by Dribbles',
            'Opponents Bypassed', 'Opponents Bypassed by Defense').

    Returns:
        A new DataFrame with 'Packing Score', 'Total Actions' and
        'Packing Rate per Action' appended. The rate is 0 wherever
        'Total Actions' is 0.
    """
    result = counts.copy()
    result['Packing Score'] = (
        result['Opponents Bypassed by Passes']
        + result['Opponents Bypassed by Dribbles']
        + result['Opponents Bypassed']
    )
    result['Total Actions'] = (
        result['Opponents Bypassed by Passes']
        + result['Opponents Bypassed by Dribbles']
        + result['Opponents Bypassed by Defense']
    )
    rate = result['Packing Score'] / result['Total Actions']
    # Dividing by zero yields inf (score > 0) or NaN (score == 0); fillna alone
    # would leave the inf values in place, so mask on the denominator first.
    result['Packing Rate per Action'] = rate.where(result['Total Actions'] != 0, 0).fillna(0)
    return result


# In a notebook or when run as a script __name__ is "__main__", so the analysis
# runs exactly as before; the guard only keeps an import from touching the CSV.
if __name__ == "__main__":
    df = pd.read_csv(file_path)

    # Ensure relevant columns exist
    check_required_columns(df)

    # Filter for successful passes (typeId == 1, successful outcome)
    df_passes = df[(df['typeId'] == PASS_TYPE_ID) & (df['outcome'] == SUCCESSFUL_OUTCOME)]

    # Filter for successful dribbles (typeId == 3, successful outcome)
    df_dribbles = df[(df['typeId'] == DRIBBLE_TYPE_ID) & (df['outcome'] == SUCCESSFUL_OUTCOME)]

    # Filter for bypassed opponents (typeId == 45)
    df_bypassed = df[df['typeId'] == BYPASS_TYPE_ID]

    # Filter for unsuccessful defensive actions (typeId 7 or 8, unsuccessful outcome)
    df_defense = df[(df['typeId'].isin(DEFENSE_TYPE_IDS)) & (df['outcome'] != SUCCESSFUL_OUTCOME)]

    # Per-player distinct contestantId counts for each event subset
    df_passes_grouped = count_contestants_per_player(df_passes, 'Opponents Bypassed by Passes')
    df_dribbles_grouped = count_contestants_per_player(df_dribbles, 'Opponents Bypassed by Dribbles')
    df_bypassed_grouped = count_contestants_per_player(df_bypassed, 'Opponents Bypassed')
    # Defensive rows are grouped by playerName as well (not by opponent)
    df_defense_grouped = count_contestants_per_player(df_defense, 'Opponents Bypassed by Defense')

    # Merge all the data (passes, dribbles, bypassed actions, defense) into one
    # DataFrame; outer joins keep players that appear in only some subsets and
    # their missing counts become 0.
    packing_df = pd.merge(df_passes_grouped, df_dribbles_grouped, on='playerName', how='outer').fillna(0)
    packing_df = pd.merge(packing_df, df_bypassed_grouped, on='playerName', how='outer').fillna(0)
    packing_df = pd.merge(packing_df, df_defense_grouped, on='playerName', how='outer').fillna(0)

    # Calculate Packing Score and Packing Rate per Action (zero-safe)
    packing_df = add_packing_metrics(packing_df)

    # Sort by Packing Score
    packing_df = packing_df.sort_values(by='Packing Score', ascending=False)

    print("Packing Scores and Rates for Playmakers:")
    print(packing_df[['playerName', 'Packing Score', 'Packing Rate per Action']])

    # Save to Excel for further analysis
    packing_df.to_excel('Packing_Scores_Output.xlsx', index=False)



Packing Scores and Rates for Playmakers:
      playerName  Packing Score  Packing Rate per Action
0     C. Dessers            1.0                      1.0
2        I. Hagi            1.0                      1.0
3      J. Nisbet            1.0                      1.0
4   J. Tomkinson            1.0                      1.0
5          Jefté            1.0                      1.0
6    K. Phillips            1.0                      1.0
7    N. Chilvers            1.0                      1.0
8      N. Raskin            1.0                      1.0
1    E. Campbell            1.0                      1.0
16    J. Souttar            0.0                      0.0
21    R. Pröpper            0.0                      0.0
20     N. Kenneh            0.0                      0.0
19   M. Diomande            0.0                      0.0
18     K. Łopata            0.0                      0.0
17  J. Tavernier            0.0                      0.0
11     C. Nsiala            0.0                