In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

# Step 1: Load and Prepare the Data
df = pd.read_csv("/Users/marclambertes/Python/Matches/Men/2024-2025/Eredivisie 2024-2025/Twente 1-0 Almere.csv")
type_cols = [col for col in df.columns if '/qualifierId' in col]

# Step 2: Initialize 'endX', 'endY', 'pass_angle', and 'pass_length' columns with 0.0
# Qualifier ID -> destination column. Rows that carry none of these
# qualifiers keep the 0.0 default.
qualifier_targets = {
    140: 'endX',         # Qualifier ID for endX
    141: 'endY',         # Qualifier ID for endY
    213: 'pass_angle',   # Qualifier ID for pass angle
    212: 'pass_length',  # Qualifier ID for pass length
}
for target_col in qualifier_targets.values():
    df[target_col] = 0.0

# Step 3: Extract 'endX', 'endY', 'pass_angle', and 'pass_length' from qualifiers
# Each '<prefix>/qualifierId' column is paired with the '<prefix>/value' column
# that shares its prefix, so the pairing does not depend on column order.
# Work is done one column at a time instead of one row at a time.
for id_col in type_cols:
    value_col = id_col.replace('/qualifierId', '/value')
    for qualifier_id, target_col in qualifier_targets.items():
        hits = df[id_col] == qualifier_id
        if hits.any():
            # Unparseable values become NaN here; such rows are dropped by
            # the later clean-up step that calls dropna on these columns.
            df.loc[hits, target_col] = pd.to_numeric(df.loc[hits, value_col], errors='coerce')

# Step 4: Calculate recipientId based on the next action
# A pass (typeId == 1) is credited to the player of the very next row, but
# only when that next row has the same contestantId. shift(-1) lines every
# row up with its successor; the last row has no successor and gets NaN.
next_contestant = df['contestantId'].shift(-1)
next_player = df['playerId'].shift(-1)
pass_to_same_team = (df['typeId'] == 1) & (next_contestant == df['contestantId'])

df['recipientId'] = next_player.where(pass_to_same_team)

# Fill missing recipientId values (e.g., if the next action is not from the same team)
df['recipientId'] = df['recipientId'].fillna(-1)  # Use -1 or another placeholder for missing values

# Step 5: Add Passer and Receiver
df['passer'] = df['playerId']
df['receiver'] = df['recipientId']

# Step 6: Add Pass Type Features
# Qualifier ID -> 0/1 flag column. A flag is 1 when the ID appears in any of
# the row's qualifierId columns.
pass_flags = {
    2: 'Cross',           # Qualifier ID for Cross
    4: 'Through-ball',    # Qualifier ID for Through-ball
    1: 'LongBall',        # Qualifier ID for LongBall
    196: 'Switch',        # Qualifier ID for Switch
    210: 'Assist',        # Qualifier ID for Assist
    218: '2nd Assist',    # Qualifier ID for 2nd Assist
}

# Snapshot of the qualifier ID columns, compared column-wise in one pass
# rather than cell by cell.
qualifier_ids = df[type_cols]
for qualifier_id, flag_col in pass_flags.items():
    df[flag_col] = qualifier_ids.eq(qualifier_id).any(axis=1).astype(int)

# Step 7: Add Pass Type as a Categorical Feature
# Flags are applied in dict order and later ones overwrite earlier ones, so a
# row with several flags ends up labelled with the last matching flag.
df['pass_type'] = 'Other'  # Default value
for flag_col in pass_flags.values():
    df.loc[df[flag_col] == 1, 'pass_type'] = flag_col

# Step 8: Encode Categorical Features
df = pd.get_dummies(df, columns=['pass_type', 'passer', 'receiver'], drop_first=True)

# Step 9: Clean and prepare the data
# Convert columns to numeric and handle missing/invalid data
numeric_cols = ['x', 'y', 'endX', 'endY', 'pass_length', 'pass_angle']
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')  # Convert to numeric, invalid -> NaN

# Drop rows with missing values in numeric columns
df = df.dropna(subset=numeric_cols)

# Step 10: Feature Engineering
# Load xT grid
xT = pd.read_csv("xT_grid.csv", header=None)
xT = np.array(xT)
xT_rows, xT_cols = xT.shape

# Extent of the coordinate system along each axis.
# NOTE(review): assumes x/y/endX/endY are on a 0-100 scale -- confirm against the data feed.
PITCH_X_MAX = 100.0
PITCH_Y_MAX = 100.0


def _zone_index(coords, extent, n_zones):
    """Return the integer grid zone (0 .. n_zones - 1) for each coordinate.

    Zones are equal-width slices of the fixed range [0, extent], so the same
    coordinate always lands in the same zone regardless of which column it
    comes from. Values outside the range are clamped to the nearest edge zone.
    """
    raw = np.floor(coords.to_numpy(dtype=float) / extent * n_zones)
    return np.clip(raw, 0, n_zones - 1).astype(int)


# Bin coordinates into xT grid zones
# (pd.cut with an integer bin count would take its edges from each column's
# own min/max, giving start and end points different zone boundaries.)
df['x1_bin'] = _zone_index(df['x'], PITCH_X_MAX, xT_cols)
df['y1_bin'] = _zone_index(df['y'], PITCH_Y_MAX, xT_rows)
df['x2_bin'] = _zone_index(df['endX'], PITCH_X_MAX, xT_cols)
df['y2_bin'] = _zone_index(df['endY'], PITCH_Y_MAX, xT_rows)

# Calculate xT and xPass
# The grid is indexed [row = y zone, column = x zone]; xT is the change in
# zone value between the start and end of the action.
df['start_zone_value'] = xT[df['y1_bin'].to_numpy(), df['x1_bin'].to_numpy()]
df['end_zone_value'] = xT[df['y2_bin'].to_numpy(), df['x2_bin'].to_numpy()]
df['xT'] = df['end_zone_value'] - df['start_zone_value']

# Straight-line distance and absolute direction (radians) from start to end.
df['distance'] = np.sqrt((df['endX'] - df['x']) ** 2 + (df['endY'] - df['y']) ** 2)
df['angle'] = np.abs(np.arctan2(df['endY'] - df['y'], df['endX'] - df['x']))
# Heuristic score: decreases linearly with distance and angle, kept in [0, 1].
df['xPass'] = 1 - (0.02 * df['distance'] + 0.1 * df['angle'])
df['xPass'] = df['xPass'].clip(0, 1)

# Step 11: Define Target Variable
# Target is 1 for typeId 7 or 8. The comparison is numeric so that a typeId
# column stored as float (7.0) or as text ('7') is matched the same way.
df['defensive_intervention'] = pd.to_numeric(df['typeId'], errors='coerce').isin([7, 8]).astype(int)

# Step 12: Pressing Events Analysis
# Define event type IDs
PASS_EVENTS = {1}  # Adjust if needed
PRESSING_EVENTS = {7, 8, 44, 45, 49}

# Sort events by period, time, and sequence
df = df.sort_values(by=["periodId", "timeMin", "timeSec"])

# Shift previous event details (for contestantId and typeId)
df["prev_contestantId"] = df["contestantId"].shift(1)
df["prev_typeId"] = df["typeId"].shift(1)

# Filter pressing events where the previous event was a pass by the other team
pressing_events = df[
    (df["typeId"].isin(PRESSING_EVENTS)) &
    (df["prev_typeId"].isin(PASS_EVENTS)) &
    (df["contestantId"] != df["prev_contestantId"])  # Different team
]

# Count pressing events by player
pressing_count = (
    pressing_events.groupby(["contestantId", "playerName"])["id"]
    .count()
    .reset_index()
    .rename(columns={"id": "Pressures"})
)

# Calculate Pressures per 90 minutes (PP90)
total_minutes = df["timeMin"].max()  # Assuming match is complete
if total_minutes > 0:
    pressing_count["PP90"] = (pressing_count["Pressures"] / total_minutes) * 90
else:
    # No positive minute recorded (or no rows): a rate cannot be computed.
    pressing_count["PP90"] = 0.0

# Merge Pressures and PP90 back into the main DataFrame
# Join on the same keys the counts were grouped by; joining on playerName
# alone would duplicate rows if one name appears under two contestantIds.
df = df.merge(pressing_count, on=["contestantId", "playerName"], how="left")

# Fill missing values in Pressures and PP90 (for players with no pressing events)
df['Pressures'] = df['Pressures'].fillna(0)
df['PP90'] = df['PP90'].fillna(0)

# Step 13: Select Features and Target
# Fixed numeric inputs first, then every one-hot column produced by the
# categorical encoding step (recognised by its column-name prefix).
base_features = ['xT', 'xPass', 'distance', 'angle', 'x', 'y', 'endX', 'endY', 'Pressures', 'PP90']
encoded_prefixes = ('pass_type_', 'passer_', 'receiver_')
encoded_features = [name for name in df.columns if name.startswith(encoded_prefixes)]
features = base_features + encoded_features

X = df[features]
y = df['defensive_intervention']

# Step 14: Split Data into Training and Testing Sets
# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 15: Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=1000, random_state=42)
model.fit(X_train, y_train)

# Step 16: Evaluate the Model
# Hard labels feed the classification report; column 1 of predict_proba
# feeds the ROC AUC score.
y_pred = model.predict(X_test)
test_probabilities = model.predict_proba(X_test)
y_pred_proba = test_probabilities[:, 1]

print("Classification Report:")
report_text = classification_report(y_test, y_pred)
print(report_text)

auc_value = roc_auc_score(y_test, y_pred_proba)
print("ROC AUC Score:", auc_value)

# Step 17: Feature Importance
# Pair each importance with its feature name and order from most to least important.
feature_importances = pd.Series(model.feature_importances_, index=features)
feature_importances = feature_importances.sort_values(ascending=False)

# Horizontal bar chart: importance on the x axis, feature name on the y axis.
plt.figure(figsize=(10, 6))
sns.barplot(x=feature_importances, y=feature_importances.index)
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.title('Feature Importance')
plt.show()

# Step 18: Save the Model
model_path = "disruption_model_with_receiver.pkl"
joblib.dump(model, model_path)
print(f"\nModel saved as '{model_path}'.")

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import os

# Step 1: Define the folder containing CSV files
folder_path = "/Users/marclambertes/Python/Matches/Men/2024-2025/Eredivisie 2024-2025"

# Step 2: Load xT Grid
xT = pd.read_csv("xT_grid.csv", header=None)
xT = np.array(xT)
xT_rows, xT_cols = xT.shape
print("xT grid dimensions:", xT_rows, xT_cols)

# Step 3: Combine all CSV files into one DataFrame
# Files are read in sorted name order so the combined row order is the same
# on every run (os.listdir returns entries in arbitrary order). Frames are
# collected first and concatenated once, instead of re-copying the growing
# frame for every file.
match_frames = []

for file_name in sorted(os.listdir(folder_path)):
    if file_name.endswith(".csv"):
        print(f"Loading file: {file_name}")
        file_path = os.path.join(folder_path, file_name)
        match_frames.append(pd.read_csv(file_path))

if not match_frames:
    # Fail here with a clear message rather than later on a missing column.
    raise FileNotFoundError(f"No CSV files found in {folder_path}")

all_data = pd.concat(match_frames, ignore_index=True)

print("\nAll files combined into a single DataFrame.")

# Step 4: Process Qualifiers for 'endX', 'endY', 'pass_angle', and 'pass_length'
type_cols = [col for col in all_data.columns if '/qualifierId' in col]

# Initialize 'endX', 'endY', 'pass_angle', and 'pass_length' columns with 0.0
# Qualifier ID -> destination column. Rows that carry none of these
# qualifiers keep the 0.0 default.
qualifier_targets = {
    140: 'endX',         # Qualifier ID for endX
    141: 'endY',         # Qualifier ID for endY
    213: 'pass_angle',   # Qualifier ID for pass angle
    212: 'pass_length',  # Qualifier ID for pass length
}
for target_col in qualifier_targets.values():
    all_data[target_col] = 0.0

# Extract values from qualifiers
# Each '<prefix>/qualifierId' column is paired with the '<prefix>/value' column
# that shares its prefix, so the pairing does not depend on column order.
# Work is done one column at a time instead of one row at a time.
for id_col in type_cols:
    value_col = id_col.replace('/qualifierId', '/value')
    for qualifier_id, target_col in qualifier_targets.items():
        hits = all_data[id_col] == qualifier_id
        if hits.any():
            # Unparseable values become NaN here; such rows are dropped by
            # the later clean-up step that calls dropna on these columns.
            all_data.loc[hits, target_col] = pd.to_numeric(all_data.loc[hits, value_col], errors='coerce')

# Step 5: Calculate recipientId based on the next action
# A pass (typeId == 1) is credited to the player of the very next row, but
# only when that next row has the same contestantId. shift(-1) lines every
# row up with its successor; the last row has no successor and gets NaN.
# NOTE(review): all_data is a concatenation of several files, so the row after
# a file's final event comes from a different file. No match identifier is
# visible here to guard that boundary -- confirm whether one should be added.
next_contestant = all_data['contestantId'].shift(-1)
next_player = all_data['playerId'].shift(-1)
pass_to_same_team = (all_data['typeId'] == 1) & (next_contestant == all_data['contestantId'])

all_data['recipientId'] = next_player.where(pass_to_same_team)

# Fill missing recipientId values (e.g., if the next action is not from the same team)
all_data['recipientId'] = all_data['recipientId'].fillna(-1)  # Use -1 or another placeholder for missing values

# Step 6: Add Passer and Receiver
all_data['passer'] = all_data['playerId']
all_data['receiver'] = all_data['recipientId']

# Step 7: Add Pass Type Features
# Qualifier ID -> 0/1 flag column. A flag is 1 when the ID appears in any of
# the row's qualifierId columns.
pass_flags = {
    2: 'Cross',           # Qualifier ID for Cross
    4: 'Through-ball',    # Qualifier ID for Through-ball
    1: 'LongBall',        # Qualifier ID for LongBall
    196: 'Switch',        # Qualifier ID for Switch
    210: 'Assist',        # Qualifier ID for Assist
    218: '2nd Assist',    # Qualifier ID for 2nd Assist
}

# Snapshot of the qualifier ID columns, compared column-wise in one pass
# rather than cell by cell.
qualifier_ids = all_data[type_cols]
for qualifier_id, flag_col in pass_flags.items():
    all_data[flag_col] = qualifier_ids.eq(qualifier_id).any(axis=1).astype(int)

# Step 8: Add Pass Type as a Categorical Feature
# Flags are applied in dict order and later ones overwrite earlier ones, so a
# row with several flags ends up labelled with the last matching flag.
all_data['pass_type'] = 'Other'  # Default value
for flag_col in pass_flags.values():
    all_data.loc[all_data[flag_col] == 1, 'pass_type'] = flag_col

# Step 9: Encode Categorical Features
all_data = pd.get_dummies(all_data, columns=['pass_type', 'passer', 'receiver'], drop_first=True)

# Step 10: Clean and prepare the data
# Convert columns to numeric and handle missing/invalid data
numeric_cols = ['x', 'y', 'endX', 'endY', 'pass_length', 'pass_angle']
for col in numeric_cols:
    all_data[col] = pd.to_numeric(all_data[col], errors='coerce')  # Convert to numeric, invalid -> NaN

# Drop rows with missing values in numeric columns
all_data = all_data.dropna(subset=numeric_cols)

# Step 11: Feature Engineering
# Extent of the coordinate system along each axis.
# NOTE(review): assumes x/y/endX/endY are on a 0-100 scale -- confirm against the data feed.
PITCH_X_MAX = 100.0
PITCH_Y_MAX = 100.0


def _zone_index(coords, extent, n_zones):
    """Return the integer grid zone (0 .. n_zones - 1) for each coordinate.

    Zones are equal-width slices of the fixed range [0, extent], so the same
    coordinate always lands in the same zone regardless of which column it
    comes from. Values outside the range are clamped to the nearest edge zone.
    """
    raw = np.floor(coords.to_numpy(dtype=float) / extent * n_zones)
    return np.clip(raw, 0, n_zones - 1).astype(int)


# Bin coordinates into xT grid zones
# (pd.cut with an integer bin count would take its edges from each column's
# own min/max, giving start and end points different zone boundaries.)
all_data['x1_bin'] = _zone_index(all_data['x'], PITCH_X_MAX, xT_cols)
all_data['y1_bin'] = _zone_index(all_data['y'], PITCH_Y_MAX, xT_rows)
all_data['x2_bin'] = _zone_index(all_data['endX'], PITCH_X_MAX, xT_cols)
all_data['y2_bin'] = _zone_index(all_data['endY'], PITCH_Y_MAX, xT_rows)

# Calculate xT and xPass
# The grid is indexed [row = y zone, column = x zone]; xT is the change in
# zone value between the start and end of the action.
all_data['start_zone_value'] = xT[all_data['y1_bin'].to_numpy(), all_data['x1_bin'].to_numpy()]
all_data['end_zone_value'] = xT[all_data['y2_bin'].to_numpy(), all_data['x2_bin'].to_numpy()]
all_data['xT'] = all_data['end_zone_value'] - all_data['start_zone_value']

# Straight-line distance and absolute direction (radians) from start to end.
all_data['distance'] = np.sqrt((all_data['endX'] - all_data['x']) ** 2 + (all_data['endY'] - all_data['y']) ** 2)
all_data['angle'] = np.abs(np.arctan2(all_data['endY'] - all_data['y'], all_data['endX'] - all_data['x']))
# Heuristic score: decreases linearly with distance and angle, kept in [0, 1].
all_data['xPass'] = 1 - (0.02 * all_data['distance'] + 0.1 * all_data['angle'])
all_data['xPass'] = all_data['xPass'].clip(0, 1)

# Step 12: Define Target Variable
# Target is 1 for typeId 7 or 8. The comparison is numeric so that a typeId
# column stored as float (7.0) or as text ('7') is matched the same way.
all_data['defensive_intervention'] = pd.to_numeric(all_data['typeId'], errors='coerce').isin([7, 8]).astype(int)

# Step 13: Pressing Events Analysis
# Define event type IDs
PASS_EVENTS = {1}  # Adjust if needed
PRESSING_EVENTS = {7, 8, 44, 45, 49}

# Sort events by period, time, and sequence
# NOTE(review): all_data holds several concatenated files and the sort keys do
# not include a match identifier, so rows from different files are interleaved
# here and the "previous event" below may come from another file. The
# timeMin maximum used for PP90 is likewise taken over all files. Confirm
# whether a per-match key should be added to the sort and the rate.
all_data = all_data.sort_values(by=["periodId", "timeMin", "timeSec"])

# Shift previous event details (for contestantId and typeId)
all_data["prev_contestantId"] = all_data["contestantId"].shift(1)
all_data["prev_typeId"] = all_data["typeId"].shift(1)

# Filter pressing events where the previous event was a pass by the other team
pressing_events = all_data[
    (all_data["typeId"].isin(PRESSING_EVENTS)) &
    (all_data["prev_typeId"].isin(PASS_EVENTS)) &
    (all_data["contestantId"] != all_data["prev_contestantId"])  # Different team
]

# Count pressing events by player
pressing_count = (
    pressing_events.groupby(["contestantId", "playerName"])["id"]
    .count()
    .reset_index()
    .rename(columns={"id": "Pressures"})
)

# Calculate Pressures per 90 minutes (PP90)
total_minutes = all_data["timeMin"].max()  # Assuming match is complete
if total_minutes > 0:
    pressing_count["PP90"] = (pressing_count["Pressures"] / total_minutes) * 90
else:
    # No positive minute recorded (or no rows): a rate cannot be computed.
    pressing_count["PP90"] = 0.0

# Merge Pressures and PP90 back into the main DataFrame
# Join on the same keys the counts were grouped by; joining on playerName
# alone would duplicate rows if one name appears under two contestantIds.
all_data = all_data.merge(pressing_count, on=["contestantId", "playerName"], how="left")

# Fill missing values in Pressures and PP90 (for players with no pressing events)
all_data['Pressures'] = all_data['Pressures'].fillna(0)
all_data['PP90'] = all_data['PP90'].fillna(0)

# Step 14: Select Features and Target
# Fixed numeric inputs first, then every one-hot column produced by the
# categorical encoding step (recognised by its column-name prefix).
base_features = ['xT', 'xPass', 'distance', 'angle', 'x', 'y', 'endX', 'endY', 'Pressures', 'PP90']
encoded_prefixes = ('pass_type_', 'passer_', 'receiver_')
encoded_features = [name for name in all_data.columns if name.startswith(encoded_prefixes)]
features = base_features + encoded_features

X = all_data[features]
y = all_data['defensive_intervention']

# Step 15: Split Data into Training and Testing Sets
# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 16: Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=1000, random_state=42)
model.fit(X_train, y_train)

# Step 17: Evaluate the Model
# Hard labels feed the classification report; column 1 of predict_proba
# feeds the ROC AUC score.
y_pred = model.predict(X_test)
test_probabilities = model.predict_proba(X_test)
y_pred_proba = test_probabilities[:, 1]

print("Classification Report:")
report_text = classification_report(y_test, y_pred)
print(report_text)

auc_value = roc_auc_score(y_test, y_pred_proba)
print("ROC AUC Score:", auc_value)

# Step 18: Feature Importance
# Pair each importance with its feature name and order from most to least important.
feature_importances = pd.Series(model.feature_importances_, index=features)
feature_importances = feature_importances.sort_values(ascending=False)

# Horizontal bar chart: importance on the x axis, feature name on the y axis.
plt.figure(figsize=(10, 6))
sns.barplot(x=feature_importances, y=feature_importances.index)
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.title('Feature Importance')
plt.show()

# Step 19: Save the Model
model_path = "disruption_model_combined.pkl"
joblib.dump(model, model_path)

print(f"\nModel saved as '{model_path}'.")

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import joblib

# Step 1: Load the saved model
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
model = joblib.load("disruption_model_with_receiver.pkl")

# Step 2: Load the new dataset (replace with your file path)
new_data = pd.read_csv("/Users/marclambertes/Python/Matches/Men/2024-2025/Eredivisie 2024-2025/Twente 1-0 Almere.csv")
type_cols = [col for col in new_data.columns if '/qualifierId' in col]

# Initialize 'endX', 'endY', 'pass_angle', and 'pass_length' columns with 0.0
# Qualifier ID -> destination column. Rows that carry none of these
# qualifiers keep the 0.0 default.
qualifier_targets = {
    140: 'endX',         # Qualifier ID for endX
    141: 'endY',         # Qualifier ID for endY
    213: 'pass_angle',   # Qualifier ID for pass angle
    212: 'pass_length',  # Qualifier ID for pass length
}
for target_col in qualifier_targets.values():
    new_data[target_col] = 0.0

# Step 3: Extract 'endX', 'endY', 'pass_angle', and 'pass_length' from qualifiers
# Each '<prefix>/qualifierId' column is paired with the '<prefix>/value' column
# that shares its prefix, so the pairing does not depend on column order.
# Work is done one column at a time instead of one row at a time.
for id_col in type_cols:
    value_col = id_col.replace('/qualifierId', '/value')
    for qualifier_id, target_col in qualifier_targets.items():
        hits = new_data[id_col] == qualifier_id
        if hits.any():
            # Unparseable values become NaN here; such rows are dropped by
            # the later clean-up step that calls dropna on these columns.
            new_data.loc[hits, target_col] = pd.to_numeric(new_data.loc[hits, value_col], errors='coerce')

# Step 4: Calculate recipientId based on the next action
# A pass (typeId == 1) is credited to the player of the very next row, but
# only when that next row has the same contestantId. shift(-1) lines every
# row up with its successor; the last row has no successor and gets NaN.
next_contestant = new_data['contestantId'].shift(-1)
next_player = new_data['playerId'].shift(-1)
pass_to_same_team = (new_data['typeId'] == 1) & (next_contestant == new_data['contestantId'])

new_data['recipientId'] = next_player.where(pass_to_same_team)

# Fill missing recipientId values (e.g., if the next action is not from the same team)
new_data['recipientId'] = new_data['recipientId'].fillna(-1)  # Use -1 or another placeholder for missing values

# Step 5: Add Passer and Receiver
new_data['passer'] = new_data['playerId']
new_data['receiver'] = new_data['recipientId']

# Step 6: Add Pass Type Features
# Qualifier ID -> 0/1 flag column. A flag is 1 when the ID appears in any of
# the row's qualifierId columns.
pass_flags = {
    2: 'Cross',           # Qualifier ID for Cross
    4: 'Through-ball',    # Qualifier ID for Through-ball
    1: 'LongBall',        # Qualifier ID for LongBall
    196: 'Switch',        # Qualifier ID for Switch
    210: 'Assist',        # Qualifier ID for Assist
    218: '2nd Assist',    # Qualifier ID for 2nd Assist
}

# Snapshot of the qualifier ID columns, compared column-wise in one pass
# rather than cell by cell.
qualifier_ids = new_data[type_cols]
for qualifier_id, flag_col in pass_flags.items():
    new_data[flag_col] = qualifier_ids.eq(qualifier_id).any(axis=1).astype(int)

# Step 7: Add Pass Type as a Categorical Feature
# Flags are applied in dict order and later ones overwrite earlier ones, so a
# row with several flags ends up labelled with the last matching flag.
new_data['pass_type'] = 'Other'  # Default value
for flag_col in pass_flags.values():
    new_data.loc[new_data[flag_col] == 1, 'pass_type'] = flag_col

# Step 8: Encode Categorical Features
new_data = pd.get_dummies(new_data, columns=['pass_type', 'passer', 'receiver'], drop_first=True)

# Step 9: Clean and prepare the new data
# Convert columns to numeric and handle missing/invalid data
numeric_cols = ['x', 'y', 'endX', 'endY', 'pass_length', 'pass_angle']
for col in numeric_cols:
    new_data[col] = pd.to_numeric(new_data[col], errors='coerce')  # Convert to numeric, invalid -> NaN

# Drop rows with missing values in numeric columns
new_data = new_data.dropna(subset=numeric_cols)

# Step 10: Feature Engineering for new data
# Load xT grid
xT = pd.read_csv("xT_grid.csv", header=None)
xT = np.array(xT)
xT_rows, xT_cols = xT.shape

# Extent of the coordinate system along each axis.
# NOTE(review): assumes x/y/endX/endY are on a 0-100 scale -- confirm against the data feed.
PITCH_X_MAX = 100.0
PITCH_Y_MAX = 100.0


def _zone_index(coords, extent, n_zones):
    """Return the integer grid zone (0 .. n_zones - 1) for each coordinate.

    Zones are equal-width slices of the fixed range [0, extent], so the same
    coordinate always lands in the same zone regardless of which column or
    dataset it comes from. Values outside the range are clamped to the
    nearest edge zone.
    """
    raw = np.floor(coords.to_numpy(dtype=float) / extent * n_zones)
    return np.clip(raw, 0, n_zones - 1).astype(int)


# Bin coordinates into xT grid zones
# (pd.cut with an integer bin count would take its edges from each column's
# own min/max, giving start and end points different zone boundaries.)
new_data['x1_bin'] = _zone_index(new_data['x'], PITCH_X_MAX, xT_cols)
new_data['y1_bin'] = _zone_index(new_data['y'], PITCH_Y_MAX, xT_rows)
new_data['x2_bin'] = _zone_index(new_data['endX'], PITCH_X_MAX, xT_cols)
new_data['y2_bin'] = _zone_index(new_data['endY'], PITCH_Y_MAX, xT_rows)

# Calculate xT and xPass
# The grid is indexed [row = y zone, column = x zone]; xT is the change in
# zone value between the start and end of the action.
new_data['start_zone_value'] = xT[new_data['y1_bin'].to_numpy(), new_data['x1_bin'].to_numpy()]
new_data['end_zone_value'] = xT[new_data['y2_bin'].to_numpy(), new_data['x2_bin'].to_numpy()]
new_data['xT'] = new_data['end_zone_value'] - new_data['start_zone_value']

# Straight-line distance and absolute direction (radians) from start to end.
new_data['distance'] = np.sqrt((new_data['endX'] - new_data['x']) ** 2 + (new_data['endY'] - new_data['y']) ** 2)
new_data['angle'] = np.abs(np.arctan2(new_data['endY'] - new_data['y'], new_data['endX'] - new_data['x']))
# Heuristic score: decreases linearly with distance and angle, kept in [0, 1].
new_data['xPass'] = 1 - (0.02 * new_data['distance'] + 0.1 * new_data['angle'])
new_data['xPass'] = new_data['xPass'].clip(0, 1)

# Step 11: Calculate Pressures and PP90
# Define event type IDs
PASS_EVENTS = {1}  # Adjust if needed
PRESSING_EVENTS = {7, 8, 44, 45, 49}

# Sort events by period, time, and sequence
new_data = new_data.sort_values(by=["periodId", "timeMin", "timeSec"])

# Shift previous event details (for contestantId and typeId)
new_data["prev_contestantId"] = new_data["contestantId"].shift(1)
new_data["prev_typeId"] = new_data["typeId"].shift(1)

# Filter pressing events where the previous event was a pass by the other team
pressing_events = new_data[
    (new_data["typeId"].isin(PRESSING_EVENTS)) &
    (new_data["prev_typeId"].isin(PASS_EVENTS)) &
    (new_data["contestantId"] != new_data["prev_contestantId"])  # Different team
]

# Count pressing events by player
pressing_count = (
    pressing_events.groupby(["contestantId", "playerName"])["id"]
    .count()
    .reset_index()
    .rename(columns={"id": "Pressures"})
)

# Calculate Pressures per 90 minutes (PP90)
total_minutes = new_data["timeMin"].max()  # Assuming match is complete
if total_minutes > 0:
    pressing_count["PP90"] = (pressing_count["Pressures"] / total_minutes) * 90
else:
    # No positive minute recorded (or no rows): a rate cannot be computed.
    pressing_count["PP90"] = 0.0

# Merge Pressures and PP90 back into the main DataFrame
# Join on the same keys the counts were grouped by; joining on playerName
# alone would duplicate rows if one name appears under two contestantIds.
new_data = new_data.merge(pressing_count, on=["contestantId", "playerName"], how="left")

# Fill missing values in Pressures and PP90 (for players with no pressing events)
new_data['Pressures'] = new_data['Pressures'].fillna(0)
new_data['PP90'] = new_data['PP90'].fillna(0)

# Step 12: Select features for prediction
features = ['xT', 'xPass', 'distance', 'angle', 'x', 'y', 'endX', 'endY', 'Pressures', 'PP90'] + \
           [col for col in new_data.columns if col.startswith('pass_type_') or col.startswith('passer_') or col.startswith('receiver_')]

# The one-hot columns above depend on which players and pass types occur in
# this dataset, so they can differ from the columns the model was fitted on.
# When the model recorded its training column names, use exactly those, in
# that order: columns missing here are filled with 0 and columns the model
# never saw are left out.
trained_columns = getattr(model, "feature_names_in_", None)
if trained_columns is not None:
    features = list(trained_columns)
    X_new = new_data.reindex(columns=features, fill_value=0)
else:
    X_new = new_data[features]

# Step 13: Predict disruption probabilities
# Column 1 of predict_proba is the probability of the positive class.
new_data['disruption_probability'] = model.predict_proba(X_new)[:, 1]

# Step 14: Save the results to a new Excel file
output_file_path = "disruption 2.0.xlsx"
new_data.to_excel(output_file_path, index=False)

print(f"Predictions saved to {output_file_path}")