<a href="https://colab.research.google.com/github/marclamberts/football-analysis/blob/main/IQR_%2B_Entropy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Make Google Drive reachable from the Colab runtime
from google.colab import drive

# Directory under which the Drive contents are exposed after mounting.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


Mounted at /content/drive


In [2]:
# Step 2: Import necessary libraries
import pandas as pd
import numpy as np
from scipy.stats import iqr, entropy
import os


In [3]:
# Step 3: Read the source workbook from the mounted Drive into a DataFrame.
# Point file_path at your own workbook if it is stored somewhere else.
file_path = '/content/drive/MyDrive/Database March 2025/MLS 2025.xlsx'
df = pd.read_excel(io=file_path)


In [8]:
# Step 4: Locate the "Goals per 90" column.
# Headers are matched by substring; str() keeps the search from failing on
# non-string headers, and a missing column is reported with a readable error
# instead of a bare IndexError.
goals_per_90_col = next(
    (col for col in df.columns if "Goals per 90" in str(col)), None
)
if goals_per_90_col is None:
    raise KeyError('No column containing "Goals per 90" was found in the loaded data')

# Step 5: Filter for players with 'CF' in Position and >= 200 Minutes played
# (non-numeric minutes are coerced to NaN and therefore excluded).
filtered_df = df[
    df['Position'].astype(str).str.contains("CF", case=False, na=False) &
    (pd.to_numeric(df['Minutes played'], errors='coerce') >= 200)
]

# Step 6: Clean and extract Goals per 90 values (unparseable entries are dropped)
goals_per_90_filtered = pd.to_numeric(filtered_df[goals_per_90_col], errors='coerce').dropna()
if goals_per_90_filtered.empty:
    # Without this guard both statistics come out as NaN and get saved silently.
    raise ValueError("No numeric 'Goals per 90' values left after filtering; nothing to analyse")

# Step 7: Calculate IQR and Entropy
filtered_iqr = iqr(goals_per_90_filtered)
# scipy.stats.entropy normalises its input to sum to 1, so the density
# histogram acts as the discrete probability distribution over the bins.
hist_filtered, _bin_edges = np.histogram(goals_per_90_filtered, bins='auto', density=True)
filtered_entropy = entropy(hist_filtered)

# Step 8: Output results
print("Filtered players:", filtered_df.shape[0])
print("IQR (Goals per 90):", filtered_iqr)
print("Entropy (Goals per 90):", filtered_entropy)

# Step 9: Collect the statistics computed above in a summary DataFrame.
# Uses filtered_iqr / filtered_entropy from this cell, so the saved values are
# exactly the ones printed and do not depend on leftover kernel variables.
summary_df = pd.DataFrame({
    "Stat": ["Goals per 90 IQR", "Goals per 90 Entropy"],
    "Value": [filtered_iqr, filtered_entropy]
})

# Step 10: Save to Excel in Google Drive
# (engine='xlsxwriter' requires the xlsxwriter package to be installed).
output_path = '/content/drive/MyDrive/xDuel/MLS_2025_Processed.xlsx'

with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
    # The full, unfiltered input goes on the first sheet; the stats on the second.
    df.to_excel(writer, sheet_name='Original Data', index=False)
    summary_df.to_excel(writer, sheet_name='Analysis Summary', index=False)

print("File saved to Google Drive!")


Filtered players: 52
IQR (Goals per 90): 0.29500000000000004
Entropy (Goals per 90): 1.692796532714847
File saved to Google Drive!


In [5]:
pip install xlsxwriter

Collecting xlsxwriter
  Downloading XlsxWriter-3.2.3-py3-none-any.whl.metadata (2.7 kB)
Downloading XlsxWriter-3.2.3-py3-none-any.whl (169 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/169.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/169.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m169.4/169.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.3
