In [1]:
import pandas as pd
import numpy as np
import re
import os

# 📂 Folder containing Excel files
folder_path = r"/Users/user/Downloads/Women/"  # ⬅️ CHANGE THIS

# os.walk() silently yields nothing for a missing folder, which would be
# reported below as "no Excel files found"; fail with the real cause instead.
if not os.path.isdir(folder_path):
    raise FileNotFoundError(f"❌ Folder not found: {folder_path}")

# 🔄 Load all Excel files (recursively, including subfolders)
excel_suffixes = (".xlsx", ".xls")
all_dfs = []
for root, _, files in os.walk(folder_path):
    for file in files:
        # Excel writes "~$<name>.xlsx" lock files next to any open workbook;
        # they carry an Excel extension but are not readable workbooks.
        if file.startswith("~$"):
            continue
        # Compare case-insensitively so "DATA.XLSX" is picked up as well.
        if not file.lower().endswith(excel_suffixes):
            continue
        file_path = os.path.join(root, file)
        print(f"Loading: {file_path}")
        df_temp = pd.read_excel(file_path)
        all_dfs.append(df_temp)

# 🧩 Combine all into a single DataFrame
if not all_dfs:
    raise ValueError("❌ No Excel files found in the specified folder.")
df = pd.concat(all_dfs, ignore_index=True)
print(f"✅ Loaded {len(all_dfs)} files — total rows: {len(df):,}\n")

# --------------------------------------------------------------
# 🔍 Filter & Analyze Young Attacking Players
# --------------------------------------------------------------

# Define relevant attacking positions.
# "LWF"/"RWF" are spelled out because the pattern below only accepts whole
# position codes, so they are no longer matched implicitly through "LW"/"RW".
positions = ["LW", "RW", "LWF", "RWF", "AMF", "CF", "LAMF", "RAMF"]

# ✅ Match positions even if combined like "LW, CF".
# The \b word boundaries make each alternative match a complete code only; a
# bare substring search would also accept longer codes that merely start with
# one of these (e.g. "LWB" or "RWB" through "LW"/"RW").
pattern = r"\b(?:" + "|".join(re.escape(p) for p in positions) + r")\b"

# Thresholds are named once so the filter and the error message cannot drift apart.
max_age = 21
min_minutes = 500

# Filter: attacking players with at least `min_minutes` minutes, aged `max_age` or younger.
# Rows with a missing Position are treated as non-matching (na=False).
young_attackers = df[
    df["Position"].str.contains(pattern, case=False, na=False, regex=True) &
    (df["Minutes played"] >= min_minutes) &
    (df["Age"] <= max_age)
].copy()

if young_attackers.empty:
    raise ValueError(
        f"⚠️ No players match the criteria (≤{max_age} years, {min_minutes}+ mins, attacking positions)."
    )

# --------------------------------------------------------------
# 📊 Compute Performance Metrics
# --------------------------------------------------------------

raw_stats = ["Goals", "xG", "Assists", "xA"]

# 🧠 Per-90 rates first, then 🕒 per-30 rates: each raw total is divided by
# the number of 90- or 30-minute windows the player spent on the pitch.
for window in (90, 30):
    for stat in raw_stats:
        young_attackers[f"{stat} per {window}"] = (
            young_attackers[stat] / (young_attackers["Minutes played"] / window)
        )

# ⚡ Overperformance: actual minus expected per 30 minutes, floored at zero
# so that underperformance never lowers the score.
goal_surplus = young_attackers["Goals per 30"] - young_attackers["xG per 30"]
assist_surplus = young_attackers["Assists per 30"] - young_attackers["xA per 30"]
young_attackers["Overperformance Goals"] = goal_surplus.clip(lower=0)
young_attackers["Overperformance Assists"] = assist_surplus.clip(lower=0)

# 🧩 Combined score: goals weigh 0.7, assists 0.3
weighted_goals = young_attackers["Overperformance Goals"] * 0.7
weighted_assists = young_attackers["Overperformance Assists"] * 0.3
young_attackers["Overperformance Score"] = weighted_goals + weighted_assists

# --------------------------------------------------------------
# 🎨 Style and Display
# --------------------------------------------------------------

# Derived rate columns (per 30 first, then per 90) and the three score columns
rate_cols = [
    f"{stat} per {window}"
    for window in (30, 90)
    for stat in ("Goals", "xG", "Assists", "xA")
]
score_cols = ["Overperformance Goals", "Overperformance Assists", "Overperformance Score"]

# Columns shown in the final table, in display order
cols = [
    "Player", "Team within selected timeframe", "Position", "Age", "Minutes played",
    "Goals", "Assists", "xG", "xA",
    *rate_cols,
    *score_cols,
]

# Rates are shown with two decimals; scores additionally carry an explicit sign
number_formats = {name: "{:.2f}" for name in rate_cols}
number_formats.update({name: "{:+.2f}" for name in score_cols})

# Best overperformers first, with a red→green gradient on the score columns
ranked = young_attackers[cols].sort_values("Overperformance Score", ascending=False)
styled = ranked.style.background_gradient(subset=score_cols, cmap="RdYlGn")
styled = styled.format(number_formats)

styled



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.5 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/opt/anaconda3/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 701, in start
    self.io_loop.start()
  File "/opt/anaconda3/lib/python3.12/site-

ImportError: 
A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.5 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.




A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.5 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/opt/anaconda3/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 701, in start
    self.io_loop.start()
  File "/opt/anaconda3/lib/python3.12/site-

ImportError: 
A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.5 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.




A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.5 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/opt/anaconda3/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 701, in start
    self.io_loop.start()
  File "/opt/anaconda3/lib/python3.12/site-

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.5 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/opt/anaconda3/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 701, in start
    self.io_loop.start()
  File "/opt/anaconda3/lib/python3.12/site-

ImportError: 
A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.5 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.



Loading: /Users/user/Downloads/Women/Sweden.xlsx
Loading: /Users/user/Downloads/Women/USA.xlsx
Loading: /Users/user/Downloads/Women/France.xlsx
Loading: /Users/user/Downloads/Women/Russia.xlsx
Loading: /Users/user/Downloads/Women/Belgium.xlsx
Loading: /Users/user/Downloads/Women/England 2.xlsx
Loading: /Users/user/Downloads/Women/Germany.xlsx
Loading: /Users/user/Downloads/Women/Canada.xlsx
Loading: /Users/user/Downloads/Women/Spain.xlsx
Loading: /Users/user/Downloads/Women/Denmark.xlsx
Loading: /Users/user/Downloads/Women/Hong Kong.xlsx
Loading: /Users/user/Downloads/Women/New Zealand.xlsx
Loading: /Users/user/Downloads/Women/Ukraine.xlsx
Loading: /Users/user/Downloads/Women/Ireland.xlsx
Loading: /Users/user/Downloads/Women/Latvia.xlsx
Loading: /Users/user/Downloads/Women/Portugal.xlsx
Loading: /Users/user/Downloads/Women/Sweden 2.xlsx
Loading: /Users/user/Downloads/Women/China.xlsx
Loading: /Users/user/Downloads/Women/Australia.xlsx
Loading: /Users/user/Downloads/Women/England U21.xl

Unnamed: 0,Player,Team within selected timeframe,Position,Age,Minutes played,Goals,Assists,xG,xA,Goals per 30,xG per 30,Assists per 30,xA per 30,Goals per 90,xG per 90,Assists per 90,xA per 90,Overperformance Goals,Overperformance Assists,Overperformance Score
3193,Y. Molodiuk,Metalist 1925 Kharkiv,CF,21.0,658,10,3,4.0,1.51,0.46,0.18,0.14,0.07,1.37,0.55,0.41,0.21,0.27,0.07,0.21
4933,E. Hendle,Chelsea Women U21,"LW, LB",17.0,608,9,5,4.34,2.38,0.44,0.21,0.25,0.12,1.33,0.64,0.74,0.35,0.23,0.13,0.2
7262,K. Weir,Glentoran BU,CF,19.0,685,12,2,6.01,1.97,0.53,0.26,0.09,0.09,1.58,0.79,0.26,0.26,0.26,0.0,0.18
4908,C. Giddings,Bristol City U21,"LW, RWF, RW",16.0,680,9,1,4.15,0.76,0.4,0.18,0.04,0.03,1.19,0.55,0.13,0.1,0.21,0.01,0.15
4958,P. Fergus,Brighton & Hove Albion U21,CF,16.0,572,9,0,4.95,0.09,0.47,0.26,0.0,0.0,1.42,0.78,0.0,0.01,0.21,0.0,0.15
7235,E. Reilly,Linfield,CF,21.0,904,16,2,10.11,1.35,0.53,0.34,0.07,0.04,1.59,1.01,0.2,0.13,0.2,0.02,0.14
1142,N. Matejić,Zenit,CF,20.0,851,11,5,6.87,1.49,0.39,0.24,0.18,0.05,1.16,0.73,0.53,0.16,0.15,0.12,0.14
4972,S. Davis,Chelsea Women U21,"LCMF, RCMF, AMF",17.0,550,4,0,0.72,0.39,0.22,0.04,0.0,0.02,0.65,0.12,0.0,0.06,0.18,0.0,0.13
8445,E. McLean,St. Johnstone,"CF, RDMF, RCMF",21.0,502,3,2,0.53,0.81,0.18,0.03,0.12,0.05,0.54,0.1,0.36,0.15,0.15,0.07,0.12
3690,A. Vainere,Metta,"RCMF, AMF",21.0,1197,9,1,2.25,2.07,0.23,0.06,0.03,0.05,0.68,0.17,0.08,0.16,0.17,0.0,0.12


In [2]:
import pandas as pd
import numpy as np
import re
import os

# 📄 Path of the single Excel workbook to analyse
file_path = r"/Users/user/Downloads/2025-2026 DB/Iceland.xlsx"   # ⬅️ CHANGE THIS

# Stop early with a clear message when the workbook is not on disk
workbook_exists = os.path.isfile(file_path)
if not workbook_exists:
    raise FileNotFoundError(f"❌ File not found: {file_path}")

print(f"Loading: {file_path}")
df = pd.read_excel(file_path)

# Report how many rows were read
row_count = len(df)
print(f"✅ Loaded file — total rows: {row_count:,}\n")

# --------------------------------------------------------------
# 🔍 Filter & Analyze Young Attacking Players
# --------------------------------------------------------------

# Define relevant attacking positions.
# "LWF"/"RWF" are spelled out because the pattern below only accepts whole
# position codes, so they are no longer matched implicitly through "LW"/"RW".
positions = ["LW", "RW", "LWF", "RWF", "AMF", "CF", "LAMF", "RAMF"]

# Match positions even if combined like "LW, CF".
# The \b word boundaries make each alternative match a complete code only; a
# bare substring search would also accept longer codes that merely start with
# one of these (e.g. "LWB" or "RWB" through "LW"/"RW").
pattern = r"\b(?:" + "|".join(re.escape(p) for p in positions) + r")\b"

# Thresholds are named once so the filter and the error message cannot drift
# apart (this cell uses an age cap of 25).
max_age = 25
min_minutes = 500

# Filter: attacking players with at least `min_minutes` minutes, aged `max_age` or younger.
# Rows with a missing Position are treated as non-matching (na=False).
young_attackers = df[
    df["Position"].str.contains(pattern, case=False, na=False, regex=True) &
    (df["Minutes played"] >= min_minutes) &
    (df["Age"] <= max_age)
].copy()

if young_attackers.empty:
    raise ValueError(
        f"⚠️ No players match the criteria (≤{max_age} years, {min_minutes}+ mins, attacking positions)."
    )

# --------------------------------------------------------------
# 📊 Compute Performance Metrics
# --------------------------------------------------------------

# Rate metrics: per-90 columns are created first, then per-30 columns.
# Each raw total is divided by the number of windows of that length played.
for window_minutes, suffix in ((90, "per 90"), (30, "per 30")):
    for stat_name in ("Goals", "xG", "Assists", "xA"):
        windows_played = young_attackers["Minutes played"] / window_minutes
        young_attackers[stat_name + " " + suffix] = young_attackers[stat_name] / windows_played

# Overperformance metrics: actual minus expected per 30 minutes,
# with negative differences clipped to zero
for actual, expected, target in (
    ("Goals per 30", "xG per 30", "Overperformance Goals"),
    ("Assists per 30", "xA per 30", "Overperformance Assists"),
):
    difference = young_attackers[actual] - young_attackers[expected]
    young_attackers[target] = difference.clip(lower=0)

# Weighted total score: 0.7 for goals, 0.3 for assists
goal_part = young_attackers["Overperformance Goals"] * 0.7
assist_part = young_attackers["Overperformance Assists"] * 0.3
young_attackers["Overperformance Score"] = goal_part + assist_part

# --------------------------------------------------------------
# 🎨 Style and Display
# --------------------------------------------------------------

# Identity and raw-total columns
base_cols = [
    "Player", "Team within selected timeframe", "Position", "Age", "Minutes played",
    "Goals", "Assists", "xG", "xA",
]
# Derived rate columns, shown with two decimals
two_decimal_cols = [
    "Goals per 30", "xG per 30", "Assists per 30", "xA per 30",
    "Goals per 90", "xG per 90", "Assists per 90", "xA per 90",
]
# Score columns, shown with an explicit sign and colour-graded
signed_cols = ["Overperformance Goals", "Overperformance Assists", "Overperformance Score"]

cols = base_cols + two_decimal_cols + signed_cols

display_formats = {
    **dict.fromkeys(two_decimal_cols, "{:.2f}"),
    **dict.fromkeys(signed_cols, "{:+.2f}"),
}

# Highest overperformance score at the top
table = young_attackers[cols].sort_values("Overperformance Score", ascending=False)

styler = table.style
styler = styler.background_gradient(subset=signed_cols, cmap="RdYlGn")
styled = styler.format(display_formats)

styled


Loading: /Users/user/Downloads/2025-2026 DB/Iceland.xlsx


✅ Loaded file — total rows: 314



Unnamed: 0,Player,Team within selected timeframe,Position,Age,Minutes played,Goals,Assists,xG,xA,Goals per 30,xG per 30,Assists per 30,xA per 30,Goals per 90,xG per 90,Assists per 90,xA per 90,Overperformance Goals,Overperformance Assists,Overperformance Score
129,V. Dimitrijevic,Fram,"CF, LW",24.0,1385,9,1,5.26,1.47,0.19,0.11,0.02,0.03,0.58,0.34,0.06,0.1,0.08,0.0,0.06
104,L Heimisson,Valur,"LCMF, CF, LAMF",22.0,1625,6,4,3.1,1.09,0.11,0.06,0.07,0.02,0.33,0.17,0.22,0.06,0.05,0.05,0.05
161,D. Berg Jónsson,Vestri,"RW, CF",19.0,1000,5,2,3.32,0.59,0.15,0.1,0.06,0.02,0.45,0.3,0.18,0.05,0.05,0.04,0.05
59,G. Nokkvason,Stjarnan,"LCMF, RCMF, CF",21.0,2053,7,6,5.05,3.24,0.1,0.07,0.09,0.05,0.31,0.22,0.26,0.14,0.03,0.04,0.03
132,I. Stole,KA,"RAMF, RW, RB",21.0,1346,3,5,1.91,2.83,0.07,0.04,0.11,0.06,0.2,0.13,0.33,0.19,0.02,0.05,0.03
182,T. Hafthorsson,Afturelding,"RAMF, RW",24.0,805,2,1,0.82,1.4,0.07,0.03,0.04,0.05,0.22,0.09,0.11,0.16,0.04,0.0,0.03
179,L. Rae,KR,"RW, RAMF",24.0,859,4,4,2.83,3.81,0.14,0.1,0.14,0.13,0.42,0.3,0.42,0.4,0.04,0.01,0.03
116,S. Hjaltested,ÍBV,CF,25.0,1484,6,1,4.0,0.64,0.12,0.08,0.02,0.01,0.36,0.24,0.06,0.04,0.04,0.01,0.03
160,J. Byström,Fram,"CF, RW",20.0,1012,7,0,5.65,0.41,0.21,0.17,0.0,0.01,0.62,0.5,0.0,0.04,0.04,0.0,0.03
111,G. Unnarsson,ÍA,"RW, RAMF, CF",24.0,1530,6,0,4.17,1.48,0.12,0.08,0.0,0.03,0.35,0.25,0.0,0.09,0.04,0.0,0.03
