In [1]:
pip install pandas openpyxl


Note: you may need to restart the kernel to use updated packages.


In [21]:
from pathlib import Path
from datetime import datetime, timezone
import re
import unicodedata
import hashlib
import warnings
import pandas as pd
import numpy as np

# ============ CONFIG ============
INPUT_DIR = Path(r"C:\Users\ronan.gaborit\OLYMPIQUE DE MARSEILLE\Centre de Formation - Sportif\DEPOT FICHIERS GPS CDF\U19")
EXCEL_PATH = INPUT_DIR / "databaseU19.xlsx"
SHEET_DATA = "data"
SHEET_LOG  = "_import_log"

DECIMAL_COMMA  = True           # privilégie la virgule décimale pour les exports EU (;)
SKIP_UNCHANGED = True

# --- Interrupteurs debug ---
BYPASS_FILTERS = False          # ne filtre pas Full Training/Game si True
BYPASS_DEDUP   = False
SHOW_DIAG      = True

# --- Forcer la réimportation de certains fichiers ---
FORCE_REIMPORT_NAMES: set[str] = {"U17_2025-09-29_COMP_06.csv", "U17_2025-10-03_GROUP_55.csv"}

# ---------- Colonnes "métier" à exposer ----------
KEEP_COLS = [
    "Session Date", "Session Title",
    "Drill Title", "Session Type",
    "Player Display Name", "Player Name",
    "Total Distance", "Distance Per Min",
    "Sprint Distance",
    "Entries Zone 5 (Absolute)", "Entries Zone 6 (Absolute)",
    "High Speed Running (Absolute)", "High Speed Running (Relative)",
    "HSR Per Minute (Absolute)", "HSR Per Minute (Relative)",
    "% Vmax", "Max Speed",
    "Accelerations (Absolute)", "Accelerations (Relative)",
    "Accelerations Per Min (Absolute)", "Accelerations Per Min (Relative)",
    "Decelerations (Absolute)", "Decelerations (Relative)",
    "Decels Per Min (Absolute)", "Decelerations Per Min (Relative)",
    "Accelerations Zone 3 (Absolute)", "Accelerations Zone 3 (Relative)",
    "Accelerations Zone 4 (Absolute)", "Accelerations Zone 4 (Relative)",

    # --- Temps bruts (on créera aussi la version (min))
    "Accelerations Total Time Zone 3 (Absolute)", "Accelerations Total Time Zone 3 (Relative)",
    "Accelerations Total Time Zone 4 (Absolute)", "Accelerations Total Time Zone 4 (Relative)",
    "Decelerations Total Time Zone 3 (Absolute)", "Decelerations Total Time Zone 3 (Relative)",
    "Decelerations Total Time Zone 4 (Absolute)", "Decelerations Total Time Zone 4 (Relative)",
    "Metabolic Time Zone 3 (Absolute)", "Metabolic Time Zone 3 (Relative)",
    "Total Time",

    # Distances par zone
    "Accelerations Total Distance Zone 3 (Absolute)", "Accelerations Total Distance Zone 3 (Relative)",
    "Accelerations Total Distance Zone 4 (Absolute)", "Accelerations Total Distance Zone 4 (Relative)",
    "Decelerations Total Distance Zone 3 (Absolute)", "Decelerations Total Distance Zone 3 (Relative)",
    "Decelerations Total Distance Zone 4 (Absolute)", "Decelerations Total Distance Zone 4 (Relative)",

    "HML Distance", "HML Efforts", "HML Efforts Total Distance",
    "HML Time", "HMLD Per Minute",
    "HML Efforts Maximum Speed", "Average Time Since Last HML Effort", "Step Balance",
]

# Ajouts fixes + dérivées normalisées
KEEP_COLS += [
    "Distance Zone 4 (Absolute)", "Distance Zone 5 (Absolute)", "Distance Zone 6 (Absolute)",
    "Total Time (min)",  # dérivée
]

# Tags utilisateur
PLAYER_NAME_COLS = ["Player Display Name", "Player Name", "Player First Name", "Player Last Name"]
USER_TAG_COLS = ["Semaine", "Indicateur séance"]
KEEP_COLS = KEEP_COLS + USER_TAG_COLS

# ======== SCHEMA POWER BI (verrouillé) ========
REQUIRED_PBI_COLS = list(dict.fromkeys(KEEP_COLS + [
    "Decelerations Zone 3 (Absolute)", "Decelerations Zone 3 (Relative)",
    "Decelerations Zone 4 (Absolute)", "Decelerations Zone 4 (Relative)",
    "Accelerations Zone 3 (Absolute)", "Accelerations Zone 3 (Relative)",
    "Accelerations Zone 4 (Absolute)", "Accelerations Zone 4 (Relative)",
    "Accelerations Total Time Zone 3 (Absolute) (min)",
    "Accelerations Total Time Zone 3 (Relative) (min)",
    "Accelerations Total Time Zone 4 (Absolute) (min)",
    "Accelerations Total Time Zone 4 (Relative) (min)",
    "Decelerations Total Time Zone 3 (Absolute) (min)",
    "Decelerations Total Time Zone 3 (Relative) (min)",
    "Decelerations Total Time Zone 4 (Absolute) (min)",
    "Decelerations Total Time Zone 4 (Relative) (min)",
    "Metabolic Time Zone 3 (Absolute) (min)", "Metabolic Time Zone 3 (Relative) (min)",
]))

# Colonnes quantitatives à forcer en numérique et remplir à 0
NUMERIC_EXPECTED = [
    "Total Distance", "Distance Per Min", "Sprint Distance",
    "Entries Zone 5 (Absolute)", "Entries Zone 6 (Absolute)",
    "High Speed Running (Absolute)", "High Speed Running (Relative)",
    "HSR Per Minute (Absolute)", "HSR Per Minute (Relative)",
    "% Vmax", "Max Speed",
    "Accelerations (Absolute)", "Accelerations (Relative)",
    "Accelerations Per Min (Absolute)", "Accelerations Per Min (Relative)",
    "Decelerations (Absolute)", "Decelerations (Relative)",
    "Decels Per Min (Absolute)", "Decelerations Per Min (Relative)",
    "Accelerations Zone 3 (Absolute)", "Accelerations Zone 3 (Relative)",
    "Accelerations Zone 4 (Absolute)", "Accelerations Zone 4 (Relative)",
    "Decelerations Zone 3 (Absolute)", "Decelerations Zone 3 (Relative)",
    "Decelerations Zone 4 (Absolute)", "Decelerations Zone 4 (Relative)",
    "Accelerations Total Distance Zone 3 (Absolute)", "Accelerations Total Distance Zone 3 (Relative)",
    "Accelerations Total Distance Zone 4 (Absolute)", "Accelerations Total Distance Zone 4 (Relative)",
    "Decelerations Total Distance Zone 3 (Absolute)", "Decelerations Total Distance Zone 3 (Relative)",
    "Decelerations Total Distance Zone 4 (Absolute)", "Decelerations Total Distance Zone 4 (Relative)",
    "Distance Zone 4 (Absolute)", "Distance Zone 5 (Absolute)", "Distance Zone 6 (Absolute)",
    "Total Time (min)",
    "Accelerations Total Time Zone 3 (Absolute) (min)", "Accelerations Total Time Zone 3 (Relative) (min)",
    "Accelerations Total Time Zone 4 (Absolute) (min)", "Accelerations Total Time Zone 4 (Relative) (min)",
    "Decelerations Total Time Zone 3 (Absolute) (min)", "Decelerations Total Time Zone 3 (Relative) (min)",
    "Decelerations Total Time Zone 4 (Absolute) (min)", "Decelerations Total Time Zone 4 (Relative) (min)",
    "Metabolic Time Zone 3 (Absolute) (min)", "Metabolic Time Zone 3 (Relative) (min)",
]

warnings.filterwarnings("ignore", message="Parsing dates in %d/%m/%Y format")

# ---------- Helpers lecture ----------
def smart_read_csv(p: Path) -> pd.DataFrame:
    """Parsing robuste (EU/US) + fallback si une seule colonne est lue."""
    # lecture brute de l’en-tête
    try:
        head = p.open("rb").read(4096)
    except Exception:
        return pd.read_csv(p, sep=",", encoding="utf-8-sig", engine="python", on_bad_lines="skip")

    enc = "utf-8-sig" if head.startswith(b"\xef\xbb\xbf") else "cp1252"
    try:
        txt = head.decode(enc, errors="ignore")
    except Exception:
        enc = "cp1252"
        txt = head.decode(enc, errors="ignore")

    # RÈGLE : s'il y a au moins un ';' → format EU
    if ";" in txt:
        sep, dec, enc_eff = ";", ("," if DECIMAL_COMMA else "."), "cp1252"
    else:
        sep, dec, enc_eff = ",", ".", "utf-8-sig"

    df = pd.read_csv(p, sep=sep, encoding=enc_eff, decimal=dec, engine="python", on_bad_lines="skip")
    if df.shape[1] == 1:
        for sep2, enc2, dec2 in [(";", "cp1252", ","), (",", "utf-8-sig", "."), (";", "utf-8-sig", ","), (",", "cp1252", ".")]:
            try:
                df2 = pd.read_csv(p, sep=sep2, encoding=enc2, decimal=dec2, engine="python", on_bad_lines="skip")
                if df2.shape[1] > 1:
                    df = df2
                    break
            except Exception:
                pass

    df.columns = [str(c).strip() for c in df.columns]
    df = df.loc[:, ~df.columns.str.match(r"Unnamed:\s*\d+", case=False, na=False)]
    return df

def read_excel_sheet(path: Path, sheet: str) -> pd.DataFrame:
    if not path.exists():
        return pd.DataFrame()
    try:
        return pd.read_excel(path, sheet_name=sheet, engine="openpyxl")
    except Exception:
        return pd.DataFrame()

def hash_head(path: Path, n=1_000_000) -> str:
    h = hashlib.sha1()
    with open(path, "rb") as f:
        h.update(f.read(n))
    return h.hexdigest()[:32]

def _ledger_has_cols(ledger: pd.DataFrame) -> bool:
    need = {"filepath", "size", "mtime_ns", "sha1_32"}
    return not ledger.empty and need.issubset({str(c) for c in ledger.columns})

def should_skip(p: Path, ledger: pd.DataFrame) -> bool:
    if not SKIP_UNCHANGED or not _ledger_has_cols(ledger):
        return False
    try:
        st = p.stat()
        sig = (str(p.resolve()), st.st_size, st.st_mtime_ns, hash_head(p))
        mask = (
            (ledger["filepath"] == sig[0]) &
            (ledger["size"]     == sig[1]) &
            (ledger["mtime_ns"] == sig[2]) &
            (ledger["sha1_32"]  == sig[3])
        )
        return bool(mask.any())
    except Exception:
        return False

# ---------- Normalisation & clés ----------
def _norm_txt(x: str) -> str:
    if pd.isna(x):
        return ""
    x = str(x).strip()
    x = "".join(ch for ch in unicodedata.normalize("NFKD", x) if not unicodedata.combining(ch))
    x = re.sub(r"\s+", " ", x).strip().lower()
    return x

def _norm_date(x) -> str:
    try:
        dt = pd.to_datetime(x, errors="coerce", dayfirst=True, utc=False)
        if pd.isna(dt):
            dt = pd.to_datetime(x, errors="coerce", dayfirst=False, utc=False)
        if pd.isna(dt):
            return ""
        if isinstance(dt, pd.Series):
            dt = dt.iloc[0]
        return dt.normalize().strftime("%Y-%m-%d")
    except Exception:
        return ""

_fname_re = re.compile(
    r"""(?P<team>[A-Za-z0-9]+)?_?(?P<date>\d{4}[-_]?\d{2}[-_]?\d{2})[_-]?(?P<title>[^._]+?)(?:[_-]?\d+)?\.csv$""",
    re.IGNORECASE | re.VERBOSE
)

def _parse_session_from_filename(fname: str) -> tuple[str, str]:
    m = _fname_re.search(fname)
    if not m:
        return ("", "")
    raw_date = m.group("date").replace("_", "-")
    if len(raw_date) == 8:
        raw_date = f"{raw_date[0:4]}-{raw_date[4:6]}-{raw_date[6:8]}"
    title = m.group("title")
    return (raw_date, title)

def _make_session_key(row: pd.Series) -> str:
    date_norm  = _norm_date(row.get("Session Date", ""))
    title_norm = _norm_txt(row.get("Session Title", ""))
    if date_norm or title_norm:
        return f"{date_norm}|{title_norm}"
    fdate, ftitle = _parse_session_from_filename(str(row.get("_source_file", "")))
    return f"{_norm_date(fdate)}|{_norm_txt(ftitle)}"

def _make_player_key(row: pd.Series, player_cols: list[str]) -> str:
    for c in player_cols:
        if c in row and pd.notna(row[c]) and str(row[c]).strip():
            return _norm_txt(row[c])
    return ""

def _is_game_row(row: pd.Series) -> bool:
    val = _norm_txt(row.get("Session Type", ""))
    return val in {"game", "match"}

def _is_warmup_row(row: pd.Series) -> bool:
    val = _norm_txt(row.get("Drill Title", ""))
    return any(k in val for k in ("warm", "echauf", "echauff", "activation"))

# ---------- Durées -> minutes ----------
_DUR_RE = re.compile(r"""
    ^\s*
    (?:
        (?:(?P<h>\d{1,3}))\s*(?:h|hr|hrs|heure|heures)\s*
        (?:(?P<m>\d{1,2}))?\s*(?:m|min|mins|minute|minutes)?\s*
        (?:(?P<s>\d{1,2}))?\s*(?:s|sec|secs|seconde|secondes)?
      |
        (?:(?P<hms_h>\d{1,3})):(?P<hms_m>\d{1,2})(?::(?P<hms_s>\d{1,2}))?
      |
        (?:(?P<mm>\d{1,4}))\s*(?:m|min|mins)\b
      |
        (?:(?P<ss>\d{1,6}))\s*(?:s|sec|secs)\b
    )
    \s*$
""", re.IGNORECASE | re.VERBOSE)

def _to_minutes_from_any_duration(x) -> float:
    if pd.isna(x):
        return float("nan")
    if isinstance(x, (int, float)):
        return float(x)  # déjà minutes
    s = str(x).strip()
    if not s:
        return float("nan")
    s2 = s.replace(",", ".")
    try:
        return float(s2)
    except Exception:
        pass
    m = _DUR_RE.match(s2)
    if m:
        if m.group("h") or m.group("m") or m.group("s"):
            h = float(m.group("h") or 0); mi = float(m.group("m") or 0); se = float(m.group("s") or 0)
        elif m.group("hms_h"):
            h = float(m.group("hms_h") or 0); mi = float(m.group("hms_m") or 0); se = float(m.group("hms_s") or 0)
        elif m.group("mm"):
            h = 0.0; mi = float(m.group("mm") or 0); se = 0.0
        elif m.group("ss"):
            h = 0.0; mi = 0.0; se = float(m.group("ss") or 0)
        else:
            return float("nan")
        return h * 60.0 + mi + se / 60.0
    if ":" in s2:
        parts = [p for p in s2.split(":") if p != ""]
        try:
            parts = [float(p) for p in parts]
            if len(parts) == 3:
                h, mi, se = parts; return h * 60.0 + mi + se / 60.0
            if len(parts) == 2:
                mi, se = parts;  return mi + se / 60.0
        except Exception:
            pass
    return float("nan")

def _is_time_like_col(name: str) -> bool:
    low = name.lower()
    if "time" not in low:
        return False
    if "since" in low:
        return False
    if "total time" in low or " time zone " in low or low.endswith("total time") or "metabolic time" in low:
        return True
    return False

def _add_minutes_derivatives(df: pd.DataFrame) -> pd.DataFrame:
    time_cols = [c for c in df.columns if _is_time_like_col(c)]
    for c in time_cols:
        newc = f"{c} (min)"
        if newc not in df.columns:
            df[newc] = df[c].map(_to_minutes_from_any_duration)
    if "Total Time" in df.columns and "Total Time (min)" not in df.columns:
        df["Total Time (min)"] = df["Total Time"].map(_to_minutes_from_any_duration)
    return df

# ---------- Cleaners / alias ----------
def strip_accents(text: str):
    if pd.isna(text):
        return text
    return "".join(ch for ch in unicodedata.normalize("NFKD", str(text)) if not unicodedata.combining(ch))

def clean_player_names(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
    for c in cols:
        if c in df.columns:
            df[c] = df[c].astype("string").map(strip_accents).str.replace(r"\s+", " ", regex=True).str.strip()
    return df

# Alias usuels
CANON = {
    "Decels Zone 3 (Absolute)": "Decelerations Zone 3 (Absolute)",
    "Decels Zone 3 (Relative)": "Decelerations Zone 3 (Relative)",
    "Deceleration Zone 3 (Absolute)": "Decelerations Zone 3 (Absolute)",
    "Deceleration Zone 3 (Relative)": "Decelerations Zone 3 (Relative)",
}

def apply_aliases(df: pd.DataFrame) -> pd.DataFrame:
    found = {k: v for k, v in CANON.items() if k in df.columns and v not in df.columns}
    if found:
        df = df.rename(columns=found)
    return df

# ---------- Label normalizer pour capter "Full training – Entire" ----------
def _norm_label(s: str) -> str:
    s = (str(s).casefold()
                 .replace("–", "-")
                 .replace("—", "-"))
    s = re.sub(r"[^a-z0-9\s-]+", " ", s)
    s = re.sub(r"\s+", " ", s).strip()
    return s

# ---------- Sélection qui PRÉSERVE le schéma ----------
def select_keep_cols(df: pd.DataFrame, keep: list[str]) -> pd.DataFrame:
    """Garantit que toutes les colonnes 'keep' existent (créées si absentes) et renvoie dans le même ordre."""
    if not keep:
        return df
    df = df.copy()
    for c in keep:
        if c not in df.columns:
            df[c] = pd.NA
    return df[keep]

# ============== MAIN ==============
def run():
    INPUT_DIR.mkdir(parents=True, exist_ok=True)

    ledger   = read_excel_sheet(EXCEL_PATH, SHEET_LOG)
    existing = read_excel_sheet(EXCEL_PATH, SHEET_DATA)

    for col in USER_TAG_COLS:
        if col not in existing.columns:
            existing[col] = pd.Series(dtype="object")

    csvs = sorted([p for p in INPUT_DIR.rglob("*") if p.suffix.lower() == ".csv" and p.is_file()])
    if not csvs:
        print(f"Aucun CSV trouvé. Déposez vos fichiers dans : {INPUT_DIR}")
        _write_empty_excel()
        return

    imported, new_log = [], []
    for p in csvs:
        if p.name not in FORCE_REIMPORT_NAMES and should_skip(p, ledger):
            print(f"SKIP   {p.name} (unchanged)")
            continue

        print(f"IMPORT {p.name}")
        df = smart_read_csv(p)
        df = apply_aliases(df)

        # Compléter "Session Date" depuis le nom de fichier si vide
        if "Session Date" in df.columns:
            fdate, _ = _parse_session_from_filename(p.name)
            is_empty = df["Session Date"].isna() | (df["Session Date"].astype(str).str.strip() == "")
            if fdate:
                df.loc[is_empty, "Session Date"] = fdate
            df["Session Date"] = df["Session Date"].ffill().bfill()

        # Détection game vs entrainement (safe si colonnes absentes)
        df["__is_game"] = False
        if "Session Type" in df.columns:
            df["__is_game"] = df.apply(_is_game_row, axis=1)

        # Filtres robustes
        if not BYPASS_FILTERS:
            # Entraînements
            train_df = df[~df["__is_game"]].copy()
            if not train_df.empty and "Drill Title" in train_df.columns:
                norm = train_df["Drill Title"].map(_norm_label)
                mask = (
                    (norm.str.contains(r"\b(full|whole|entier|entire|complet)\b")) &
                    (norm.str.contains(r"\b(training|session|entrainement|entraînement)\b"))
                )
                train_df = train_df[mask].copy()
                if not train_df.empty:
                    train_df["Drill Title"] = "Full Training"

            # Matchs (sans échauffements)
            game_df = df[df["__is_game"]].copy()
            if not game_df.empty and "Drill Title" in game_df.columns:
                game_df = game_df[~game_df.apply(_is_warmup_row, axis=1)]
                if not game_df.empty:
                    game_df["Drill Title"] = "Game"

            # Si l’un des deux a du contenu, on recombine ; sinon on garde df tel quel
            if (not train_df.empty) or (not game_df.empty):
                df = pd.concat([train_df, game_df], ignore_index=True)

        if df.empty:
            continue

        # Colonnes dérivées (minutes)
        df = _add_minutes_derivatives(df)

        # provenance
        now_utc = datetime.now(timezone.utc)
        df["_source_file"] = p.name
        df["_source_path"] = str(p.resolve())
        df["_imported_at"] = now_utc.strftime("%Y-%m-%d %H:%M:%S%z")

        # Clés & réduction : 1 ligne par joueur & séance/game
        df["_session_key"] = df.apply(_make_session_key, axis=1)
        df["_player_key"]  = df.apply(lambda r: _make_player_key(r, PLAYER_NAME_COLS), axis=1)
        df = df.drop_duplicates(subset=["_session_key", "_player_key"], keep="first", ignore_index=True)

        for col in USER_TAG_COLS:
            if col not in df.columns:
                df[col] = pd.NA

        imported.append(df)

        st = p.stat()
        new_log.append({
            "filepath": str(p.resolve()),
            "size": st.st_size,
            "mtime_ns": st.st_mtime_ns,
            "sha1_32": hash_head(p),
            "rows": len(df),
            "imported_at": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S%z"),
        })

    if not imported:
        print("Aucun nouveau contenu à importer après filtrage.")
        _write_empty_excel()
        return

    new_block = pd.concat(imported, ignore_index=True, sort=True)
    combined = pd.concat([existing, new_block], ignore_index=True, sort=True) if not existing.empty else new_block

    # Dédoublonnage global par (session, joueur)
    if {"_session_key", "_player_key"}.issubset(set(combined.columns)) and not BYPASS_DEDUP:
        combined = combined.drop_duplicates(subset=["_session_key", "_player_key"], keep="last", ignore_index=True)

    # Dédoublonnage “métier”
    if not BYPASS_DEDUP:
        data_cols = [c for c in combined.columns if not str(c).startswith("_") and c not in USER_TAG_COLS]
        if data_cols:
            combined = combined.drop_duplicates(subset=data_cols, keep="last", ignore_index=True)

    # Noms joueurs propres
    combined = clean_player_names(combined, PLAYER_NAME_COLS)

    # === PRÉSERVATION DU SCHÉMA POUR POWER BI ===
    meta_cols = ["_source_file", "_source_path", "_imported_at", "_session_key", "_player_key"]
    combined = select_keep_cols(combined, REQUIRED_PBI_COLS + meta_cols)

    # valeurs numériques -> float + 0 si manquantes
    for c in NUMERIC_EXPECTED:
        if c in combined.columns:
            combined[c] = pd.to_numeric(combined[c], errors="coerce").fillna(0)

    # Dates en texte JJ/MM/AAAA
    if "Session Date" in combined.columns:
        combined["Session Date"] = pd.to_datetime(combined["Session Date"], errors="coerce", dayfirst=True)
        combined["Session Date"] = combined["Session Date"].dt.strftime("%d/%m/%Y").fillna("")

    # DIAG
    if SHOW_DIAG:
        diag_cols = [c for c in [
            "Player Display Name","Session Date","Drill Title",
            "Entries Zone 5 (Absolute)", "Entries Zone 6 (Absolute)",
            "Total Time", "Total Time (min)",
            "Accelerations Total Time Zone 3 (Absolute) (min)",
            "Decelerations Zone 3 (Absolute)"
        ] if c in combined.columns]
        if diag_cols:
            print("\nAperçu :")
            print(combined[diag_cols].head(10).to_string(index=False))

    ledger2 = pd.concat([ledger, pd.DataFrame(new_log)], ignore_index=True) if not ledger.empty else pd.DataFrame(new_log)

    try:
        with pd.ExcelWriter(EXCEL_PATH, engine="openpyxl", mode="w") as w:
            combined.to_excel(w, sheet_name=SHEET_DATA, index=False)
            ledger2.to_excel(w, sheet_name=SHEET_LOG, index=False)
    except PermissionError:
        raise SystemExit(f"🚫 Impossible d’écrire {EXCEL_PATH}. Ferme le fichier dans Excel puis relance.")

    print(f"OK -> {EXCEL_PATH.resolve()}")

def _write_empty_excel():
    cols = REQUIRED_PBI_COLS + ["_source_file","_source_path","_imported_at","_session_key","_player_key"]
    empty_data = pd.DataFrame(columns=cols)
    empty_log  = pd.DataFrame(columns=["filepath","size","mtime_ns","sha1_32","rows","imported_at"])
    EXCEL_PATH.parent.mkdir(parents=True, exist_ok=True)
    with pd.ExcelWriter(EXCEL_PATH, engine="openpyxl", mode="w") as w:
        empty_data.to_excel(w, sheet_name=SHEET_DATA, index=False)
        empty_log.to_excel(w, sheet_name=SHEET_LOG, index=False)
    print(f"OK (vide) -> {EXCEL_PATH.resolve()}")

if __name__ == "__main__":
    run()


SKIP   U19_2025-08-29_GROUP_33-checkpoint.csv (unchanged)
SKIP   U19_2025-10-17_GROUP_70-checkpoint.csv (unchanged)
IMPORT U19_2025-10-25_GROUP-checkpoint.csv
SKIP   U17_2025-10-01_DEVPT.csv (unchanged)
SKIP   U19_2025-08-26_GROUP_30.csv (unchanged)
SKIP   U19_2025-08-27_GROUP_31.csv (unchanged)
SKIP   U19_2025-08-28_GROUP_32.csv (unchanged)
SKIP   U19_2025-08-29_GROUP_33.csv (unchanged)
SKIP   U19_2025-09-01_GROUP_34.csv (unchanged)
SKIP   U19_2025-09-02_GROUP_35.csv (unchanged)
SKIP   U19_2025-09-03_GROUP_36.csv (unchanged)
IMPORT U19_2025-09-04_GROUP_37.csv
SKIP   U19_2025-09-05_GROUP_38.csv (unchanged)
SKIP   U19_2025-09-06_GROUP_39.csv (unchanged)
SKIP   U19_2025-09-08_GROUP_40.csv (unchanged)
SKIP   U19_2025-09-09_GROUP_41.csv (unchanged)
SKIP   U19_2025-09-10_DEVPT.csv (unchanged)
SKIP   U19_2025-09-11_GROUP_43.csv (unchanged)
SKIP   U19_2025-09-12_GROUP_44.csv (unchanged)
SKIP   U19_2025-09-13_GROUP_45.csv (unchanged)
SKIP   U19_2025-09-16_GROUP_46.csv (unchanged)
SKIP   U19_20

  (norm.str.contains(r"\b(full|whole|entier|entire|complet)\b")) &
  (norm.str.contains(r"\b(training|session|entrainement|entraînement)\b"))
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_dura

IMPORT U19_2025-09-30_GROUP_56.csv
IMPORT U19_2025-10-01_DEVPT.csv
IMPORT U19_2025-10-01_GROUP_57.csv
IMPORT U19_2025-10-02_GROUP_58.csv


  (norm.str.contains(r"\b(full|whole|entier|entire|complet)\b")) &
  (norm.str.contains(r"\b(training|session|entrainement|entraînement)\b"))
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df["_source_file"] = p.name
  df["_source_path

IMPORT U19_2025-10-03_GROUP_59.csv
IMPORT U19_2025-10-07_GROUP_BENIN.csv
IMPORT U19_2025-10-08_DEVPT.csv
SKIP   U19_2025-10-08_EXTRA_PMA.csv (unchanged)
IMPORT U19_2025-10-15_GROUP_68.csv


  (norm.str.contains(r"\b(full|whole|entier|entire|complet)\b")) &
  (norm.str.contains(r"\b(training|session|entrainement|entraînement)\b"))
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_dura

IMPORT U19_2025-10-15_INDIV.csv
IMPORT U19_2025-10-16_GROUP_69.csv
IMPORT U19_2025-10-17_GROUP_70.csv


  (norm.str.contains(r"\b(full|whole|entier|entire|complet)\b")) &
  (norm.str.contains(r"\b(training|session|entrainement|entraînement)\b"))
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df["_source_file"] = p.name
  df["_source_path

SKIP   U19_2025-10-17_GROUP_PRO.csv (unchanged)
IMPORT U19_2025-10-20_GROUP_71.csv
IMPORT U19_2025-10-21_GROUP_72.csv
IMPORT U19_2025-10-22_DEVPT.csv
IMPORT U19_2025-10-22_GROUP_73.csv


  df[col] = pd.NA
  df[col] = pd.NA
  (norm.str.contains(r"\b(full|whole|entier|entire|complet)\b")) &
  (norm.str.contains(r"\b(training|session|entrainement|entraînement)\b"))
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] =

IMPORT U19_2025-10-23_GROUP_74.csv
IMPORT U19_2025-10-24_GROUP_75.csv
IMPORT U19_2025-10-25_GROUP.csv


  df["_player_key"]  = df.apply(lambda r: _make_player_key(r, PLAYER_NAME_COLS), axis=1)
  df[col] = pd.NA
  df[col] = pd.NA
  (norm.str.contains(r"\b(full|whole|entier|entire|complet)\b")) &
  (norm.str.contains(r"\b(training|session|entrainement|entraînement)\b"))
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minu

SKIP   U19_2025-10-26_COMP_J8.csv (unchanged)
IMPORT U19_2025-10-29_DEVPT.csv
IMPORT U19_2025-10-29_GROUP_77 (2).csv
SKIP   U19_2025-10-30_GROUP_78.csv (unchanged)
SKIP   U19_2025-10-31_GROUP_79.csv (unchanged)
IMPORT U19_2025-11-01_COMP.csv
IMPORT U19_2025-11-01_GROUP_80.csv


  df["_player_key"]  = df.apply(lambda r: _make_player_key(r, PLAYER_NAME_COLS), axis=1)
  df[col] = pd.NA
  df[col] = pd.NA
  (norm.str.contains(r"\b(full|whole|entier|entire|complet)\b")) &
  (norm.str.contains(r"\b(training|session|entrainement|entraînement)\b"))
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minu

IMPORT U19_2025-11-06_GROUP_83.csv
IMPORT U19_2025-11-07_GROUP_84.csv
IMPORT U19_2025-11-08_GROUP_85.csv


  df["_player_key"]  = df.apply(lambda r: _make_player_key(r, PLAYER_NAME_COLS), axis=1)
  (norm.str.contains(r"\b(full|whole|entier|entire|complet)\b")) &
  (norm.str.contains(r"\b(training|session|entrainement|entraînement)\b"))
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = 

IMPORT U19_2025-11-10_GROUP_86.csv
SKIP   U19_2025-11-11_GROUP_87.csv (unchanged)
IMPORT U19_2025-11-12_COMP_MARIGNANE.csv
IMPORT U19_2025-11-12_GROUP_88.csv


  (norm.str.contains(r"\b(full|whole|entier|entire|complet)\b")) &
  (norm.str.contains(r"\b(training|session|entrainement|entraînement)\b"))
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_dura

IMPORT U19_2025-11-12_INDIV.csv
SKIP   U19_2025-11-13_GROUP_PRO.csv (unchanged)
IMPORT U19_2025-11-13_INDIV.csv
IMPORT U19_2025-11-14_GROUP_90.csv
IMPORT U19_2025-11-14_INDIV.csv


  df["_source_path"] = str(p.resolve())
  df["_imported_at"] = now_utc.strftime("%Y-%m-%d %H:%M:%S%z")
  df["_session_key"] = df.apply(_make_session_key, axis=1)
  df["_player_key"]  = df.apply(lambda r: _make_player_key(r, PLAYER_NAME_COLS), axis=1)
  (norm.str.contains(r"\b(full|whole|entier|entire|complet)\b")) &
  (norm.str.contains(r"\b(training|session|entrainement|entraînement)\b"))
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = df[c].map(_to_minutes_from_any_duration)
  df[newc] = 

IMPORT U19_2025-11-15_GROUP_PRO.csv


  df["_source_path"] = str(p.resolve())
  df["_imported_at"] = now_utc.strftime("%Y-%m-%d %H:%M:%S%z")
  df["_session_key"] = df.apply(_make_session_key, axis=1)
  df["_player_key"]  = df.apply(lambda r: _make_player_key(r, PLAYER_NAME_COLS), axis=1)



Aperçu :
  Player Display Name Session Date   Drill Title  Entries Zone 5 (Absolute)  Entries Zone 6 (Absolute) Total Time  Total Time (min)  Accelerations Total Time Zone 3 (Absolute) (min)  Decelerations Zone 3 (Absolute)
Dione Babacar Vincent   01/10/2025 Full Training                       12.0                        1.0   01:13:09         73.150000                                               0.0                              0.0
           Bedja Samy   01/10/2025 Full Training                        8.0                        0.0   01:13:09         73.150000                                               0.0                              0.0
      Cesarini Gaetan   01/10/2025 Full Training                       10.0                        1.0   01:13:09         73.150000                                               0.0                              0.0
         Makumba Enzo   01/10/2025 Full Training                       11.0                        0.0   01:13:09         73.15000