In [1]:
import pandas as pd

# Charger les donn√©es brutes
df = pd.read_csv("data/psg_ligue1_2015_2016_events.csv")

# Filtrer les √©v√©nements de type "Shot"
shots_df = df[df["type"] == "Shot"].copy()

# Cr√©er la cible : 1 si but, 0 sinon
shots_df["goal"] = shots_df["shot_outcome"].apply(lambda x: 1 if x == "Goal" else 0)

# S√©lectionner les colonnes pertinentes
features = [
    "x", "y",
    "shot_body_part",
    "shot_technique",
    "under_pressure",
    "shot_first_time",
    "goal",  # target
    "shot_statsbomb_xg",  # pour benchmark
    "team", "player", "minute", "second"
]

shots_df = shots_df[features]

# Nettoyer les valeurs manquantes et convertir
shots_df["under_pressure"] = shots_df["under_pressure"].fillna(False).astype(int)
shots_df["shot_first_time"] = shots_df["shot_first_time"].fillna(False).astype(int)

# Encoder les variables cat√©gorielles si besoin (optionnel pour le mod√®le plus tard)
# shots_df = pd.get_dummies(shots_df, columns=["shot_body_part", "shot_technique"], drop_first=True)

# Enregistrer le fichier pr√™t pour le mod√®le
shots_df.to_csv("data/shots_cleaned.csv", index=False)

print(f"{len(shots_df)} shots extracted and saved to 'data/shots_cleaned.csv'")


554 shots extracted and saved to 'data/shots_cleaned.csv'


  df = pd.read_csv("data/psg_ligue1_2015_2016_events.csv")
  shots_df["under_pressure"] = shots_df["under_pressure"].fillna(False).astype(int)
  shots_df["shot_first_time"] = shots_df["shot_first_time"].fillna(False).astype(int)


In [18]:
from statsbombpy import sb
import pandas as pd
from tqdm import tqdm

# Imaginons que ceci correspond √† la Ligue 1 2015/2016
competition_id = 7   # Hypoth√©tique ID de Ligue 1
season_id = 27       # Hypoth√©tique ID de 2015/2016

# 1. Charger tous les matchs de la saison
matches = sb.matches(competition_id=competition_id, season_id=season_id)

# 2. Filtrer uniquement les matchs du PSG
psg_matches = matches[
    (matches["home_team"].str.contains("Paris", case=False, na=False)) |
    (matches["away_team"].str.contains("Paris", case=False, na=False))
]

# 3. Extraire les √©v√©nements de chaque match du PSG
all_psg_events = []

for match_id in tqdm(psg_matches["match_id"]):
    try:
        events = sb.events(match_id=match_id)
        psg_events = events[events["team"].str.contains("Paris", case=False, na=False)].copy()
        psg_events["match_id"] = match_id

        # üéØ S√©parer les coordonn√©es start/end s'il y a des colonnes coordonn√©es
        if "location" in psg_events.columns:
            psg_events[["x", "y"]] = psg_events["location"].apply(pd.Series)

        if "pass_end_location" in psg_events.columns:
            psg_events[["pass_end_x", "pass_end_y"]] = psg_events["pass_end_location"].apply(pd.Series)

        if "carry_end_location" in psg_events.columns:
            psg_events[["carry_end_x", "carry_end_y"]] = psg_events["carry_end_location"].apply(pd.Series)

        all_psg_events.append(psg_events)

    except Exception as e:
        print(f"Erreur sur le match {match_id} : {e}")
        continue

# 4. Fusionner et exporter
if all_psg_events:
    df_psg_all = pd.concat(all_psg_events).reset_index(drop=True)
    df_psg_all.to_csv("psg_ligue1_2015_2016_events.csv", index=False)
    print(f"‚úÖ {len(df_psg_all)} √©v√©nements PSG extraits avec succ√®s.")
else:
    print("‚ùå Aucun √©v√©nement PSG trouv√©.")


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 37/37 [00:21<00:00,  1.71it/s]


‚úÖ 93416 √©v√©nements PSG extraits avec succ√®s.


In [None]:
import pandas as pd
import numpy as np

# Charger le fichier de tirs
df = pd.read_csv("data/PSG_Marseille_shots_cleaned.csv")

#  Distance au but (centre = [120, 40])
df["distance_to_goal"] = np.sqrt((120 - df["x"])**2 + (40 - df["y"])**2)

# Angle au but (entre les poteaux)
def calculate_angle(x, y):
    goal_width = 7.32  # largeur du but en m√®tres
    a = goal_width / 2
    dx = 120 - x
    dy = np.abs(y - 40)
    return np.arctan2(a, dx) - np.arctan2(-a, dx)

df["angle_to_goal"] = df.apply(lambda row: calculate_angle(row["x"], row["y"]), axis=1)

# Zones tactiques
df["is_central_zone"] = df["y"].between(30, 50).astype(int)
df["is_left_side"] = (df["y"] < 40).astype(int)
df["is_right_side"] = (df["y"] > 40).astype(int)

# Sauvegarde
df.to_csv("data/psg_marseille_shots_enriched.csv", index=False)
print("Fichier enrichi sauvegard√© dans 'psg_shots_enriched.csv'")


‚úÖ Fichier enrichi sauvegard√© dans 'psg_shots_enriched.csv'
