In [2]:
# --- imports ---
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
import os

# --- Load CSV ---
# A relative path is resolved against the current working directory, so
# adjust DATA_PATH to match where the file lives in your setup.
DATA_PATH = "data/SP1.csv"
try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError as err:
    # Re-raise the same exception type (errno and filename preserved), but
    # report where the file was looked for so a wrong working directory is
    # obvious from the message.
    raise FileNotFoundError(
        err.errno,
        f"CSV not found; looked in {os.path.abspath(DATA_PATH)} "
        f"(current working directory: {os.getcwd()})",
        DATA_PATH,
    ) from err

# Show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)

# --- Build features ---
# Each feature is one column minus another (presumably home minus away
# statistics, going by the H/A column prefixes -- confirm against the CSV).
for feature_name, first_col, second_col in (
    ("shots_diff", "HS", "AS"),
    ("target_diff", "HST", "AST"),
    ("corner_diff", "HC", "AC"),
):
    df[feature_name] = df[first_col].sub(df[second_col])

feature_columns = ["shots_diff", "target_diff", "corner_diff"]
X = df[feature_columns]

# Encode the FTR column numerically: "H" -> 1, "D" -> 0, "A" -> -1.
# Series.map turns any value missing from the mapping into NaN.
result_codes = {"H": 1, "D": 0, "A": -1}
y = df["FTR"].map(result_codes)

# --- Split data ---
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
holdout_split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = holdout_split

# --- Train model ---
# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# --- Evaluate model ---
# Accuracy is measured on the held-out rows only.
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"Model accuracy: {acc:.2f}")

# --- Save model ---
# Create the destination directory if needed (no error when it already
# exists), then serialize the fitted model with joblib.
model_file = "matches/ml/model.pkl"
os.makedirs(os.path.dirname(model_file), exist_ok=True)
joblib.dump(model, model_file)
print("✅ Modelo guardado correctamente en matches/ml/model.pkl")

# --- Predictions for every match ---
def predict_match(shots_diff, target_diff, corner_diff):
    """Return the predicted outcome label for a single match.

    Uses the module-level ``model`` to classify one row built from the
    three feature values.

    Args:
        shots_diff: Value of the ``shots_diff`` feature.
        target_diff: Value of the ``target_diff`` feature.
        corner_diff: Value of the ``corner_diff`` feature.

    Returns:
        ``"Victoria local"`` when the model predicts 1, ``"Empate"`` when it
        predicts 0, and ``"Victoria visitante"`` for any other prediction.
    """
    # The model is fitted on a DataFrame with these column names; passing a
    # bare NumPy array makes scikit-learn warn about missing feature names on
    # every call, so build a one-row DataFrame with the same columns instead.
    X_input = pd.DataFrame(
        [[shots_diff, target_diff, corner_diff]],
        columns=["shots_diff", "target_diff", "corner_diff"],
    )
    pred = model.predict(X_input)[0]

    # Anything that is not 1 or 0 falls through to the away-win label,
    # matching the original if/elif/else chain.
    labels = {1: "Victoria local", 0: "Empate"}
    return labels.get(pred, "Victoria visitante")

# Build the prediction column.
# Predict all rows in a single model.predict() call rather than invoking the
# model once per row through DataFrame.apply: the labels are the same, but the
# per-call overhead of the forest is paid once instead of once per match.
# Passing the named feature columns also keeps scikit-learn's feature-name
# check satisfied.
predicted_codes = model.predict(df[["shots_diff", "target_diff", "corner_diff"]])

# Same label mapping as predict_match: 1 -> home win, 0 -> draw, anything
# else -> away win.
outcome_labels = {1: "Victoria local", 0: "Empate"}
df["prediccion"] = [
    outcome_labels.get(code, "Victoria visitante") for code in predicted_codes
]

# Show the first 10 rows with their prediction
df.head(10)

FileNotFoundError: [Errno 2] No such file or directory: 'data/SP1.csv'