In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, KBinsDiscretizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# --- Load the raw tables -----------------------------------------------------
games_df = pd.read_csv("games.csv")
teamstats_df = pd.read_csv("teamstats.csv")
shots_df = pd.read_csv("shots.csv")

# A missing assister is encoded as ID 0.
shots_df["assisterID"] = shots_df["assisterID"].fillna(0)

# Fill gaps in every numeric team-stat column with that column's median.
for col in teamstats_df.select_dtypes(include=[np.number]).columns:
    teamstats_df[col] = teamstats_df[col].fillna(teamstats_df[col].median())

# Encode the categorical columns numerically (values outside the mapping
# become NaN).
teamstats_df["location"] = teamstats_df["location"].map({"h": 1, "a": 0})
teamstats_df["result"] = teamstats_df["result"].map({"W": 1, "D": 0, "L": -1})

teamstats_df["date"] = pd.to_datetime(teamstats_df["date"])
games_df["date"] = pd.to_datetime(games_df["date"])

# --- Rolling form features ---------------------------------------------------
# Rows must be in chronological order within each team for the window to
# describe "the previous matches".
teamstats_df.sort_values(by=["teamID", "date"], inplace=True)

form_stats = ["xGoals", "shots", "shotsOnTarget"]
for stat in form_stats:
    # shift(1) excludes the current match from its own window.  Without it
    # the feature would contain statistics of the very match whose result is
    # the prediction target (target leakage).
    teamstats_df[f"{stat}_rolling5"] = teamstats_df.groupby("teamID")[stat].transform(
        lambda series: series.shift(1).rolling(5, min_periods=1).mean()
    )

# A team's first match has no history, so its rolling features are NaN after
# the shift; drop those rows because the downstream discretizer rejects NaN.
teamstats_df.dropna(subset=[f"{stat}_rolling5" for stat in form_stats], inplace=True)

# --- Preprocessing and model fitting -----------------------------------------
features_to_scale = ["xGoals_rolling5", "shots_rolling5", "shotsOnTarget_rolling5"]
model_features = features_to_scale + ["location"]

# Pick the train/test rows BEFORE fitting any transformer, so that the scaler
# and the bin edges are learned from training rows only and the held-out rows
# cannot influence preprocessing.  Positions (not labels) are split so the
# selection does not depend on the index being unique.
row_positions = np.arange(len(teamstats_df))
train_rows, test_rows = train_test_split(row_positions, test_size=0.2, random_state=42)

# Standardise using statistics of the training rows, then apply to all rows.
scaler = StandardScaler()
scaler.fit(teamstats_df[features_to_scale].iloc[train_rows])
teamstats_df[features_to_scale] = scaler.transform(teamstats_df[features_to_scale])

X = teamstats_df[model_features]
y = teamstats_df["result"]

# Discretise the continuous features into 3 equal-width ordinal bins whose
# edges come from the training rows.
discretizer = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy="uniform")
discretizer.fit(X[features_to_scale].iloc[train_rows])

X_discrete = X.copy()
X_discrete[features_to_scale] = discretizer.transform(X[features_to_scale])

X_train, X_test = X_discrete.iloc[train_rows], X_discrete.iloc[test_rows]
y_train, y_test = y.iloc[train_rows], y.iloc[test_rows]

# pgmpy fits from a single frame holding both the features and the target.
train_data = X_train.copy()
train_data["result"] = y_train

# Every feature is a direct parent of "result".
model = BayesianNetwork([(feature, "result") for feature in model_features])
model.fit(train_data, estimator=MaximumLikelihoodEstimator)

inference = VariableElimination(model)

def batch_predict(inference, X_test, default=0):
    """Predict the most likely ``result`` state for every row of ``X_test``.

    Each row is converted to an evidence dict (column name -> ``int`` state)
    and passed to ``inference.map_query(variables=["result"], ...)``.

    Args:
        inference: Object exposing ``map_query(variables=..., evidence=...)``
            whose return value can be indexed with ``"result"``.
        X_test: DataFrame whose columns are the evidence variables; every
            value must be convertible with ``int()``.
        default: Prediction recorded for a row whose query fails.

    Returns:
        list: One prediction per row of ``X_test``, in row order.
    """
    import logging  # local import: keeps the block self-contained

    logger = logging.getLogger(__name__)
    predictions = []
    fallback_count = 0

    for record in X_test.to_dict("records"):
        evidence = {column: int(value) for column, value in record.items()}
        try:
            prediction = inference.map_query(variables=["result"], evidence=evidence)["result"]
        except Exception:
            # Best-effort: a failed query must not abort the whole batch.
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            logger.debug("map_query failed for evidence %r", evidence, exc_info=True)
            fallback_count += 1
            prediction = default
        predictions.append(prediction)

    if fallback_count:
        # Make the fallback visible; it skews the metrics towards ``default``.
        logger.warning(
            "batch_predict: %d of %d rows could not be queried; used default %r",
            fallback_count,
            len(predictions),
            default,
        )
    return predictions

# Score the held-out rows with the fitted network, then print the
# per-class report, the confusion matrix and the overall accuracy.
predictions = batch_predict(inference, X_test)

print("\nModel Performance:")
report_text = classification_report(y_test, predictions)
print(report_text)

matrix = confusion_matrix(y_test, predictions)
print("Confusion Matrix:\n", matrix)

accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

print("Model Evaluation Completed")
