# Baseline Model: Pass Result Prediction
Predicting whether a pass will be completed, based on player positions at the time of the throw (the `pass_forward` tracking event).

In [None]:
import sys
sys.path.append('../src')
from data_loader import DataLoader
from features import create_baseline_features
import polars as pl
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss, classification_report
import matplotlib.pyplot as plt

# Read the tracking data returned by load_week_data(1) and the plays table
# through the project loader rooted at '../'
loader = DataLoader('../')
tracking_df = loader.load_week_data(1)
plays_df = loader.load_plays()

# Keep only the tracking rows whose event column equals "pass_forward"
is_pass_forward = pl.col("event") == "pass_forward"
pass_forward_df = tracking_df.filter(is_pass_forward)

# Build the baseline feature columns from the pass_forward rows
features_df = create_baseline_features(pass_forward_df)

# Aggregate to Play Level: one row per (game_id, play_id) holding the mean
# dist_to_ball and the mean speed ("s") over all rows of features_df for that
# play. NOTE: nothing here separates defense from offense -- both averages
# pool every row of the play.
# This is a very simple baseline
agg_features = features_df.group_by(["game_id", "play_id"]).agg([
    pl.col("dist_to_ball").mean().alias("avg_dist_to_ball"),
    pl.col("s").mean().alias("avg_speed"),
])

# Join with Target (pass_result). The default inner join drops any play that
# has no matching row in plays_df.
model_df = agg_features.join(plays_df.select(["game_id", "play_id", "pass_result"]), on=["game_id", "play_id"])

# Keep only plays whose pass_result is one of C / I / S / R; any other value
# (including null) is dropped.
# NOTE(review): confirm this list covers every outcome that should count as
# "not complete" -- values outside it are silently excluded from the model.
model_df = model_df.filter(pl.col("pass_result").is_in(["C", "I", "S", "R"]))
# Map to binary: Complete (C) -> 1 vs Not (I/S/R) -> 0
model_df = model_df.with_columns(
    (pl.col("pass_result") == "C").cast(pl.Int8).alias("label")
)

# group_by does not guarantee the order of its output rows, so without an
# explicit sort the row order can differ between runs and any seeded
# train/test split taken from this frame would not be reproducible.
model_df = model_df.sort(["game_id", "play_id"])

# Convert to Pandas for XGBoost
pdf = model_df.to_pandas()
X = pdf[["avg_dist_to_ball", "avg_speed"]]
y = pdf["label"]

# Hold out 20% of plays for evaluation. Stratifying on the label keeps the
# complete / not-complete ratio the same in both partitions, so the reported
# metrics are not skewed by an unlucky split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# use_label_encoder is deprecated in XGBoost and is ignored with a warning on
# 2.x, so it is no longer passed; the labels are already 0/1 integers.
clf = xgb.XGBClassifier(eval_metric='logloss')
clf.fit(X_train, y_train)

# Hard-label metrics
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
# Probability-based metric matching the model's eval_metric. labels is given
# explicitly so the call still works if the test fold holds only one class.
y_proba = clf.predict_proba(X_test)[:, 1]
print("Log loss:", log_loss(y_test, y_proba, labels=[0, 1]))
print(classification_report(y_test, y_pred))