In [None]:
import sys
from pathlib import Path

# Add the project root (the parent of the notebook's working directory) to the
# Python path so the local `src` package can be imported.
# The membership check keeps the cell idempotent: re-running it does not
# append the same entry to sys.path a second time.
project_root = Path.cwd().parent
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))




In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from scipy.stats import linregress

from src import visualization as viz



In [None]:
CSV_FILE = project_root.joinpath("data", "matches.csv")

# Load the match log and derive the per-match features in one chained step:
#   total_engagements - kills + deaths + assists
#   kd_ratio          - kills per death, with zero deaths counted as one
#   hs_percent        - missing headshot percentages replaced by 0
df = pd.read_csv(CSV_FILE).assign(
    total_engagements=lambda m: m["kills"] + m["deaths"] + m["assists"],
    kd_ratio=lambda m: m["kills"] / m["deaths"].replace(0, 1),
    hs_percent=lambda m: m["hs_percent"].fillna(0),
)

df.head()


In [None]:
# Headline numbers first, then the distribution of the engineered features.
n_matches = len(df)
overall_win_rate = df["win"].mean()
print("Total matches:", n_matches)
print("Win rate:", overall_win_rate)
print("\nSummary statistics:")
summary_cols = ["kd_ratio", "hs_percent", "total_engagements"]
df[summary_cols].describe()


In [None]:
# Train only on matches whose outcome is known. Filling a missing `win` with 0
# would label unknown results as losses and bias the classifier; dropping them
# matches how the KD-only model handles unknown outcomes.
labelled = df.dropna(subset=["win"])
features = labelled[["hs_percent", "kd_ratio", "total_engagements"]].fillna(0)
target = labelled["win"]

# Hold out 20% of the labelled matches for evaluation (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("=== Predictive Analysis (HS%, KD, Engagements) ===")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Feature importance sorted.
# Raw coefficients are expressed per unit of each feature, so they are not
# comparable across features measured on different scales. Multiplying by the
# training-set standard deviation gives the change in log-odds per one standard
# deviation of the feature, which is what the ranking is based on.
importance = pd.DataFrame({
    "feature": features.columns,
    "coefficient": model.coef_[0],
    "std_coefficient": model.coef_[0] * X_train.std().to_numpy(),
}).sort_values(by="std_coefficient", key=abs, ascending=False)
print("\nFeature Importance (sorted):")
importance


In [None]:
# Single-feature model: win outcome explained by KD ratio alone.
# Rows where either the KD ratio or the outcome is missing are discarded.
kd_df = df.loc[:, ["kd_ratio", "win"]].dropna()

kd_only = kd_df.loc[:, ["kd_ratio"]]
win_target = kd_df.loc[:, "win"]

# `fit` returns the estimator itself, so construction and training can be chained.
kd_model = LogisticRegression().fit(kd_only, win_target)

def kd_for_win_prob(p, model=None):
    """Return the KD ratio at which the logistic model predicts win probability `p`.

    Inverts the fitted relation ``logit(p) = intercept + coef * kd`` for ``kd``.

    Parameters
    ----------
    p : float
        Target win probability, strictly between 0 and 1.
    model : fitted estimator, optional
        Object exposing ``coef_`` (indexed as ``[0][0]``) and ``intercept_``
        (indexed as ``[0]``). Defaults to the notebook-level ``kd_model``.

    Returns
    -------
    float
        KD ratio that yields probability `p` under the model.

    Raises
    ------
    ValueError
        If `p` is not strictly inside (0, 1), where the logit is undefined or
        infinite, or if the model's KD coefficient is zero, so that no KD
        value changes the predicted probability.
    """
    if model is None:
        model = kd_model
    # Also rejects NaN, since every comparison with NaN is False.
    if not 0 < p < 1:
        raise ValueError(f"p must be strictly between 0 and 1, got {p!r}")
    a = model.coef_[0][0]
    b = model.intercept_[0]
    if a == 0:
        raise ValueError("KD coefficient is zero; win probability does not depend on KD")
    return (np.log(p / (1 - p)) - b) / a

# Report the KD ratio required to reach each target win probability.
for p in (0.5, 0.6, 0.7):
    percent = int(p * 100)
    required_kd = kd_for_win_prob(p)
    print(f"KD needed for {percent}% win chance: {required_kd:.3f}")





In [None]:
# Linear regression of KD ratio on engagement volume.
# Rows with a missing value in either column are dropped first; otherwise a
# single NaN makes every regression statistic come out as NaN.
reg_df = df[["total_engagements", "kd_ratio"]].dropna()

slope, intercept, r_value, p_value, _ = linregress(
    reg_df["total_engagements"],
    reg_df["kd_ratio"]
)

print("Slope:", slope)
print("Correlation (R):", r_value)
print("P-value:", p_value)

# Rule-of-thumb bands on |R|, so every outcome gets an interpretation rather
# than only the weak case.
if np.isnan(r_value):
    print("Interpretation: Correlation could not be computed.")
elif abs(r_value) < 0.2:
    print("Interpretation: Engagement volume has weak correlation with KD.")
elif abs(r_value) < 0.5:
    print("Interpretation: Engagement volume has moderate correlation with KD.")
else:
    print("Interpretation: Engagement volume has strong correlation with KD.")



In [None]:
import ipywidgets as widgets
from IPython.display import display

def predict_win_prob(kd):
    """Print the win probability that `kd_model` assigns to the KD ratio `kd`.

    Applies the logistic function to ``intercept + coefficient * kd`` and
    prints the result as a percentage with two decimals. Returns nothing.
    """
    bias = kd_model.intercept_[0]
    weight = kd_model.coef_[0][0]
    prob = 1 / (1 + np.exp(-(bias + weight * kd)))
    print(f"Predicted win probability for KD = {kd:.2f}: {prob*100:.2f}%")

# Slider configuration: starts at KD 1.0 and ranges from 0 up to one unit above
# the largest KD ratio in the data. With continuous_update off, the prediction
# refreshes when the handle is released rather than on every drag event.
slider_options = dict(
    value=1.0,
    min=0.0,
    max=df["kd_ratio"].max() + 1,
    step=0.01,
    description="KD:",
    continuous_update=False,
)
kd_slider = widgets.FloatSlider(**slider_options)

widgets.interact(predict_win_prob, kd=kd_slider)
