In [67]:
import pandas as pd

# Load your dataset
df = pd.read_csv("player_data.csv")


In [69]:
df["performance_load"] = (
    df["Games_Played"] * df["Minutes_Played"]
    + df["Field_Goals_Attempted"]
    + df["Three_Point_Field_Goals_Attempted"]
    + df["Free_Throws_Attempted"]
)


In [71]:
def assign_risk_label(row):
    injuries = row["Total_Injuries"]
    injury_type = str(row["Most_Common_Injury"]).lower()
    perf_load = row["performance_load"]

    if injuries <= 2 and injury_type in ["eye", "illness", "none"]:
        return "Low"
    elif 3 <= injuries <= 5 and perf_load < 10000:
        return "Low-Medium"
    elif 3 <= injuries <= 5 and perf_load >= 10000:
        return "Medium"
    elif 6 <= injuries <= 10 and perf_load >= 15000:
        return "Medium-High"
    elif injuries > 10 and perf_load >= 20000:
        return "High"
    else:
        return "Medium"


In [73]:
df["predicted_risk_label"] = df.apply(assign_risk_label, axis=1)


In [75]:
print(df["predicted_risk_label"].value_counts())


predicted_risk_label
High           630
Medium         297
Medium-High     73
Name: count, dtype: int64


In [77]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
df["risk_encoded"] = label_encoder.fit_transform(df["predicted_risk_label"])


In [79]:
features = [
    "Age", "Height_in_Inches", "Weight_in_Pounds", "Games_Played", "Minutes_Played",
    "Field_Goals_Attempted", "Three_Point_Field_Goals_Attempted",
    "Free_Throws_Attempted", "Steals", "Blocks", "Fouls"
]

X = df[features]
y = df["risk_encoded"]


In [81]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [83]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(probability=True, kernel='rbf', C=1),
    "XGBoost": XGBClassifier(eval_metric='mlogloss', random_state=42)
}

for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print(f"\nModel: {name}")
    print("Accuracy:", accuracy_score(y_test, preds))
    print("Classification Report:\n", classification_report(
        y_test, preds, target_names=label_encoder.classes_
    ))



Model: Random Forest
Accuracy: 0.965
Classification Report:
               precision    recall  f1-score   support

        High       0.99      0.98      0.99       141
      Medium       0.94      1.00      0.97        51
 Medium-High       0.57      0.50      0.53         8

    accuracy                           0.96       200
   macro avg       0.84      0.83      0.83       200
weighted avg       0.96      0.96      0.96       200


Model: SVM
Accuracy: 0.855
Classification Report:
               precision    recall  f1-score   support

        High       0.83      1.00      0.91       141
      Medium       0.97      0.59      0.73        51
 Medium-High       0.00      0.00      0.00         8

    accuracy                           0.85       200
   macro avg       0.60      0.53      0.55       200
weighted avg       0.83      0.85      0.83       200


Model: XGBoost
Accuracy: 0.965
Classification Report:
               precision    recall  f1-score   support

        High 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [85]:
import joblib

# Save the best model (Random Forest) and label encoder
joblib.dump(models["Random Forest"], "rf_injury_model.joblib")
joblib.dump(label_encoder, "risk_label_encoder.joblib")

print("✅ Model and label encoder saved as .joblib files!")


✅ Model and label encoder saved as .joblib files!
