In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# Import Models
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier

df = pd.read_csv('career_stats.csv')
target = 'HOF'

features_to_drop = ['Name', 'QBrec', 'HOF']

X = df.drop(columns=features_to_drop)
y = df[target]

X = X.fillna(0)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=3),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss')
}

print(f"{'Model':<25} | {'Accuracy':<10}")
print("-" * 40)

for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    
    preds = model.predict(X_test_scaled)
    
    acc = accuracy_score(y_test, preds)
    print(f"{name:<25} | {acc:.4f}")
    
    print(classification_report(y_test, preds))

Model                     | Accuracy  
----------------------------------------
Logistic Regression       | 0.8378
              precision    recall  f1-score   support

           0       0.91      0.91      0.91        34
           1       0.00      0.00      0.00         3

    accuracy                           0.84        37
   macro avg       0.46      0.46      0.46        37
weighted avg       0.84      0.84      0.84        37

Decision Tree             | 0.8378
              precision    recall  f1-score   support

           0       0.91      0.91      0.91        34
           1       0.00      0.00      0.00         3

    accuracy                           0.84        37
   macro avg       0.46      0.46      0.46        37
weighted avg       0.84      0.84      0.84        37

KNN                       | 0.8378
              precision    recall  f1-score   support

           0       0.91      0.91      0.91        34
           1       0.00      0.00      0.00         

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
