In [1]:
import numpy as np
import pandas as pd

from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score

In [2]:
# Load the 2000-2024 player table (physical measurements plus the isDrafted
# label) and display it.
df = pd.read_csv(
    filepath_or_buffer="CSVfiles/drafted_2000_to_2024_final_to_prepare_for_first_model.csv",
)
df

Unnamed: 0.1,Unnamed: 0,Player,Pos,School,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,isDrafted,Year
0,0,John Abraham,OLB,South Carolina,76,252,4.55,,,,,,1,2000
1,1,Shaun Alexander,RB,Alabama,72,218,4.58,,,,,,1,2000
2,2,Darnell Alford,OT,Boston Col.,76,334,5.56,25.0,23.0,94.0,8.48,4.98,1,2000
3,3,Kyle Allamon,TE,Texas Tech,74,253,4.97,29.0,,104.0,7.29,4.49,0,2000
4,4,Rashard Anderson,CB,Jackson State,74,206,4.55,34.0,,123.0,7.18,4.15,1,2000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7944,8467,Payton Wilson,LB,North Carolina State,76,233,4.43,34.5,,119.0,,,1,2024
7945,8468,Roman Wilson,WR,Michigan,71,185,4.39,,12.0,,,,1,2024
7946,8469,Mekhi Wingo,DT,LSU,72,284,4.85,31.5,25.0,109.0,,,1,2024
7947,8470,Xavier Worthy,WR,Texas,71,165,4.21,41.0,,131.0,,,1,2024


In [3]:
# Drop the identifier columns and the leftover "Unnamed: 0" column so that only
# the measurement columns and the isDrafted label remain.
# errors="ignore" keeps this cell re-runnable: `df` is reassigned from itself,
# so on a second execution the columns are already gone and a plain drop would
# raise KeyError.
df = df.drop(columns=["Year", "Player", "School", "Pos", "Unnamed: 0"], errors="ignore")
df.head()

Unnamed: 0,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,isDrafted
0,76,252,4.55,,,,,,1
1,72,218,4.58,,,,,,1
2,76,334,5.56,25.0,23.0,94.0,8.48,4.98,1
3,74,253,4.97,29.0,,104.0,7.29,4.49,0
4,74,206,4.55,34.0,,123.0,7.18,4.15,1


In [4]:
# Separate the frame into the feature matrix (everything except the label)
# and the isDrafted target column.
X = df.drop(columns=["isDrafted"])
Y = df["isDrafted"]

In [5]:
# Hold out 30% of the rows for evaluation. The fixed seed is passed as
# random_state so the same split is produced on every run.
test_size = 0.30
seed = 4902
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=seed,
    test_size=test_size,
)

In [6]:
# Candidate hyperparameter values handed to the grid search below.
param_grid = dict(
    max_depth=[3, 5, 7],
    learning_rate=[0.1, 0.01, 0.05],
    n_estimators=[50, 100, 200],
)

In [7]:
# Base classifier for the grid search; it reuses the split's seed as its
# random_state.
model = XGBClassifier(
    random_state=seed,
)

In [8]:
# Search param_grid with the base classifier, fitting on the training split.
# cv and n_jobs are left at their scikit-learn defaults.
grid_search = GridSearchCV(model, param_grid)
grid_search.fit(X_train, y_train)

In [9]:
# Report the best score found by the search and the parameter combination
# that produced it.
best_cv_score = grid_search.best_score_
best_cv_params = grid_search.best_params_
print(f"Best score: {best_cv_score:.3f}")
print(f"Best parameters: {best_cv_params}")

Best score: 0.708
Best parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100}


In [10]:
# Access best model
# Keep a direct handle on the estimator the grid search selected; it is the
# object saved to disk in the next cell.
best_model = grid_search.best_estimator_

In [11]:
# Save best model
# Writes the selected estimator to 'best_model.ubj' (a relative path, so it is
# resolved against the current working directory).
best_model.save_model('best_model.ubj')

In [12]:
# Load saved model
# Start from a fresh, unfitted classifier and load the state written to
# 'best_model.ubj' above. All later predictions use this reloaded copy,
# not `best_model`.
loaded_model = XGBClassifier()
loaded_model.load_model('best_model.ubj')

In [13]:
# Use loaded model for predictions
# Predicted labels for the held-out test rows.
# NOTE(review): `predictions` is not read before a later cell reassigns it to
# the 2025 prospect predictions; the accuracy cell calls loaded_model.score
# directly instead.
predictions = loaded_model.predict(X_test)

In [14]:
# Accuracy of the reloaded model on the held-out test split: the fraction of
# test rows whose predicted label matches y_test.
accuracy = accuracy_score(y_test, loaded_model.predict(X_test))
print(f"Accuracy: {accuracy:.3f}")

Accuracy: 0.699


In [15]:
# 2025 prospects: load the file and set the player name and position columns
# aside for the results table built in a later cell.
prospects_df = pd.read_csv("CSVfiles/Prospects2025.csv")
Name, Positon = prospects_df["Player"], prospects_df["Pos"]  # "Positon" (sic): later cells read this name

# Drop the identifier columns, the leftover "Unnamed: 0" column and the label
# column, then preview what remains.
non_feature_columns = ["Year", "Player", "School", "Pos", "Unnamed: 0", "isDrafted"]
prospects_df = prospects_df.drop(columns=non_feature_columns)
prospects_df.head()

Unnamed: 0,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle
0,74,182,4.53,32.5,,117.0,,
1,76,306,5.09,28.0,,103.0,,
2,76,305,4.95,31.5,28.0,111.0,7.6,4.79
3,73,187,4.56,31.5,,116.0,,
4,72,204,,35.0,,120.0,,


In [17]:
# Predict a label for every 2025 prospect, then round each predicted value
# and collect the results in a plain list.
y_pred = loaded_model.predict(prospects_df)
predictions = list(map(round, y_pred))

In [18]:
# Results table: one row per prospect with name, position and predicted label.
# "2025 Actual" and "isCorrect" are created as empty-string placeholders.
predictors = pd.DataFrame(
    {
        "Name": Name,
        "Position": Positon,
        "Predictions": predictions,
        "2025 Actual": "",
        "isCorrect": "",
    }
)
predictors

Unnamed: 0,Name,Position,Predictions,2025 Actual,isCorrect
0,BJ Adams,CB,1,,
1,Tommy Akingbesote,DT,1,,
2,Darius Alexander,DT,1,,
3,Zy Alexander,CB,1,,
4,LeQuint Allen,RB,0,,
...,...,...,...,...,...
242,Hunter Wohler,S,1,,
243,Ernest Woodard,LB,1,,
244,Craig Woodson,S,1,,
245,Marcus Yarns,RB,1,,


In [19]:
# Write the results table to a CSV file in the working directory.
# NOTE(review): with to_csv's defaults the row index is written as an unnamed
# first column; pass index=False if that column is not wanted downstream.
predictors.to_csv(path_or_buf="2025 prediction 2.csv")

# Accuracy score increased to 72.1%
# NOTE(review): no cell in this notebook prints 72.1% (the held-out accuracy
# shown above is 0.699) -- confirm where this figure comes from.