In [1]:
import numpy as np
import pandas as pd

from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Load the historical combine data (the file name indicates drafts 2000-2024) that the model is trained on.
df = pd.read_csv(
    filepath_or_buffer="CSVfiles/drafted_2000_to_2024_final_to_prepare_for_first_model.csv"
)
df

Unnamed: 0.1,Unnamed: 0,Player,Pos,School,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,isDrafted,Year
0,0,John Abraham,OLB,South Carolina,76,252,4.55,,,,,,1,2000
1,1,Shaun Alexander,RB,Alabama,72,218,4.58,,,,,,1,2000
2,2,Darnell Alford,OT,Boston Col.,76,334,5.56,25.0,23.0,94.0,8.48,4.98,1,2000
3,3,Kyle Allamon,TE,Texas Tech,74,253,4.97,29.0,,104.0,7.29,4.49,0,2000
4,4,Rashard Anderson,CB,Jackson State,74,206,4.55,34.0,,123.0,7.18,4.15,1,2000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7944,8467,Payton Wilson,LB,North Carolina State,76,233,4.43,34.5,,119.0,,,1,2024
7945,8468,Roman Wilson,WR,Michigan,71,185,4.39,,12.0,,,,1,2024
7946,8469,Mekhi Wingo,DT,LSU,72,284,4.85,31.5,25.0,109.0,,,1,2024
7947,8470,Xavier Worthy,WR,Texas,71,165,4.21,41.0,,131.0,,,1,2024


In [3]:
# Strip identifier / bookkeeping columns so that only the physical
# measurements and the isDrafted label remain.
df = df.drop(columns=["Year", "Player", "School", "Pos", "Unnamed: 0"])
df.head()

Unnamed: 0,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,isDrafted
0,76,252,4.55,,,,,,1
1,72,218,4.58,,,,,,1
2,76,334,5.56,25.0,23.0,94.0,8.48,4.98,1
3,74,253,4.97,29.0,,104.0,7.29,4.49,0
4,74,206,4.55,34.0,,123.0,7.18,4.15,1


In [4]:
# Separate the feature matrix (X) from the binary target (Y).
X = df.drop(columns=["isDrafted"])
Y = df["isDrafted"]

In [5]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
seed, test_size = 4902, 0.30
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=test_size,
    random_state=seed,
)

In [6]:
# Baseline: an XGBoost classifier with default hyper-parameters, trained on the training split.
model = XGBClassifier()
model.fit(X=X_train, y=y_train)

In [7]:
# Predict a label for every held-out row, then round each value into a plain list.
y_pred = model.predict(X_test)
predictions = list(map(round, y_pred))

In [8]:
# Share of held-out rows whose predicted label matches the true label, shown as a percentage.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 68.64%


In [9]:
# Second attempt: limit tree depth to 3, then score on the same hold-out split.
# NOTE: this rebinds `model`, so the later prospect predictions use this depth-3 classifier.
model = XGBClassifier(max_depth=3).fit(X_train, y_train)

# Predicted labels for the held-out rows, rounded into a plain list.
y_pred = model.predict(X_test)
predictions = list(map(round, y_pred))

# Report hold-out accuracy as a percentage.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 69.52%


# Now we get the 2025 Prospects

In [10]:
from splinter import Browser
import os
import csv

import pandas as pd
import numpy as np

from selenium.webdriver.chrome.service import Service

# Build a Selenium Service around the chromedriver binary; the path is relative,
# so chromedriver.exe is looked up from the notebook's working directory.
service = Service(executable_path='chromedriver.exe')
# Create the splinter Browser for Chrome using that driver service.
browser = Browser('chrome', service=service)

In [11]:
# Open the 2025 combine results page (plain literal: the URL has no placeholders to format).
browser.visit("https://www.pro-football-reference.com/draft/2025-combine.htm")

In [12]:
from io import StringIO

# Take the first element matching ".table_container" on the loaded page.
table = browser.find_by_css(".table_container")[0]

# Passing a literal HTML string to read_html is deprecated (it triggered the
# warning this cell used to print); wrapping the markup in StringIO is the
# supported way to parse in-memory HTML. [0] selects the first parsed table.
parsedtable = pd.read_html(StringIO(table.html))[0]

# Tag every scraped row with the draft class year.
parsedtable["Year"] = 2025

  parsedtable = pd.read_html(table.html)[0]


In [13]:
# NOTE: plain assignment - `combinetable` is a second name for the same
# DataFrame object as `parsedtable`, not an independent copy.
combinetable = parsedtable
combinetable

Unnamed: 0,Player,Pos,School,College,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Drafted (tm/rnd/yr),Year
0,BJ Adams,CB,Central Florida,College Stats,6-2,182,4.53,32.5,,117,,,,2025
1,Tommy Akingbesote,DT,Maryland,College Stats,6-4,306,5.09,28.0,,103,,,Dallas Cowboys / 7th / 247th pick / 2025,2025
2,Darius Alexander,DT,Toledo,College Stats,6-4,305,4.95,31.5,28,111,7.60,4.79,New York Giants / 3rd / 65th pick / 2025,2025
3,Zy Alexander,CB,LSU,College Stats,6-1,187,4.56,31.5,,116,,,,2025
4,LeQuint Allen,RB,Syracuse,College Stats,6-0,204,,35.0,,120,,,Jacksonville Jaguars / 7th / 236th pick / 2025,2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
330,Hunter Wohler,SAF,Wisconsin,College Stats,6-2,213,4.57,37.0,,120,6.72,4.25,Indianapolis Colts / 7th / 232nd pick / 2025,2025
331,Ernest Woodard,LB,UNLV,College Stats,6-2,234,,,21,,,,,2025
332,Craig Woodson,SAF,California,College Stats,6-0,200,4.45,36.0,13,127,,,New England Patriots / 4th / 106th pick / 2025,2025
333,Marcus Yarns,RB,Delaware,,5-11,193,4.45,37.5,,122,,,,2025


In [14]:
# "College" is not used by the model (the preview shows it holding a "College Stats" label), so drop it.
combinetable = combinetable.drop(columns=["College"])

# Rows where all six drill results are missing: collect their index labels and remove them.
deleteallnarows = combinetable[
    combinetable[["Bench", "40yd", "Vertical", "Broad Jump", "3Cone", "Shuttle"]]
    .isna()
    .all(axis="columns")
].index
combinetable.drop(deleteallnarows, inplace=True)

# Work on an independent copy and give the draft-result column the label name
# used by the training data.
prospects_2025_df = combinetable.copy().rename(
    columns={'Drafted (tm/rnd/yr)': 'isDrafted'}
)

# A missing draft description becomes 0 ...
condition2 = prospects_2025_df['isDrafted'].isna()
prospects_2025_df.loc[condition2, 'isDrafted'] = 0

# ... and every remaining (non-zero) value becomes 1.
condition3 = prospects_2025_df['isDrafted'].ne(0)
prospects_2025_df.loc[condition3, 'isDrafted'] = 1

In [15]:
# Preview the prospects after the draft column was recoded to 0/1 as isDrafted.
prospects_2025_df

Unnamed: 0,Player,Pos,School,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,isDrafted,Year
0,BJ Adams,CB,Central Florida,6-2,182,4.53,32.5,,117,,,0,2025
1,Tommy Akingbesote,DT,Maryland,6-4,306,5.09,28.0,,103,,,1,2025
2,Darius Alexander,DT,Toledo,6-4,305,4.95,31.5,28,111,7.60,4.79,1,2025
3,Zy Alexander,CB,LSU,6-1,187,4.56,31.5,,116,,,0,2025
4,LeQuint Allen,RB,Syracuse,6-0,204,,35.0,,120,,,1,2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...
330,Hunter Wohler,SAF,Wisconsin,6-2,213,4.57,37.0,,120,6.72,4.25,1,2025
331,Ernest Woodard,LB,UNLV,6-2,234,,,21,,,,0,2025
332,Craig Woodson,SAF,California,6-0,200,4.45,36.0,13,127,,,1,2025
333,Marcus Yarns,RB,Delaware,5-11,193,4.45,37.5,,122,,,0,2025


In [16]:
# Remove rows whose "Ht" cell is the literal text "Ht" - presumably header
# rows that the scraped table repeats inside its body (confirm against the page).
deleteHT = prospects_2025_df.index[prospects_2025_df["Ht"] == "Ht"]
prospects_2025_df.drop(deleteHT, inplace=True)

# Summary statistics of the columns pandas currently treats as numeric.
prospects_2025_df.describe()

Unnamed: 0,Year
count,247.0
mean,2025.0
std,0.0
min,2025.0
25%,2025.0
50%,2025.0
75%,2025.0
max,2025.0


In [17]:
# Preview the frame after the rows whose "Ht" cell equals "Ht" were dropped.
prospects_2025_df

Unnamed: 0,Player,Pos,School,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,isDrafted,Year
0,BJ Adams,CB,Central Florida,6-2,182,4.53,32.5,,117,,,0,2025
1,Tommy Akingbesote,DT,Maryland,6-4,306,5.09,28.0,,103,,,1,2025
2,Darius Alexander,DT,Toledo,6-4,305,4.95,31.5,28,111,7.60,4.79,1,2025
3,Zy Alexander,CB,LSU,6-1,187,4.56,31.5,,116,,,0,2025
4,LeQuint Allen,RB,Syracuse,6-0,204,,35.0,,120,,,1,2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...
330,Hunter Wohler,SAF,Wisconsin,6-2,213,4.57,37.0,,120,6.72,4.25,1,2025
331,Ernest Woodard,LB,UNLV,6-2,234,,,21,,,,0,2025
332,Craig Woodson,SAF,California,6-0,200,4.45,36.0,13,127,,,1,2025
333,Marcus Yarns,RB,Delaware,5-11,193,4.45,37.5,,122,,,0,2025


In [18]:
# Drop every prospect that has no recorded height.
# NOTE(review): an earlier version of this comment said these rows were "all
# from 2021", but every row in this frame carries Year 2025 - that remark
# looks carried over from another notebook.
deleteHT = prospects_2025_df.index[prospects_2025_df["Ht"].isna()]
prospects_2025_df.drop(deleteHT, inplace=True)

In [19]:
# Preview the frame after rows with a missing "Ht" were dropped.
prospects_2025_df

Unnamed: 0,Player,Pos,School,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,isDrafted,Year
0,BJ Adams,CB,Central Florida,6-2,182,4.53,32.5,,117,,,0,2025
1,Tommy Akingbesote,DT,Maryland,6-4,306,5.09,28.0,,103,,,1,2025
2,Darius Alexander,DT,Toledo,6-4,305,4.95,31.5,28,111,7.60,4.79,1,2025
3,Zy Alexander,CB,LSU,6-1,187,4.56,31.5,,116,,,0,2025
4,LeQuint Allen,RB,Syracuse,6-0,204,,35.0,,120,,,1,2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...
330,Hunter Wohler,SAF,Wisconsin,6-2,213,4.57,37.0,,120,6.72,4.25,1,2025
331,Ernest Woodard,LB,UNLV,6-2,234,,,21,,,,0,2025
332,Craig Woodson,SAF,California,6-0,200,4.45,36.0,13,127,,,1,2025
333,Marcus Yarns,RB,Delaware,5-11,193,4.45,37.5,,122,,,0,2025


In [20]:
# Sanity check before converting heights: how many "Ht" values contain a "-"
# (i.e. are written as feet-inches)?
count = sum(1 for height in prospects_2025_df["Ht"] if "-" in height)

count
# Verified: the count equals the number of remaining rows, so every height uses that format.

247

In [21]:
# Rewrite each "feet-inches" height as total inches (feet * 12 + inches).
# The column is snapshotted first so the loop never reads values it has
# already overwritten.
for row_label, height in list(prospects_2025_df["Ht"].items()):
    if "-" not in height:
        continue
    parts = height.split("-")
    feet, inches = int(parts[0]), int(parts[1])
    prospects_2025_df.loc[row_label, "Ht"] = feet * 12 + inches

In [22]:
# Preview: "Ht" now holds total inches instead of "feet-inches" text.
prospects_2025_df

Unnamed: 0,Player,Pos,School,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,isDrafted,Year
0,BJ Adams,CB,Central Florida,74,182,4.53,32.5,,117,,,0,2025
1,Tommy Akingbesote,DT,Maryland,76,306,5.09,28.0,,103,,,1,2025
2,Darius Alexander,DT,Toledo,76,305,4.95,31.5,28,111,7.60,4.79,1,2025
3,Zy Alexander,CB,LSU,73,187,4.56,31.5,,116,,,0,2025
4,LeQuint Allen,RB,Syracuse,72,204,,35.0,,120,,,1,2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...
330,Hunter Wohler,SAF,Wisconsin,74,213,4.57,37.0,,120,6.72,4.25,1,2025
331,Ernest Woodard,LB,UNLV,74,234,,,21,,,,0,2025
332,Craig Woodson,SAF,California,72,200,4.45,36.0,13,127,,,1,2025
333,Marcus Yarns,RB,Delaware,71,193,4.45,37.5,,122,,,0,2025


In [23]:
# Number of remaining prospects at each position, most common first.
prospects_2025_df["Pos"].value_counts()

Pos
WR      43
DT      31
CB      28
RB      28
EDGE    26
LB      19
TE      17
SAF     17
OT      16
G       13
C        5
QB       4
Name: count, dtype: int64

In [24]:
# Whole-number columns: convert to numeric and downcast to the smallest
# integer dtype that fits; values that cannot be parsed become NaN.
for column in ("Wt", "Ht", "isDrafted"):
    prospects_2025_df[column] = pd.to_numeric(
        prospects_2025_df[column], downcast='integer', errors='coerce'
    )

# Drill results: convert to numeric without downcasting; values that cannot
# be parsed become NaN.
for column in ("40yd", "Vertical", "Bench", "Broad Jump", "3Cone", "Shuttle"):
    prospects_2025_df[column] = pd.to_numeric(
        prospects_2025_df[column], errors='coerce'
    )

In [25]:
# Height summary statistics for each (position, draft outcome) group.
prospects_2025_df.groupby(["Pos", "isDrafted"])["Ht"].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
Pos,isDrafted,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
C,0,3.0,76.0,1.0,75.0,75.5,76.0,76.5,77.0
C,1,2.0,75.5,0.707107,75.0,75.25,75.5,75.75,76.0
CB,0,11.0,72.454545,1.634848,69.0,71.5,73.0,73.0,75.0
CB,1,17.0,71.588235,1.583462,69.0,71.0,71.0,73.0,75.0
DT,0,12.0,75.25,1.484771,73.0,74.75,75.0,76.0,78.0
DT,1,19.0,75.947368,1.715086,73.0,75.0,76.0,77.0,79.0
EDGE,0,7.0,75.714286,2.058663,73.0,74.5,76.0,77.0,78.0
EDGE,1,19.0,75.789474,1.084176,74.0,75.0,76.0,76.5,78.0
G,0,5.0,77.2,1.095445,76.0,77.0,77.0,77.0,79.0
G,1,8.0,77.25,1.28174,75.0,76.75,77.5,78.0,79.0


In [26]:
# Relabel any position containing "SAF" as "S" (the position label that
# appears in the final prospect table).
is_safety = ["SAF" in position for position in prospects_2025_df["Pos"]]
prospects_2025_df.loc[is_safety, "Pos"] = "S"

In [27]:
# Preview the cleaned frame: numeric measurements, and "SAF" positions now shown as "S".
prospects_2025_df

Unnamed: 0,Player,Pos,School,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,isDrafted,Year
0,BJ Adams,CB,Central Florida,74,182,4.53,32.5,,117.0,,,0,2025
1,Tommy Akingbesote,DT,Maryland,76,306,5.09,28.0,,103.0,,,1,2025
2,Darius Alexander,DT,Toledo,76,305,4.95,31.5,28.0,111.0,7.60,4.79,1,2025
3,Zy Alexander,CB,LSU,73,187,4.56,31.5,,116.0,,,0,2025
4,LeQuint Allen,RB,Syracuse,72,204,,35.0,,120.0,,,1,2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...
330,Hunter Wohler,S,Wisconsin,74,213,4.57,37.0,,120.0,6.72,4.25,1,2025
331,Ernest Woodard,LB,UNLV,74,234,,,21.0,,,,0,2025
332,Craig Woodson,S,California,72,200,4.45,36.0,13.0,127.0,,,1,2025
333,Marcus Yarns,RB,Delaware,71,193,4.45,37.5,,122.0,,,0,2025


In [41]:
# Save the cleaned prospects to the exact path the prediction section reads
# back ("CSVfiles/Prospects2025.csv"). Writing to the working directory under
# a different name meant a fresh Restart & Run All loaded a stale or missing file.
# The index is written on purpose: it comes back as "Unnamed: 0", which the
# loading cell drops.
prospects_2025_df.to_csv("CSVfiles/Prospects2025.csv")

# Complete 2025 Prospects

In [28]:
# Reload the saved 2025 prospects and set aside who each row is.
# (`Positon` keeps its original spelling because a later cell refers to it by that name.)
prospects_df = pd.read_csv("CSVfiles/Prospects2025.csv")
Name, Positon = prospects_df["Player"], prospects_df["Pos"]

# Keep only the measurement columns - the same feature set the model was fitted on.
prospects_df = prospects_df.drop(
    columns=["Year", "Player", "School", "Pos", "Unnamed: 0", "isDrafted"]
)
prospects_df.head()

Unnamed: 0,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle
0,74,182,4.53,32.5,,117.0,,
1,76,306,5.09,28.0,,103.0,,
2,76,305,4.95,31.5,28.0,111.0,7.6,4.79
3,73,187,4.56,31.5,,116.0,,
4,72,204,,35.0,,120.0,,


In [29]:
# Predict a drafted / not-drafted label for every 2025 prospect, rounded into a plain list.
y_pred = model.predict(prospects_df)
predictions = list(map(round, y_pred))

In [31]:
# Put each prospect's name and position next to the model's prediction.
# "2025 Actual" and "isCorrect" start as empty strings.
predictors = pd.DataFrame(
    {
        "Name": Name,
        "Position": Positon,
        "Predictions": predictions,
        "2025 Actual": "",
        "isCorrect": "",
    }
)
predictors

Unnamed: 0,Name,Position,Predictions,2025 Actual,isCorrect
0,BJ Adams,CB,1,,
1,Tommy Akingbesote,DT,1,,
2,Darius Alexander,DT,1,,
3,Zy Alexander,CB,1,,
4,LeQuint Allen,RB,1,,
...,...,...,...,...,...
242,Hunter Wohler,S,1,,
243,Ernest Woodard,LB,1,,
244,Craig Woodson,S,1,,
245,Marcus Yarns,RB,1,,


In [46]:
# Write the prediction table (including its index) to a CSV in the working directory.
predictors.to_csv(path_or_buf="2025 prediction.csv")

# Watch the draft and see the results!