# Predict 40 Yard Dash Times with our Model

In a previous notebook we tested several models and saved the one that served our purposes. In this notebook we'll create the dataset and use our model to predict 40 yard dash times.

In [16]:
# Import important standard libraries
import pandas as pd
import numpy as np

# import itertools
import itertools
from datetime import datetime

In [17]:
# Load the original combine data, keeping only the rows that have a value
# for every column used to build the prediction inputs below.
required_columns = ['Height (in)', 'Weight (lbs)','Broad Jump (in)']
combine_raw_df = pd.read_csv('resources/combine_df.csv').dropna(subset=required_columns)

combine_raw_df.columns

Index(['Year', 'Name', 'College', 'POS', 'Height (in)', 'Weight (lbs)', 'BMI',
       '40 Yard', 'Bench Press', 'Vert Leap (in)', 'Broad Jump (in)',
       'Shuttle', '3Cone'],
      dtype='object')

In [18]:
# Build the candidate values for each input feature.
# Height and weight run over whole numbers from 95% of the smallest value in
# the original data up to (but not including) 105% of the largest value.
height_start = int(combine_raw_df['Height (in)'].min() * 0.95)
height_stop = int(combine_raw_df['Height (in)'].astype('int').max() * 1.05)
height = list(range(height_start, height_stop))

weight_start = int(combine_raw_df['Weight (lbs)'].min() * 0.95)
weight_stop = int(combine_raw_df['Weight (lbs)'].astype('int').max() * 1.05)
weight = list(range(weight_start, weight_stop))

# Broad jump uses a fixed range of 72 through 149 inches.
broad_jump = list(range(72, 150))

# Combine the values into all possible (height, weight, broad jump) combinations;
# each combination becomes one row of the input frame.
combo_list = list(itertools.product(height, weight, broad_jump))

feature_names = ['Height (in)', 'Weight (lbs)','Broad Jump (in)']
predictions_df = pd.DataFrame(combo_list, columns=feature_names)

# Put the current calendar year in front as the first column.
predictions_df.insert(loc=0, column='Year', value=datetime.now().year)

# Preview the first rows of the dataframe
predictions_df.head()

Unnamed: 0,Year,Height (in),Weight (lbs),Broad Jump (in)
0,2024,58,136,72
1,2024,58,136,73
2,2024,58,136,74
3,2024,58,136,75
4,2024,58,136,76


In [19]:
# (rows, columns) of the input grid: one row per height/weight/broad-jump combination
predictions_df.shape

(589680, 4)

In [20]:
# Import pickle
import pickle

# Load the model saved by the other Regression ML notebook.
# NOTE: unpickling can execute arbitrary code, so only load a model file that
# you created yourself or otherwise trust.
# The `with` block closes the file handle as soon as the model is loaded
# (a bare open() inside pickle.load() leaves it open).
with open('combine_score_predictive_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)


In [21]:
# Predict from the four input columns only. Selecting them explicitly keeps
# this cell re-runnable: a later cell adds the prediction and bound columns to
# predictions_df, and passing those extra columns to the model would fail.
feature_columns = ['Year', 'Height (in)', 'Weight (lbs)', 'Broad Jump (in)']
predictions = loaded_model.predict(predictions_df[feature_columns])

predictions

array([4.74135594, 4.74135594, 4.74135594, ..., 4.98314081, 4.98314081,
       4.98314081])

In [22]:
# Use only the four input columns so this cell still works when it is re-run
# after the prediction and bound columns have been added to predictions_df.
feature_columns = ['Year', 'Height (in)', 'Weight (lbs)', 'Broad Jump (in)']

# Convert to a plain array once instead of handing the DataFrame to every tree.
# NOTE(review): assumes loaded_model is a scikit-learn forest/bagging ensemble,
# whose sub-estimators are fitted on arrays and warn about feature names when
# given a DataFrame -- confirm against the training notebook.
feature_values = predictions_df[feature_columns].to_numpy()

# Get predictions from each tree: one row per tree, one column per input row
predictions_per_tree = np.array([tree.predict(feature_values) for tree in loaded_model.estimators_])

# Calculate prediction mean across the trees
mean_prediction = predictions_per_tree.mean(axis=0)

# 5th and 95th percentiles of the per-tree predictions, i.e. the band holding
# the middle 90% of the trees. This describes the spread between trees; it is
# not a formal statistical confidence interval.
lower_bound, upper_bound = np.percentile(predictions_per_tree, [5, 95], axis=0)

# Display bounds
print(lower_bound, mean_prediction, upper_bound)

[4.50548661 4.50548661 4.50548661 ... 4.66027778 4.66027778 4.66027778] [4.74135594 4.74135594 4.74135594 ... 4.98314081 4.98314081 4.98314081] [5.05215625 5.05215625 5.05215625 ... 5.20684375 5.20684375 5.20684375]


In [25]:
# Attach the mean prediction and its lower/upper bounds to the input grid,
# in the order they should appear as columns.
result_columns = {
    'Predictions': mean_prediction,
    'Lower Bound': lower_bound,
    'Upper Bound': upper_bound,
}
for column_name, column_values in result_columns.items():
    predictions_df[column_name] = column_values

predictions_df.head(10)

Unnamed: 0,Year,Height (in),Weight (lbs),Broad Jump (in),Predictions,Lower Bound,Upper Bound
0,2024,58,136,72,4.741356,4.505487,5.052156
1,2024,58,136,73,4.741356,4.505487,5.052156
2,2024,58,136,74,4.741356,4.505487,5.052156
3,2024,58,136,75,4.741356,4.505487,5.052156
4,2024,58,136,76,4.741356,4.505487,5.052156
5,2024,58,136,77,4.741356,4.505487,5.052156
6,2024,58,136,78,4.741356,4.505487,5.052156
7,2024,58,136,79,4.741356,4.505487,5.052156
8,2024,58,136,80,4.741356,4.505487,5.052156
9,2024,58,136,81,4.741356,4.505487,5.052156


We now have predictions with upper and lower limits, so we can provide a range of anticipated results through PowerBI.

In [24]:
# Export the inputs, predictions, and bounds to CSV (the row index is written
# as the first column).
output_path = 'resources/40_yard_predicitons.csv'
predictions_df.to_csv(output_path)