# Predict 40 Yard Dash Times with our Model

In a previous notebook we tested several models and saved the one that served our purposes. In this notebook we'll create the dataset and use our model to predict 40 yard dash times.

In [10]:
# Import important standard libraries
import pandas as pd
import numpy as np

# import itertools
import itertools
from datetime import datetime

In [11]:
# Read original data file
combine_raw_df  = pd.read_csv('resources/combine_df.csv').dropna(subset=['Height (in)', 'Weight (lbs)','Broad Jump (in)'])

combine_raw_df.columns

Index(['Year', 'Name', 'College', 'POS', 'Height (in)', 'Weight (lbs)', 'BMI',
       '40 Yard', 'Bench Press', 'Vert Leap (in)', 'Broad Jump (in)',
       'Shuttle', '3Cone'],
      dtype='object')

In [12]:
# Create lists of values that will be our input data.
# use the min and max values from the orignal data as the range.
height = [i for i in range(int(combine_raw_df['Height (in)'].min()),
                               int(combine_raw_df['Height (in)'].astype('int').max()),1)]
weight = [i for i in range(int(combine_raw_df['Weight (lbs)'].min()),
                               int(combine_raw_df['Weight (lbs)'].astype('int').max()),1)]
broad_jump = [i for i in range(72,150,1)]

# Combine the values into all possible combinations

combo_list = list(itertools.product(height,weight,broad_jump))

predictions_df = pd.DataFrame(combo_list, columns=['Height (in)', 'Weight (lbs)','Broad Jump (in)'])
predictions_df.insert(0,'Year',datetime.now().year)

# Print the dataframe
predictions_df.head()

Unnamed: 0,Year,Height (in),Weight (lbs),Broad Jump (in)
0,2024,61,144,72
1,2024,61,144,73
2,2024,61,144,74
3,2024,61,144,75
4,2024,61,144,76


In [13]:
predictions_df.shape

(398034, 4)

In [14]:
# Import pickle
import pickle

# Import our model from the other Regression ML notebook
loaded_model = pickle.load(open('combine_score_predictive_model.pkl', 'rb'))


In [15]:
predictions = loaded_model.predict(predictions_df)

predictions

array([4.79081192, 4.79081192, 4.79081192, ..., 4.93207831, 4.93207831,
       4.93207831])

In [16]:
# Get predictions from each tree
predictions_per_iteration = np.array([loaded_model.predict(predictions_df, ntree_end=i) for i in range(1, loaded_model.tree_count_ + 1)])

# Calculate prediction mean
mean_prediction = np.mean(predictions_per_iteration, axis=0)

# Calculate prediction intervals 
lower_bound = np.percentile(predictions_per_iteration, 5, axis=0)
upper_bound = np.percentile(predictions_per_iteration, 95, axis=0)

# Display bounds
print(lower_bound, mean_prediction, upper_bound)

[4.76190532 4.76190532 4.76190532 ... 4.82397806 4.82397806 4.82397806] [4.7853803  4.7853803  4.7853803  ... 4.89409935 4.89409935 4.89409935] [4.79663344 4.79663344 4.79663344 ... 4.93253585 4.93253585 4.93253585]


In [22]:
predictions_df['Predictions'] = mean_prediction
predictions_df['Lower Bound'] = lower_bound
predictions_df['Upper Bound'] = upper_bound

predictions_df.sample(20)

Unnamed: 0,Year,Height (in),Weight (lbs),Broad Jump (in),Predictions,Lower Bound,Upper Bound
340326,2024,78,376,84,5.202377,4.928998,5.303213
182789,2024,70,300,107,4.952237,4.873157,4.981516
121433,2024,67,242,137,4.623986,4.585888,4.733161
274441,2024,75,260,109,4.851914,4.812291,4.869874
81621,2024,65,218,105,4.760399,4.748664,4.78051
146396,2024,68,319,140,4.834712,4.811908,4.844105
309250,2024,77,220,130,4.594956,4.552884,4.723339
203309,2024,71,320,113,4.91578,4.840114,4.951619
199975,2024,71,277,133,4.795922,4.787055,4.806229
65215,2024,64,251,79,4.870489,4.815267,4.908585


We now have predictions with upper and lower limits so that we an provide a range of anticipated results through PowerBI.

In [21]:
predictions_df.to_csv('resources/40_yard_predicitons.csv')