In [76]:
# Import important standard libraries
import pandas as pd
import numpy as np

# import itertools
import itertools
from datetime import datetime

In [77]:
# Load the raw combine measurements, discarding any row that is missing one of
# the three columns used further down to size the prediction grid.
combine_raw_df = (
    pd.read_csv('resources/combine_df.csv')
    .dropna(subset=['Height (in)', 'Weight (lbs)', 'Broad Jump (in)'])
)

# Show which columns the file provides.
combine_raw_df.columns

Index(['Year', 'Name', 'College', 'POS', 'Height (in)', 'Weight (lbs)', 'BMI',
       '40 Yard', 'Bench Press', 'Vert Leap (in)', 'Broad Jump (in)',
       'Shuttle', '3Cone'],
      dtype='object')

In [78]:
# Create lists of values that will be our input data.
# Use the min and max values from the original data, padded by 5% on each
# side, as the range.
def _padded_int_range(values, low_factor=0.95, high_factor=1.05):
    """Return consecutive integers spanning ``values`` with padding.

    Parameters:
        values: pandas Series of numeric measurements (no missing values).
        low_factor: multiplier applied to the minimum to get the start.
        high_factor: multiplier applied to the integer-truncated maximum
            to get the stop.

    Returns:
        A list of ints from ``int(values.min() * low_factor)`` up to, but not
        including, ``int(int(values.max()) * high_factor)``. ``range``
        excludes its stop value, so the grid ends one below the padded maximum.
    """
    start = int(values.min() * low_factor)
    stop = int(int(values.max()) * high_factor)
    return list(range(start, stop))


height = _padded_int_range(combine_raw_df['Height (in)'])
weight = _padded_int_range(combine_raw_df['Weight (lbs)'])
broad_jump = _padded_int_range(combine_raw_df['Broad Jump (in)'])

# Combine the values into all possible combinations
# (the last factor, broad jump, varies fastest).
combo_list = list(itertools.product(height, weight, broad_jump))

predictions_df = pd.DataFrame(combo_list, columns=['Height (in)', 'Weight (lbs)','Broad Jump (in)'])
# Tag every row with the calendar year at the time the cell runs; note that
# this makes the grid depend on when the notebook is executed.
predictions_df.insert(0,'Year',datetime.now().year)

# Preview the first rows of the dataframe
predictions_df.head()

Unnamed: 0,Year,Height (in),Weight (lbs),Broad Jump (in)
0,2024,58,136,6
1,2024,58,136,7
2,2024,58,136,8
3,2024,58,136,9
4,2024,58,136,10


In [79]:
# Check the size of the prediction grid as (rows, columns).
predictions_df.shape

(1118880, 4)

In [80]:
# Import pickle
import pickle

# Import our model from the other Regression ML notebook.
# NOTE: unpickling runs code stored in the file, so only load a model file
# that comes from a trusted source.
# The context manager closes the file handle as soon as the model is loaded
# instead of leaving it open for the life of the kernel.
with open('combine_score_predictive_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)


In [81]:
# Pass only the four columns the grid is built with, selected by name, so this
# cell still works when it is re-run after result columns have been added to
# predictions_df.
feature_columns = ['Year', 'Height (in)', 'Weight (lbs)', 'Broad Jump (in)']
predictions = loaded_model.predict(predictions_df[feature_columns])

predictions

array([4.77417501, 4.77417501, 4.77417501, ..., 5.1452306 , 5.1452306 ,
       5.1452306 ])

In [82]:
# Use only the grid's four input columns, selected by name, so this cell can be
# re-run after result columns have been added to predictions_df. Converting to
# a NumPy array once avoids repeating the DataFrame conversion for every tree
# and avoids the feature-name warning scikit-learn can emit when an individual
# tree is handed a DataFrame.
feature_columns = ['Year', 'Height (in)', 'Weight (lbs)', 'Broad Jump (in)']
grid_values = predictions_df[feature_columns].to_numpy()

# Get predictions from each tree (one row per tree, one column per grid row)
predictions_per_tree = np.array([tree.predict(grid_values) for tree in loaded_model.estimators_])

# Calculate prediction mean across trees
mean_prediction = np.mean(predictions_per_tree, axis=0)

# Empirical 5th and 95th percentiles of the per-tree predictions, computed in a
# single pass. This is the spread between trees (a rough 90% band), not a
# calibrated statistical confidence interval.
lower_bound, upper_bound = np.percentile(predictions_per_tree, [5, 95], axis=0)

# Display bounds
print(lower_bound, mean_prediction, upper_bound)

[4.49178571 4.49178571 4.49178571 ... 4.80791579 4.80791579 4.80791579] [4.77417501 4.77417501 4.77417501 ... 5.1452306  5.1452306  5.1452306 ] [5.29193342 5.29193342 5.29193342 ... 5.3339663  5.3339663  5.3339663 ]


In [83]:
# Attach the mean prediction and both bounds to the grid as new columns,
# in this order.
for column_name, column_values in (
    ('Predictions', mean_prediction),
    ('Lower Bound', lower_bound),
    ('Upper Bound', upper_bound),
):
    predictions_df[column_name] = column_values

# Spot-check five randomly chosen rows.
predictions_df.sample(n=5)

Unnamed: 0,Year,Height (in),Weight (lbs),Broad Jump (in),Predictions,Lower Bound,Upper Bound
680056,2024,75,140,150,4.517112,4.426711,4.615716
909313,2024,80,340,7,5.488129,5.304661,5.688979
1023390,2024,83,300,124,4.999076,4.807916,5.279203
291437,2024,65,215,31,4.861718,4.604726,5.291933
758100,2024,76,398,50,5.468609,5.310864,5.644882


In [84]:
# Save the grid together with its predictions and bounds to a CSV file.
# to_csv is called with its defaults, so the row index is written as an
# unnamed first column.
predictions_df.to_csv('40_yard_predicitons.csv')