In [202]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import re
import scipy.stats
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [203]:
data = pd.read_csv("ZCruit_Cleaned_Data.csv")

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [204]:
def height_str_to_val(height_str):
    match = re.match("^(\d)'\s*(\d*\.*\d*)", height_str)
    groups = match.groups()
    feet, inches = int(groups[0]), float(groups[1])
    return feet * 12 + inches 

In [205]:
data['Height'] = data['Height'].apply(height_str_to_val)

In [206]:
def create_percentile(position, event):
    """Take in a position - CB, WR, QB, etc. and an event - Fourty Yard Dash, Weight, 3 Cone and return a list of percentile values of 0, 10th, ... 100th percentile. 
        Reverse for all speed events (lower is better) such as 40, 3 Cone, and Shuttle."""
    sorted_athletes = data[data['Position'] == position]     
    event_values = (sorted_athletes[event].astype(float)).dropna()
    percentile = []
    for i in np.arange(0, 1.01, 0.05):
        percentile.append(event_values.quantile(i))
    if (event == "Forty Yard Dash") or (event == "Shuttle") or (event == "3 Cone"):
        return percentile[::-1]
    return percentile

def value_to_percentile(event, value, metric_dict):
    if (event == "Forty Yard Dash") or (event == "Shuttle") or (event == "3 Cone"):
        return round((100 - scipy.stats.percentileofscore(metric_dict[event], value, kind='rank')), 2)
    else:
        return round((scipy.stats.percentileofscore(metric_dict[event], value, kind='rank')), 2)
    
def percentile_to_value(event, percentile, metric_dict):
    if (event == "Forty Yard Dash") or (event == "Shuttle") or (event == "3 Cone"):
        return scipy.stats.scoreatpercentile(metric_dict[event], 100 - percentile)
    else:
        return scipy.stats.scoreatpercentile(metric_dict[event], percentile)
        
def add_score(metric, data, metric_dict):
    # data = data.dropna()
    data[metric + " " + "Percentile"] = data.apply(lambda x: value_to_percentile(metric, x[metric], metric_dict), axis = 1)
    return data

def nan_counter(data, metrics):
    nan_dict, readable_dict = {}, {}
    for metric in metrics:
        nan_dict[metric] = data[metric].isna().sum() / data.shape[0]
        readable_dict[metric] = "There are {} NaNs out of {} players".format(data[metric].isna().sum(), data.shape[0])
    print(readable_dict)
    return nan_dict

def master_fillna(data, position, metrics):
    # Filtering data    
    data = data[data["Position"] == position]
    data_with_metrics = data[metrics]
    
    # Getting NaN information
    nan_info = nan_counter(data_with_metrics, metrics)
    
    # Classifying metrics into complete, incomplete, and drop
    complete_metric, incomplete_metric, drop_metric = [], [], []
    for metric in nan_info.keys():
        if nan_info[metric] == 0:
            complete_metric.append(metric)
        elif metric in ["Forty Yard Dash", "3 Cone", "Shuttle", "Vertical Jump", "Broad Jump", "Wingspan", "Arm Length","Hand Size", "Powerball Toss"]:
            incomplete_metric.append(metric)
        elif nan_info[metric] > 0.975:
            drop_metric.append(metric)
        else:
            incomplete_metric.append(metric)
    
    data_with_metrics = data_with_metrics.drop(columns = drop_metric)
    
    metric_dict = {}
    for metric in data_with_metrics.columns:
        metric_dict[metric] = create_percentile(position, metric)
    # We want to fill in the incomplete metrics
    completed_metrics = {}
    for metric in incomplete_metric:
        correlations = data_with_metrics.corr()[metric]
        correlations = correlations[correlations != 1].sort_values(ascending = False)
        metric_filter = correlations > 0.4 
        best_predictors = metric_filter[metric_filter].index.values

        print(metric, best_predictors)
    
    #IF there are no good predictors for linear regression (ie: no metrics with correlation > 0.4 with incomplete metric),
    #then we use the average approach. 
        if len(best_predictors) == 0:
            #For example, if we are predicting Hand Size with the average approach. We want the percentiles for Height, Weight, and Wingspan
            all_other_metrics = incomplete_metric + complete_metric
            all_other_metrics.remove(metric)
            
            all_other_percentiles = data_with_metrics.copy()
            for percentile_metrics in all_other_metrics:
                all_other_percentiles = add_score(percentile_metrics, all_other_percentiles, metric_dict)
                
            empty_metric = all_other_percentiles[all_other_percentiles[metric].isna()]
            full_metric = all_other_percentiles[all_other_percentiles[metric].notna()]
            
            metric_percentile_names =  [x + " " + "Percentile" for x in all_other_metrics]
            only_percentiles = empty_metric[metric_percentile_names]
            
            only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
            
            only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
            empty_metric[metric] = only_percentiles['Predicted' + " " + metric]
            
            all_metric = empty_metric.append(full_metric).sort_index()
            
            completed_metrics[metric] = all_metric[metric].values
            #print(completed_metrics, len(completed_metrics[metric]), np.count_nonzero(~np.isnan(completed_metrics[metric])))

        else:
            full_metric = data_with_metrics[data_with_metrics[metric].notna()]
            empty_metric = data_with_metrics[data_with_metrics[metric].isna()]
            print("There are {} players with metric and {} NaNs.".format(full_metric.shape[0], empty_metric.shape[0]))
            labels = [best_predictors[0]] + [metric]
            predictor = best_predictors[0]
            print(labels)
            full_metric_with_no_predictor_nulls = full_metric[labels].dropna()
            X = full_metric_with_no_predictor_nulls[[predictor]]
            y = full_metric_with_no_predictor_nulls[metric]
            
            reg = LinearRegression(normalize = True).fit(X,y)
            metric_mean = np.median(y)
            
            empty_metric_with_no_predictor_nulls = empty_metric[[predictor]].dropna()
            if len(empty_metric_with_no_predictor_nulls) == 0:
                empty_metric_with_no_predictor_nulls[metric] = metric_mean
            else: 
                empty_metric_with_no_predictor_nulls[metric] = reg.predict(empty_metric_with_no_predictor_nulls)

            empty_metric_with_predictor_nulls = empty_metric[empty_metric[predictor].isna()]
            empty_metric_with_predictor_nulls[metric] = metric_mean
            
            print("There are {} players with a {} (predictor) value and {} players with NaNs for {}.".format(empty_metric_with_no_predictor_nulls.shape[0], best_predictors[0], empty_metric_with_predictor_nulls.shape[0], best_predictors[0]))
            print(empty_metric_with_no_predictor_nulls.shape[0] +  empty_metric_with_predictor_nulls.shape[0] == empty_metric.shape[0])

            empty_metric = empty_metric_with_no_predictor_nulls.append(empty_metric_with_predictor_nulls).sort_index()
            
            all_metric = empty_metric.append(full_metric).sort_index()
            
            completed_metrics[metric] = all_metric[metric].values
            
            train_y = reg.predict(X)
            train_error = mean_squared_error(y, train_y)
            print(reg.coef_)
            print("The RMSE of the model on the training data is {}. The average {} of the training data is {}. On average, predictions are {} from the actual values in the training data".format(np.sqrt(train_error), metric, np.mean(y), np.sqrt(train_error)/np.mean(y)))
            #print(completed_metrics, len(completed_metrics[metric]), np.count_nonzero(~np.isnan(completed_metrics[metric])))
    
    for metric in incomplete_metric:
        print(metric)
        print("There are {} NaN values for this metric".format(data[metric].isna().sum()))
        data[metric] = completed_metrics[metric]
        print("There are {} NaN values for this metric".format(data[metric].isna().sum()))
    
    all_metrics = ["Full Name", "Position"] + complete_metric + incomplete_metric
    cleaned_dataset = data[all_metrics]
    
    return cleaned_dataset

In [207]:
data[data['Position'] == 'RB']

Unnamed: 0,Full Name,Last Name,Grad Year,Position,Height,Height Source,Weight,Weight Source,Zcruit Rating,Committed To,...,Hand Size Source,Powerball Toss,Powerball Toss Source,SPARQ Rating,SPARQ Rating Source,GPA,ACT,SAT,Unnamed: 37,Unnamed: 38
23,Camar Wheaton,Wheaton,2021,RB,71.0,national_preps,190.0,national_preps,95.4848,Alabama,...,,,,,,2.70,,,,
28,Donovan Edwards,Edwards,2021,RB,70.5,nike_opening,203.0,nike_opening,95.4242,Michigan,...,,,,,,3.20,,,,
41,Xavian Sorey,Sorey,2021,RB,74.0,rivals_camp,207.0,rivals_camp,95.1212,Georgia,...,rivals_camp,,,,,2.60,,,,
45,Will Shipley,Shipley,2021,RB,71.0,nike_opening,198.0,nike_opening,94.8182,Clemson,...,rivals_camp,,,,,4.33,,,,
48,TreVeyon Henderson,Henderson,2021,RB,69.5,rivals_camp,177.5,rivals_camp,94.7273,Ohio State,...,rivals_camp,,,,,4.50,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71332,Cassius Sadler,Sadler,2021,RB,64.0,rivals,107.0,rivals,,,...,,,,,,,,,,
71339,Moses Perez,Perez,2023,RB,60.0,rivals,104.0,rivals,,,...,,,,,,,,,,
71346,Carmine D’Alessandro,D’Alessandro,2022,RB,65.0,national_preps,100.0,national_preps,,,...,,,,,,,,,,
71348,Al'jay Pipkins,Pipkins,2021,RB,61.0,rivals,100.0,rivals,,,...,,,,,,,,,,


In [208]:
master_fillna(data, "RB", ["Forty Yard Dash", "Shuttle", "3 Cone"])

{'Forty Yard Dash': 'There are 4197 NaNs out of 9080 players', 'Shuttle': 'There are 6000 NaNs out of 9080 players', '3 Cone': 'There are 6916 NaNs out of 9080 players'}
Forty Yard Dash ['3 Cone' 'Shuttle']
There are 4883 players with metric and 4197 NaNs.
['3 Cone', 'Forty Yard Dash']
There are 63 players with a 3 Cone (predictor) value and 4134 players with NaNs for 3 Cone.
True
[0.33403925]
The RMSE of the model on the training data is 0.23836276682738344. The average Forty Yard Dash of the training data is 5.023432175154691. On average, predictions are 0.04745018117419756 from the actual values in the training data
Shuttle ['3 Cone' 'Forty Yard Dash']
There are 3080 players with metric and 6000 NaNs.
['3 Cone', 'Shuttle']
There are 21 players with a 3 Cone (predictor) value and 5979 players with NaNs for 3 Cone.
True
[0.37746799]
The RMSE of the model on the training data is 0.23545532572356265. The average Shuttle of the training data is 4.682819412039187. On average, predictions 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Unnamed: 0,Full Name,Position,Forty Yard Dash,Shuttle,3 Cone
23,Camar Wheaton,RB,4.500,4.654,7.750000
28,Donovan Edwards,RB,4.740,4.500,7.597472
41,Xavian Sorey,RB,4.600,4.654,7.750000
45,Will Shipley,RB,4.460,4.210,6.940000
48,TreVeyon Henderson,RB,4.300,4.654,7.750000
...,...,...,...,...,...
71332,Cassius Sadler,RB,4.997,4.654,7.750000
71339,Moses Perez,RB,4.997,4.654,7.750000
71346,Carmine D’Alessandro,RB,4.997,4.654,7.750000
71348,Al'jay Pipkins,RB,4.997,4.654,7.750000


In [209]:
data[data['Position'] == 'RB'][['Full Name', 'Position', 'Forty Yard Dash', 'Shuttle', '3 Cone']]

Unnamed: 0,Full Name,Position,Forty Yard Dash,Shuttle,3 Cone
23,Camar Wheaton,RB,4.50,,
28,Donovan Edwards,RB,4.74,4.50,
41,Xavian Sorey,RB,4.60,,
45,Will Shipley,RB,4.46,4.21,6.94
48,TreVeyon Henderson,RB,4.30,,
...,...,...,...,...,...
71332,Cassius Sadler,RB,,,
71339,Moses Perez,RB,,,
71346,Carmine D’Alessandro,RB,,,
71348,Al'jay Pipkins,RB,,,


In [210]:
positions = ["WR",
"RB", 
"LB",
"QB", 
"DE", 
"CB", 
"OL",
"DT", 
"S", 
"OT",  
"OG",  
"TE",  
"ATH", 
"DB",  
"DL", 
"OC",  
"K", 
"FB",  
"LS", 
"P"]

combine_metrics = ["Height", "Weight", "Forty Yard Dash", "3 Cone", "Shuttle", "Vertical Jump", "Broad Jump"]
zcruit_metrics = ["Wingspan","Arm Length","Hand Size", "Powerball Toss"]

In [211]:
%%time
cleaned_combine_metrics_position_dataframes = {}
for position in positions:
    cleaned_combine_metrics_position_dataframes[position] = master_fillna(data, position, combine_metrics + zcruit_metrics)

{'Height': 'There are 0 NaNs out of 12359 players', 'Weight': 'There are 0 NaNs out of 12359 players', 'Forty Yard Dash': 'There are 6235 NaNs out of 12359 players', '3 Cone': 'There are 9546 NaNs out of 12359 players', 'Shuttle': 'There are 8483 NaNs out of 12359 players', 'Vertical Jump': 'There are 8608 NaNs out of 12359 players', 'Broad Jump': 'There are 8801 NaNs out of 12359 players', 'Wingspan': 'There are 11947 NaNs out of 12359 players', 'Arm Length': 'There are 12082 NaNs out of 12359 players', 'Hand Size': 'There are 11963 NaNs out of 12359 players', 'Powerball Toss': 'There are 12340 NaNs out of 12359 players'}
Forty Yard Dash []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

3 Cone ['Powerball Toss' 'Shuttle']
There are 2813 players with metric and 9546 NaNs.
['Powerball Toss', '3 Cone']
There are 17 players with a Powerball Toss (predictor) value and 9529 players with NaNs for Powerball Toss.
True
[0.02733333]
The RMSE of the model on the training data is 0.0. The average 3 Cone of the training data is 7.0215. On average, predictions are 0.0 from the actual values in the training data
Shuttle ['3 Cone']
There are 3876 players with metric and 8483 NaNs.
['3 Cone', 'Shuttle']
There are 35 players with a 3 Cone (predictor) value and 8448 players with NaNs for 3 Cone.
True
[0.38332496]
The RMSE of the model on the training data is 0.24604069736686787. The average Shuttle of the training data is 4.6740147588193075. On average, predictions are 0.05264011991032302 from the actual values in the training data
Vertical Jump ['Powerball Toss']
There are 3751 players with metric and 8608 NaNs.
['Powerball Toss', 'Vertical Jump']
There are 1 players with a Powerball T

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Height' 'Arm Length' 'Weight']
There are 412 players with metric and 11947 NaNs.
['Height', 'Wingspan']
There are 11947 players with a Height (predictor) value and 0 players with NaNs for Height.
True
[0.95391507]
The RMSE of the model on the training data is 2.3533220050715538. The average Wingspan of the training data is 73.49029126213593. On average, predictions are 0.03202221633164278 from the actual values in the training data
Arm Length ['Wingspan' 'Height' 'Weight']
There are 277 players with metric and 12082 NaNs.
['Wingspan', 'Arm Length']
There are 135 players with a Wingspan (predictor) value and 11947 players with NaNs for Wingspan.
True
[0.3974528]
The RMSE of the model on the training data is 1.4588901732158246. The average Arm Length of the training data is 31.645126353790616. On average, predictions are 0.04610157522853661 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss ['3 Cone' 'Vertical Jump']
There are 19 players with metric and 12340 NaNs.
['3 Cone', 'Powerball Toss']
There are 2811 players with a 3 Cone (predictor) value and 9529 players with NaNs for 3 Cone.
True
[36.58536585]
The RMSE of the model on the training data is 4.4938668397781776e-14. The average Powerball Toss of the training data is 38.75. On average, predictions are 1.1597075715556587e-15 from the actual values in the training data
Forty Yard Dash
There are 6235 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 9546 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 8483 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 8608 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 8801 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 11947 NaN values for this metric
There are 0 NaN values for this

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Height' 'Arm Length' 'Weight']
There are 270 players with metric and 8810 NaNs.
['Height', 'Wingspan']
There are 8810 players with a Height (predictor) value and 0 players with NaNs for Height.
True
[0.97635539]
The RMSE of the model on the training data is 2.3419878324285506. The average Wingspan of the training data is 71.59907407407407. On average, predictions are 0.03270975026863624 from the actual values in the training data
Arm Length ['Wingspan' 'Height']
There are 157 players with metric and 8923 NaNs.
['Wingspan', 'Arm Length']
There are 113 players with a Wingspan (predictor) value and 8810 players with NaNs for Wingspan.
True
[0.34679]
The RMSE of the model on the training data is 1.2674899811916. The average Arm Length of the training data is 30.94904458598726. On average, predictions are 0.04095409076910501 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Forty Yard Dash
There are 4197 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 6916 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 6000 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 6080 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 6206 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 8810 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 8923 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 8822 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 9063 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 8832 players', 'Weight': 'There are 0 NaNs out of 8832 players', 'Forty Yard Dash': 'There are 4132 NaNs out of 8832 players', '3 Cone

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Height' 'Arm Length']
There are 236 players with metric and 8596 NaNs.
['Height', 'Wingspan']
There are 8596 players with a Height (predictor) value and 0 players with NaNs for Height.
True
[0.79324485]
The RMSE of the model on the training data is 2.3552396022791497. The average Wingspan of the training data is 74.41271186440679. On average, predictions are 0.031651038421645156 from the actual values in the training data
Arm Length ['Wingspan']
There are 126 players with metric and 8706 NaNs.
['Wingspan', 'Arm Length']
There are 110 players with a Wingspan (predictor) value and 8596 players with NaNs for Wingspan.
True
[0.2739041]
The RMSE of the model on the training data is 1.2760844311014996. The average Arm Length of the training data is 31.989682539682537. On average, predictions are 0.03989049999225667 from the actual values in the training data
Hand Size ['Forty Yard Dash']
There are 228 players with metric and 8604 NaNs.
['Forty Yard Dash', 'Hand Size']
There are 45

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[metric] = completed_metrics[metric]


There are 2048 players with a 3 Cone (predictor) value and 6757 players with NaNs for 3 Cone.
True
[4.26783421]
The RMSE of the model on the training data is 2.3722216512990872. The average Powerball Toss of the training data is 34.8125. On average, predictions are 0.06814281224557522 from the actual values in the training data
Forty Yard Dash
There are 4132 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 6776 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 5721 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 5899 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 6040 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 8596 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 8706 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 8604 Na

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Arm Length' 'Height' 'Weight']
There are 320 players with metric and 6025 NaNs.
['Arm Length', 'Wingspan']
There are 0 players with a Arm Length (predictor) value and 6025 players with NaNs for Arm Length.
True
[1.15778665]
The RMSE of the model on the training data is 2.2360715206828625. The average Wingspan of the training data is 74.26020408163265. On average, predictions are 0.030111303198477574 from the actual values in the training data
Arm Length ['Wingspan' 'Height']
There are 196 players with metric and 6149 NaNs.
['Wingspan', 'Arm Length']
There are 124 players with a Wingspan (predictor) value and 6025 players with NaNs for Wingspan.
True
[0.39120008]
The RMSE of the model on the training data is 1.2997831090287304. The average Arm Length of the training data is 31.7359693877551. On average, predictions are 0.04095614957109942 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss ['3 Cone' 'Height' 'Weight']
There are 25 players with metric and 6320 NaNs.
['3 Cone', 'Powerball Toss']
There are 1282 players with a 3 Cone (predictor) value and 5038 players with NaNs for 3 Cone.
True
[15.0841432]
The RMSE of the model on the training data is 2.5211271378479276. The average Powerball Toss of the training data is 32.357142857142854. On average, predictions are 0.07791562898426267 from the actual values in the training data
Forty Yard Dash
There are 3160 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 5056 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 4329 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 4447 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 4570 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 6025 NaN values for this metric
There are 0 NaN values for

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[0.25969432]
The RMSE of the model on the training data is 5.225262105190281. The average Vertical Jump of the training data is 23.202470672389108. On average, predictions are 0.22520283201600302 from the actual values in the training data
Broad Jump ['Vertical Jump']
There are 1547 players with metric and 2682 NaNs.
['Vertical Jump', 'Broad Jump']
There are 197 players with a Vertical Jump (predictor) value and 2485 players with NaNs for Vertical Jump.
True
[0.87198766]
The RMSE of the model on the training data is 9.57485101358244. The average Broad Jump of the training data is 92.66634477825465. On average, predictions are 0.10332608927755292 from the actual values in the training data
Wingspan ['Height' 'Arm Length']
There are 195 players with metric and 4034 NaNs.
['Height', 'Wingspan']
There are 4034 players with a Height (predictor) value and 0 players with NaNs for Height.
True
[0.89054897]
The RMSE of the model on the training data is 2.36857566879391. The average Wingspan of 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean


[7.208972]
The RMSE of the model on the training data is 4.93594780040838. The average Hand Size of the training data is 11.869921259842522. On average, predictions are 0.4158366085466236 from the actual values in the training data
Powerball Toss ['Vertical Jump']
There are 10 players with metric and 4219 NaNs.
['Vertical Jump', 'Powerball Toss']
There are 1586 players with a Vertical Jump (predictor) value and 2633 players with NaNs for Vertical Jump.
True
[0.53618629]
The RMSE of the model on the training data is 3.2954989810234254. The average Powerball Toss of the training data is 38.77777777777778. On average, predictions are 0.08498421441034622 from the actual values in the training data
Forty Yard Dash
There are 1971 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 3086 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 2536 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[metric] = completed_metrics[metric]


Forty Yard Dash ['Shuttle' '3 Cone']
There are 2233 players with metric and 1743 NaNs.
['Shuttle', 'Forty Yard Dash']
There are 81 players with a Shuttle (predictor) value and 1662 players with NaNs for Shuttle.
True
[0.72924326]
The RMSE of the model on the training data is 0.3043254448264179. The average Forty Yard Dash of the training data is 4.993793517406971. On average, predictions are 0.060940734486843375 from the actual values in the training data
3 Cone ['Shuttle' 'Forty Yard Dash']
There are 1331 players with metric and 2645 NaNs.
['Shuttle', '3 Cone']
There are 426 players with a Shuttle (predictor) value and 2219 players with NaNs for Shuttle.
True
[0.96738252]
The RMSE of the model on the training data is 0.3879621482020245. The average 3 Cone of the training data is 7.768277819833468. On average, predictions are 0.049941847755689744 from the actual values in the training data
Shuttle ['Forty Yard Dash' '3 Cone']
There are 1747 players with metric and 2229 NaNs.
['Forty Ya

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Height' 'Arm Length' 'Powerball Toss' 'Weight']
There are 286 players with metric and 3690 NaNs.
['Height', 'Wingspan']
There are 3690 players with a Height (predictor) value and 0 players with NaNs for Height.
True
[0.89487783]
The RMSE of the model on the training data is 2.2169774236918944. The average Wingspan of the training data is 72.64982517482517. On average, predictions are 0.030515936113499804 from the actual values in the training data
Arm Length ['Wingspan' 'Height']
There are 194 players with metric and 3782 NaNs.
['Wingspan', 'Arm Length']
There are 92 players with a Wingspan (predictor) value and 3690 players with NaNs for Wingspan.
True
[0.33647196]
The RMSE of the model on the training data is 1.3359766373674677. The average Arm Length of the training data is 31.323453608247423. On average, predictions are 0.04265100055939256 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss ['Height' 'Wingspan' 'Weight']
There are 10 players with metric and 3966 NaNs.
['Height', 'Powerball Toss']
There are 3966 players with a Height (predictor) value and 0 players with NaNs for Height.
True
[1.12397921]
The RMSE of the model on the training data is 2.315556954365124. The average Powerball Toss of the training data is 32.85. On average, predictions are 0.07048879617549846 from the actual values in the training data
Forty Yard Dash
There are 1743 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 2645 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 2229 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 2229 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 2305 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 3690 NaN values for this metric
There are 0 NaN values for this metric
Ar

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

There are 50 players with a Broad Jump (predictor) value and 3523 players with NaNs for Broad Jump.
True
[0.30573538]
The RMSE of the model on the training data is 3.1321022759956083. The average Vertical Jump of the training data is 21.264050561797752. On average, predictions are 0.14729565596606667 from the actual values in the training data
Broad Jump ['Vertical Jump' 'Hand Size']
There are 228 players with metric and 3553 NaNs.
['Vertical Jump', 'Broad Jump']
There are 30 players with a Vertical Jump (predictor) value and 3523 players with NaNs for Vertical Jump.
True
[1.82522292]
The RMSE of the model on the training data is 7.652811918465759. The average Broad Jump of the training data is 85.54292134831461. On average, predictions are 0.08946166202700695 from the actual values in the training data
Wingspan ['Height']
There are 8 players with metric and 3773 NaNs.
['Height', 'Wingspan']
There are 3773 players with a Height (predictor) value and 0 players with NaNs for Height.
True

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(l

Hand Size ['Broad Jump' 'Vertical Jump']
There are 8 players with metric and 3773 NaNs.
['Broad Jump', 'Hand Size']
There are 221 players with a Broad Jump (predictor) value and 3552 players with NaNs for Broad Jump.
True
[0.28582265]
The RMSE of the model on the training data is 4.139409295137822. The average Hand Size of the training data is 11.5. On average, predictions are 0.3599486343598106 from the actual values in the training data
Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Forty Yard Dash
There are 2692 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 3623 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 3499 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 3573 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 3553 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 3773 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 3781 NaN values for this metric
There are 3781 NaN values for this metric
Hand Size
There are 3773 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 3781 NaN values for this metric
There are 3781 NaN values for this metric
{'Height': 'There are 0 NaNs out of 2994 players', 'Weight': 'There are 0 NaNs out of 2994 players', 'Forty Yard Dash': 'There are 1380 NaNs out of 2994 players', '

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Powerball Toss' 'Height' 'Arm Length']
There are 125 players with metric and 2869 NaNs.
['Powerball Toss', 'Wingspan']
There are 1 players with a Powerball Toss (predictor) value and 2868 players with NaNs for Powerball Toss.
True
[0.47816781]
The RMSE of the model on the training data is 2.3020771994174374. The average Wingspan of the training data is 76.29166666666667. On average, predictions are 0.030174687485537133 from the actual values in the training data
Arm Length ['Wingspan' 'Height' 'Hand Size']
There are 79 players with metric and 2915 NaNs.
['Wingspan', 'Arm Length']
There are 46 players with a Wingspan (predictor) value and 2869 players with NaNs for Wingspan.
True
[0.37559427]
The RMSE of the model on the training data is 1.4186239566353043. The average Arm Length of the training data is 32.92088607594937. On average, predictions are 0.04309191293826359 from the actual values in the training data
Hand Size ['Arm Length']
There are 115 players with metric and 2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Wingspan ['Height' 'Arm Length']
There are 117 players with metric and 2754 NaNs.
['Height', 'Wingspan']
There are 2754 players with a Height (predictor) value and 0 players with NaNs for Height.
True
[0.85510438]
The RMSE of the model on the training data is 2.1250495958460647. The average Wingspan of the training data is 73.78418803418803. On average, predictions are 0.028800880682747627 from the actual values in the training data
Arm Length ['Wingspan' 'Height']
There are 87 players with metric and 2784 NaNs.
['Wingspan', 'Arm Length']
There are 30 players with a Wingspan (predictor) value and 2754 players with NaNs for Wingspan.
True
[0.28949406]
The RMSE of the model on the training data is 1.2548922810492908. The average Arm Length of the training data is 31.597701149425287. On average, predictions are 0.0397146702260052 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(l

Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Forty Yard Dash
There are 1296 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 2097 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1698 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1771 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1805 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 2754 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 2784 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 2760 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 2865 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 2599 players', 'Weight': 'There are 0 NaNs out of 2599 players', 'Forty Yard Dash': 'There are 1400 NaNs out of 2599 players', '3 Cone

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is tryi

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Arm Length']
There are 123 players with metric and 2476 NaNs.
['Arm Length', 'Wingspan']
There are 0 players with a Arm Length (predictor) value and 2476 players with NaNs for Arm Length.
True
[1.03006815]
The RMSE of the model on the training data is 2.2260404014498616. The average Wingspan of the training data is 79.46220930232558. On average, predictions are 0.028013824697228917 from the actual values in the training data
Arm Length ['Wingspan']
There are 86 players with metric and 2513 NaNs.
['Wingspan', 'Arm Length']
There are 37 players with a Wingspan (predictor) value and 2476 players with NaNs for Wingspan.
True
[0.35796096]
The RMSE of the model on the training data is 1.3122539700270295. The average Arm Length of the training data is 34.122093023255815. On average, predictions are 0.03845760484659211 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss ['Shuttle' 'Weight']
There are 7 players with metric and 2592 NaNs.
['Shuttle', 'Powerball Toss']
There are 839 players with a Shuttle (predictor) value and 1753 players with NaNs for Shuttle.
True
[28.48246867]
The RMSE of the model on the training data is 4.168239883411639. The average Powerball Toss of the training data is 30.071428571428573. On average, predictions are 0.138611302536254 from the actual values in the training data
Forty Yard Dash
There are 1400 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 2098 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1753 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1855 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1883 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 2476 NaN values for this metric
There are 0 NaN values for this met

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Wingspan ['Arm Length' 'Powerball Toss' 'Height' 'Weight']
There are 92 players with metric and 2329 NaNs.
['Arm Length', 'Wingspan']
There are 0 players with a Arm Length (predictor) value and 2329 players with NaNs for Arm Length.
True
[1.760422]
The RMSE of the model on the training data is 2.015625649082049. The average Wingspan of the training data is 76.63068181818181. On average, predictions are 0.026303115165599515 from the actual values in the training data
Arm Length ['Wingspan' 'Height' 'Weight']
There are 44 players with metric and 2377 NaNs.
['Wingspan', 'Arm Length']
There are 48 players with a Wingspan (predictor) value and 2329 players with NaNs for Wingspan.
True
[0.38849945]
The RMSE of the model on the training data is 0.9468838047934871. The average Arm Length of the training data is 32.49431818181818. On average, predictions are 0.029139980703558967 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(l

Powerball Toss ['Wingspan' 'Vertical Jump']
There are 5 players with metric and 2416 NaNs.
['Wingspan', 'Powerball Toss']
There are 89 players with a Wingspan (predictor) value and 2327 players with NaNs for Wingspan.
True
[0.57055215]
The RMSE of the model on the training data is 0.6555177163758565. The average Powerball Toss of the training data is 34.5. On average, predictions are 0.01900051351814077 from the actual values in the training data
Forty Yard Dash
There are 1170 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 1897 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1506 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1647 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1670 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 2329 NaN values for this metric
There are 0 NaN values for this metri

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[metric] = completed_metrics[metric]


Forty Yard Dash ['3 Cone' 'Shuttle']
There are 958 players with metric and 1343 NaNs.
['3 Cone', 'Forty Yard Dash']
There are 9 players with a 3 Cone (predictor) value and 1334 players with NaNs for 3 Cone.
True
[0.44688351]
The RMSE of the model on the training data is 0.29906932973768674. The average Forty Yard Dash of the training data is 5.341457516339868. On average, predictions are 0.05599021031671863 from the actual values in the training data
3 Cone ['Powerball Toss' 'Arm Length' 'Shuttle' 'Forty Yard Dash']
There are 315 players with metric and 1986 NaNs.
['Powerball Toss', '3 Cone']
There are 5 players with a Powerball Toss (predictor) value and 1981 players with NaNs for Powerball Toss.
True
[0.062]
The RMSE of the model on the training data is 1.4043333874306805e-15. The average 3 Cone of the training data is 7.904. On average, predictions are 1.7767375853120958e-16 from the actual values in the training data
Shuttle ['Arm Length' '3 Cone' 'Forty Yard Dash']
There are 520 p

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

['Vertical Jump' 'Height']
There are 456 players with metric and 1845 NaNs.
['Vertical Jump', 'Broad Jump']
There are 85 players with a Vertical Jump (predictor) value and 1760 players with NaNs for Vertical Jump.
True
[2.0019855]
The RMSE of the model on the training data is 8.083057672176594. The average Broad Jump of the training data is 96.14485294117648. On average, predictions are 0.08407166296381965 from the actual values in the training data
Wingspan ['Arm Length' 'Height']
There are 46 players with metric and 2255 NaNs.
['Arm Length', 'Wingspan']
There are 0 players with a Arm Length (predictor) value and 2255 players with NaNs for Arm Length.
True
[1.22301221]
The RMSE of the model on the training data is 1.5575970166847912. The average Wingspan of the training data is 76.81521739130434. On average, predictions are 0.0202771933684733 from the actual values in the training data
Arm Length ['3 Cone' 'Shuttle' 'Wingspan' 'Height']
There are 23 players with metric and 2278 NaNs.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

There are 313 players with a 3 Cone (predictor) value and 1981 players with NaNs for 3 Cone.
True
[16.12903226]
The RMSE of the model on the training data is 1.0048591735576161e-14. The average Powerball Toss of the training data is 35.0. On average, predictions are 2.8710262101646176e-16 from the actual values in the training data
Forty Yard Dash
There are 1343 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 1986 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1781 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1808 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1845 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 2255 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 2278 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 225

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

There are 67 players with a Vertical Jump (predictor) value and 1438 players with NaNs for Vertical Jump.
True
[1.61372904]
The RMSE of the model on the training data is 8.882551013142571. The average Broad Jump of the training data is 100.44199134199133. On average, predictions are 0.08843463669391712 from the actual values in the training data
Wingspan ['Height']
There are 45 players with metric and 2202 NaNs.
['Height', 'Wingspan']
There are 2202 players with a Height (predictor) value and 0 players with NaNs for Height.
True
[0.84601471]
The RMSE of the model on the training data is 2.1635483655840755. The average Wingspan of the training data is 72.65555555555555. On average, predictions are 0.029778154595896437 from the actual values in the training data
Arm Length []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)


Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss ['Weight' 'Vertical Jump' 'Height' 'Forty Yard Dash']
There are 8 players with metric and 2239 NaNs.
['Weight', 'Powerball Toss']
There are 2239 players with a Weight (predictor) value and 0 players with NaNs for Weight.
True
[0.11159691]
The RMSE of the model on the training data is 2.8985915397489608. The average Powerball Toss of the training data is 36.5. On average, predictions are 0.07941346684243727 from the actual values in the training data
Forty Yard Dash
There are 1202 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 1689 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1497 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1487 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1505 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 2202 NaN values for this metric
There are 0 NaN val

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[metric] = completed_metrics[metric]


Forty Yard Dash ['3 Cone' 'Hand Size']
There are 909 players with metric and 1263 NaNs.
['3 Cone', 'Forty Yard Dash']
There are 7 players with a 3 Cone (predictor) value and 1256 players with NaNs for 3 Cone.
True
[0.3416323]
The RMSE of the model on the training data is 0.20746538668152303. The average Forty Yard Dash of the training data is 4.959129943502822. On average, predictions are 0.04183503740476345 from the actual values in the training data
3 Cone ['Wingspan' 'Hand Size' 'Shuttle' 'Forty Yard Dash']
There are 184 players with metric and 1988 NaNs.
['Wingspan', '3 Cone']
There are 3 players with a Wingspan (predictor) value and 1985 players with NaNs for Wingspan.
True
[0.11883208]
The RMSE of the model on the training data is 0.22830175652750226. The average 3 Cone of the training data is 7.739111111111111. On average, predictions are 0.02949973882655947 from the actual values in the training data
Shuttle ['3 Cone' 'Wingspan']
There are 263 players with metric and 1909 NaNs.

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

The RMSE of the model on the training data is 9.568704818899986. The average Broad Jump of the training data is 104.68732394366198. On average, predictions are 0.09140270720884444 from the actual values in the training data
Wingspan ['Height' '3 Cone' 'Shuttle']
There are 12 players with metric and 2160 NaNs.
['Height', 'Wingspan']
There are 2160 players with a Height (predictor) value and 0 players with NaNs for Height.
True
[1.06904232]
The RMSE of the model on the training data is 1.406482235628601. The average Wingspan of the training data is 74.5. On average, predictions are 0.018878956182934243 from the actual values in the training data
Arm Length []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)


Hand Size ['3 Cone' 'Forty Yard Dash']
There are 11 players with metric and 2161 NaNs.
['3 Cone', 'Hand Size']
There are 175 players with a 3 Cone (predictor) value and 1986 players with NaNs for 3 Cone.
True
[1.15717725]
The RMSE of the model on the training data is 0.4703603351990917. The average Hand Size of the training data is 9.055555555555555. On average, predictions are 0.05194163210787516 from the actual values in the training data
Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(l

Forty Yard Dash
There are 1263 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 1988 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1909 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1938 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1943 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 2160 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 2172 NaN values for this metric
There are 2172 NaN values for this metric
Hand Size
There are 2161 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 2172 NaN values for this metric
There are 2172 NaN values for this metric
{'Height': 'There are 0 NaNs out of 2060 players', 'Weight': 'There are 0 NaNs out of 2060 players', 'Forty Yard Dash': 'There are 1242 NaNs out of 2060 players', '

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Arm Length []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)


Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)


Forty Yard Dash
There are 1242 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 1893 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1834 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1862 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1869 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 2043 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 2060 NaN values for this metric
There are 2060 NaN values for this metric
Hand Size
There are 2043 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 2060 NaN values for this metric
There are 2060 NaN values for this metric
{'Height': 'There are 0 NaNs out of 1304 players', 'Weight': 'There are 0 NaNs out of 1304 players', 'Forty Yard Dash': 'There are 555 NaNs out of 1304 players', '3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[metric] = completed_metrics[metric]


Forty Yard Dash ['3 Cone']
There are 749 players with metric and 555 NaNs.
['3 Cone', 'Forty Yard Dash']
There are 1 players with a 3 Cone (predictor) value and 554 players with NaNs for 3 Cone.
True
[0.39113485]
The RMSE of the model on the training data is 0.4592144843219958. The average Forty Yard Dash of the training data is 5.971006389776359. On average, predictions are 0.07690738450862644 from the actual values in the training data
3 Cone ['Forty Yard Dash' 'Shuttle' 'Hand Size']
There are 314 players with metric and 990 NaNs.
['Forty Yard Dash', '3 Cone']
There are 436 players with a Forty Yard Dash (predictor) value and 554 players with NaNs for Forty Yard Dash.
True
[1.09091231]
The RMSE of the model on the training data is 0.7669150136775134. The average 3 Cone of the training data is 8.71409584664537. On average, predictions are 0.0880085584521944 from the actual values in the training data
Shuttle ['3 Cone']
There are 556 players with metric and 748 NaNs.
['3 Cone', 'Shuttl

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Arm Length']
There are 28 players with metric and 1276 NaNs.
['Arm Length', 'Wingspan']
There are 0 players with a Arm Length (predictor) value and 1276 players with NaNs for Arm Length.
True
[1.34850954]
The RMSE of the model on the training data is 2.28312197464914. The average Wingspan of the training data is 76.5909090909091. On average, predictions are 0.02980930768087898 from the actual values in the training data
Arm Length ['Wingspan' 'Height']
There are 22 players with metric and 1282 NaNs.
['Wingspan', 'Arm Length']
There are 6 players with a Wingspan (predictor) value and 1276 players with NaNs for Wingspan.
True
[0.34220076]
The RMSE of the model on the training data is 1.1501184902910302. The average Arm Length of the training data is 32.28409090909091. On average, predictions are 0.035624930357483514 from the actual values in the training data
Hand Size ['3 Cone']
There are 27 players with metric and 1277 NaNs.
['3 Cone', 'Hand Size']
There are 300 players with

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Forty Yard Dash
There are 555 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 990 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 748 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 831 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 828 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 1276 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 1282 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 1277 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 1303 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 783 players', 'Weight': 'There are 0 NaNs out of 783 players', 'Forty Yard Dash': 'There are 472 NaNs out of 783 players', '3 Cone': 'There

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)


Arm Length []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)


Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)


Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Forty Yard Dash
There are 472 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 771 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 671 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 702 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 731 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 783 NaN values for this metric
There are 783 NaN values for this metric
Arm Length
There are 783 NaN values for this metric
There are 783 NaN values for this metric
Hand Size
There are 783 NaN values for this metric
There are 783 NaN values for this metric
Powerball Toss
There are 783 NaN values for this metric
There are 783 NaN values for this metric
{'Height': 'There are 0 NaNs out of 326 players', 'Weight': 'There are 0 NaNs out of 326 players', 'Forty Yard Dash': 'There are 124 NaNs out of 326 players', '3 Cone': 'T

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

There are 27 players with a Vertical Jump (predictor) value and 183 players with NaNs for Vertical Jump.
True
[0.4431227]
The RMSE of the model on the training data is 9.672921990508259. The average Broad Jump of the training data is 91.97477477477476. On average, predictions are 0.10516929249563305 from the actual values in the training data
Wingspan ['Hand Size']
There are 2 players with metric and 324 NaNs.
['Hand Size', 'Wingspan']
There are 0 players with a Hand Size (predictor) value and 324 players with NaNs for Hand Size.
True
[0.06596306]
The RMSE of the model on the training data is 0.0. The average Wingspan of the training data is 76.5. On average, predictions are 0.0 from the actual values in the training data
Arm Length []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Hand Size ['Wingspan']
There are 2 players with metric and 324 NaNs.
['Wingspan', 'Hand Size']
There are 0 players with a Wingspan (predictor) value and 324 players with NaNs for Wingspan.
True
[15.16]
The RMSE of the model on the training data is 1.9093976954970456e-13. The average Hand Size of the training data is 17.42. On average, predictions are 1.096095117966157e-14 from the actual values in the training data
Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Forty Yard Dash
There are 124 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 243 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 178 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 188 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 210 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 324 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 325 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 324 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 326 NaN values for this metric
There are 326 NaN values for this metric
{'Height': 'There are 0 NaNs out of 277 players', 'Weight': 'There are 0 NaNs out of 277 players', 'Forty Yard Dash': 'There are 124 NaNs out of 277 players', '3 Cone': 'There a

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

There are 17 players with a Vertical Jump (predictor) value and 206 players with NaNs for Vertical Jump.
True
[1.36343383]
The RMSE of the model on the training data is 7.5739775507506435. The average Broad Jump of the training data is 91.75952380952381. On average, predictions are 0.08254159607969253 from the actual values in the training data
Wingspan []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Arm Length []
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[metric] = completed_metrics[metric]


Forty Yard Dash
There are 124 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 245 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 197 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 218 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 223 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 276 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 277 NaN values for this metric
There are 277 NaN values for this metric
Hand Size
There are 276 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 277 NaN values for this metric
There are 277 NaN values for this metric
{'Height': 'There are 0 NaNs out of 165 players', 'Weight': 'There are 0 NaNs out of 165 players', 'Forty Yard Dash': 'There are 98 NaNs out of 165 players', '3 Cone': 'There 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is tryi

Broad Jump []
Wingspan []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Arm Length []
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss []
Forty Yard Dash
There are 98 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 160 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 140 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 150 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 149 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 165 NaN values for this metric
There are 165 NaN values for this metric
Arm Length
There are 165 NaN values for this metric
There are 165 NaN values for this metric
Hand Size
There are 165 NaN values for this metric
There are 165 NaN values for this metric
Powerball Toss
There are 165 NaN values for this metric
There are 165 NaN values for this metric
Wall time: 1min 55s


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

In [212]:
cleaned_combine_metrics_position_dataframes['RB']

Unnamed: 0,Full Name,Position,Height,Weight,Forty Yard Dash,3 Cone,Shuttle,Vertical Jump,Broad Jump,Wingspan,Arm Length,Hand Size,Powerball Toss
23,Camar Wheaton,RB,71.0,190.0,4.500,7.750000,4.654,30.98400,109.968000,73.670213,31.000000,19.925888,37.17440
28,Donovan Edwards,RB,70.5,203.0,4.740,7.597472,4.500,33.10000,107.047600,73.182035,31.000000,9.731697,34.07616
41,Xavian Sorey,RB,74.0,207.0,4.600,7.750000,4.654,31.28608,110.476800,77.000000,32.787915,9.450000,37.39072
45,Will Shipley,RB,71.0,198.0,4.460,6.940000,4.210,36.30000,121.000000,73.000000,32.000000,9.450000,37.35248
48,TreVeyon Henderson,RB,69.5,177.5,4.300,7.750000,4.654,28.83292,105.982664,75.000000,32.094335,9.060000,34.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
71332,Cassius Sadler,RB,64.0,107.0,4.997,7.750000,4.654,18.97952,79.496000,66.835725,31.000000,7.813360,27.76160
71339,Moses Perez,RB,60.0,104.0,4.997,7.750000,4.654,18.97952,79.496000,62.930304,31.000000,7.813360,27.76160
71346,Carmine D’Alessandro,RB,65.0,100.0,4.997,7.750000,4.654,20.29920,85.140000,67.812081,31.000000,7.870000,28.14240
71348,Al'jay Pipkins,RB,61.0,100.0,4.997,7.750000,4.654,18.97952,79.496000,63.906659,31.000000,7.813360,27.76160


In [213]:
wr_metrics = ["Height", "Weight", "Wingspan", "Arm Length", "Hand Size", "Forty Yard Dash", "Shuttle", "3 Cone", "Vertical Jump", "Broad Jump"]
rb_metrics = ["Height", "Weight","Wingspan","Hand Size","Forty Yard Dash","Shuttle","3 Cone","Vertical Jump"]
lb_metrics = ["Height","Weight","Arm Length","Shuttle","3 Cone","Vertical Jump"]
qb_metrics = ["Height","Weight","Wingspan","Hand Size","Powerball Toss"]
de_metrics = ["Height","Weight","Wingspan","Arm Length","Forty Yard Dash","Shuttle","Vertical Jump","Powerball Toss"]
cb_metrics = ["Height","Weight","Wingspan","Arm Length","Forty Yard Dash","Shuttle","3 Cone","Vertical Jump","Broad Jump"]
ol_metrics = ["Height","Weight","Wingspan","Arm Length","Vertical Jump","Powerball Toss"]
dt_metrics = ["Height","Weight","Wingspan","Arm Length","Shuttle","3 Cone","Vertical Jump","Broad Jump","Powerball Toss"]
s_metrics = ["Height","Weight","Wingspan","Arm Length","Forty Yard Dash","Shuttle","3 Cone","Vertical Jump"]
ot_metrics = ["Height","Weight","Wingspan","Arm Length","Vertical Jump","Powerball Toss"]
og_metrics = ["Height","Weight","Powerball Toss"]
te_metrics = ["Height","Weight","Forty Yard Dash","Shuttle","3 Cone","Vertical Jump"]
ath_metrics = ["Height","Weight","Forty Yard Dash","Shuttle","3 Cone","Vertical Jump"]
db_metrics = ["Height","Weight","Forty Yard Dash","Vertical Jump"]
dl_metrics = ["Height","Weight","Vertical Jump","Powerball Toss"]
oc_metrics = ["Height","Weight","Wingspan","Arm Length","Hand Size","Forty Yard Dash","Powerball Toss"]
k_metrics = ["Height","Weight","Vertical Jump"]
fb_metrics = ["Height","Weight","Forty Yard Dash","Vertical Jump"]
ls_metrics = ["Height","Weight"]
p_metrics = ["Height","Weight","Vertical Jump"]

In [214]:
all_position_metrics = [wr_metrics, rb_metrics, lb_metrics, qb_metrics, 
                       de_metrics, cb_metrics, ol_metrics, dt_metrics, 
                       s_metrics, ot_metrics, og_metrics, te_metrics, 
                       ath_metrics, db_metrics, dl_metrics, oc_metrics, 
                       k_metrics, fb_metrics, ls_metrics, p_metrics]

In [215]:
%%time
cleaned_chosen_metrics_position_dataframes = {}
for position, position_metrics in zip(positions, all_position_metrics):
    cleaned_chosen_metrics_position_dataframes[position] = master_fillna(data, position, position_metrics)

{'Height': 'There are 0 NaNs out of 12359 players', 'Weight': 'There are 0 NaNs out of 12359 players', 'Wingspan': 'There are 11947 NaNs out of 12359 players', 'Arm Length': 'There are 12082 NaNs out of 12359 players', 'Hand Size': 'There are 11963 NaNs out of 12359 players', 'Forty Yard Dash': 'There are 6235 NaNs out of 12359 players', 'Shuttle': 'There are 8483 NaNs out of 12359 players', '3 Cone': 'There are 9546 NaNs out of 12359 players', 'Vertical Jump': 'There are 8608 NaNs out of 12359 players', 'Broad Jump': 'There are 8801 NaNs out of 12359 players'}
Wingspan ['Height' 'Arm Length' 'Weight']
There are 412 players with metric and 11947 NaNs.
['Height', 'Wingspan']
There are 11947 players with a Height (predictor) value and 0 players with NaNs for Height.
True
[0.95391507]
The RMSE of the model on the training data is 2.3533220050715538. The average Wingspan of the training data is 73.49029126213593. On average, predictions are 0.03202221633164278 from the actual values in the

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(l

Forty Yard Dash []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Shuttle ['3 Cone']
There are 3876 players with metric and 8483 NaNs.
['3 Cone', 'Shuttle']
There are 35 players with a 3 Cone (predictor) value and 8448 players with NaNs for 3 Cone.
True
[0.38332496]
The RMSE of the model on the training data is 0.24604069736686787. The average Shuttle of the training data is 4.6740147588193075. On average, predictions are 0.05264011991032302 from the actual values in the training data
3 Cone ['Shuttle']
There are 2813 players with metric and 9546 NaNs.
['Shuttle', '3 Cone']
There are 1098 players with a Shuttle (predictor) value and 8448 players with NaNs for Shuttle.
True
[0.9804151]
The RMSE of the model on the training data is 0.3934851927182003. The average 3 Cone of the training data is 7.735904607631389. On average, predictions are 0.05086479379929676 from the actual values in the training data
Vertical Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan
There are 11947 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 12082 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 11963 NaN values for this metric
There are 0 NaN values for this metric
Forty Yard Dash
There are 6235 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 8483 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 9546 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 8608 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 8801 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 9080 players', 'Weight': 'There are 0 NaNs out of 9080 players', 'Wingspan': 'There are 8810 NaNs out of 9080 players', 'Hand Size': 'There are 8822 NaNs out of 9080 players', 'Forty Yard Dash': 'There are 4197 NaNs out of 9080

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Forty Yard Dash ['3 Cone' 'Shuttle']
There are 4883 players with metric and 4197 NaNs.
['3 Cone', 'Forty Yard Dash']
There are 63 players with a 3 Cone (predictor) value and 4134 players with NaNs for 3 Cone.
True
[0.33403925]
The RMSE of the model on the training data is 0.23836276682738344. The average Forty Yard Dash of the training data is 5.023432175154691. On average, predictions are 0.04745018117419756 from the actual values in the training data
Shuttle ['3 Cone' 'Forty Yard Dash']
There are 3080 players with metric and 6000 NaNs.
['3 Cone', 'Shuttle']
There are 21 players with a 3 Cone (predictor) value and 5979 players with NaNs for 3 Cone.
True
[0.37746799]
The RMSE of the model on the training data is 0.23545532572356265. The average Shuttle of the training data is 4.682819412039187. On average, predictions are 0.05028067602142081 from the actual values in the training data
3 Cone ['Shuttle' 'Forty Yard Dash']
There are 2164 players with metric and 6916 NaNs.
['Shuttle', '3 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan
There are 8810 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 8822 NaN values for this metric
There are 0 NaN values for this metric
Forty Yard Dash
There are 4197 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 6000 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 6916 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 6080 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 8832 players', 'Weight': 'There are 0 NaNs out of 8832 players', 'Arm Length': 'There are 8706 NaNs out of 8832 players', 'Shuttle': 'There are 5721 NaNs out of 8832 players', '3 Cone': 'There are 6776 NaNs out of 8832 players', 'Vertical Jump': 'There are 5899 NaNs out of 8832 players'}
Arm Length []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Shuttle ['3 Cone']
There are 3111 players with metric and 5721 NaNs.
['3 Cone', 'Shuttle']
There are 26 players with a 3 Cone (predictor) value and 5695 players with NaNs for 3 Cone.
True
[0.38032846]
The RMSE of the model on the training data is 0.2942247462739102. The average Shuttle of the training data is 4.792373399014775. On average, predictions are 0.061394370132844295 from the actual values in the training data
3 Cone ['Shuttle']
There are 2056 players with metric and 6776 NaNs.
['Shuttle', '3 Cone']
There are 1081 players with a Shuttle (predictor) value and 5695 players with NaNs for Shuttle.
True
[0.922145]
The RMSE of the model on the training data is 0.45814124532771583. The average 3 Cone of the training data is 7.950379802955665. On average, predictions are 0.05762507662305584 from the actual values in the training data
Vertical Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Arm Length
There are 8706 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 5721 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 6776 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 5899 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 6345 players', 'Weight': 'There are 0 NaNs out of 6345 players', 'Wingspan': 'There are 6025 NaNs out of 6345 players', 'Hand Size': 'There are 6042 NaNs out of 6345 players', 'Powerball Toss': 'There are 6320 NaNs out of 6345 players'}
Wingspan ['Height' 'Weight']
There are 320 players with metric and 6025 NaNs.
['Height', 'Wingspan']
There are 6025 players with a Height (predictor) value and 0 players with NaNs for Height.
True
[0.81524717]
The RMSE of the model on the training data is 2.2123412499009905. The average Wingspan of the training data is 74.00703125. On average, predictions are 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss ['Height' 'Weight']
There are 25 players with metric and 6320 NaNs.
['Height', 'Powerball Toss']
There are 6320 players with a Height (predictor) value and 0 players with NaNs for Height.
True
[0.93695806]
The RMSE of the model on the training data is 2.892647722403424. The average Powerball Toss of the training data is 33.128. On average, predictions are 0.08731730627878 from the actual values in the training data
Wingspan
There are 6025 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 6042 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 6320 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 4229 players', 'Weight': 'There are 0 NaNs out of 4229 players', 'Wingspan': 'There are 4034 NaNs out of 4229 players', 'Arm Length': 'There are 4078 NaNs out of 4229 players', 'Forty Yard Dash': 'There are 1971 NaNs out of 4229 players', 'Shuttle': 'Th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(l

Shuttle []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Vertical Jump ['Powerball Toss']
There are 1595 players with metric and 2634 NaNs.
['Powerball Toss', 'Vertical Jump']
There are 1 players with a Powerball Toss (predictor) value and 2633 players with NaNs for Powerball Toss.
True
[0.40733008]
The RMSE of the model on the training data is 2.872344778981971. The average Vertical Jump of the training data is 26.744444444444444. On average, predictions are 0.1073996801447351 from the actual values in the training data
Powerball Toss ['Vertical Jump']
There are 10 players with metric and 4219 NaNs.
['Vertical Jump', 'Powerball Toss']
There are 1586 players with a Vertical Jump (predictor) value and 2633 players with NaNs for Vertical Jump.
True
[0.53618629]
The RMSE of the model on the training data is 3.2954989810234254. The average Powerball Toss of the training data is 38.77777777777778. On average, predictions are 0.08498421441034622 from the actual values in the training data
Wingspan
There are 4034 NaN values for this metric
There ar

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[0.96738252]
The RMSE of the model on the training data is 0.3879621482020245. The average 3 Cone of the training data is 7.768277819833468. On average, predictions are 0.049941847755689744 from the actual values in the training data
Vertical Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan
There are 3690 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 3782 NaN values for this metric
There are 0 NaN values for this metric
Forty Yard Dash
There are 1743 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 2229 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 2645 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 2229 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 2305 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 3781 players', 'Weight': 'There are 0 NaNs out of 3781 players', 'Wingspan': 'There are 3773 NaNs out of 3781 players', 'Arm Length': 'There are 3781 NaNs out of 3781 players', 'Vertical Jump': 'There are 3573 NaNs out of 3781 players', 'Powerball Toss': 'There are 3781 NaNs out of 3781 players'}
Wingspan ['Height']
The

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)


Vertical Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan
There are 3773 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 3781 NaN values for this metric
There are 3781 NaN values for this metric
Vertical Jump
There are 3573 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 3781 NaN values for this metric
There are 3781 NaN values for this metric
{'Height': 'There are 0 NaNs out of 2994 players', 'Weight': 'There are 0 NaNs out of 2994 players', 'Wingspan': 'There are 2869 NaNs out of 2994 players', 'Arm Length': 'There are 2915 NaNs out of 2994 players', 'Shuttle': 'There are 1785 NaNs out of 2994 players', '3 Cone': 'There are 2220 NaNs out of 2994 players', 'Vertical Jump': 'There are 1933 NaNs out of 2994 players', 'Broad Jump': 'There are 1958 NaNs out of 2994 players', 'Powerball Toss': 'There are 2987 NaNs out of 2994 players'}
Wingspan ['Powerball Toss' 'Height' 'Arm Length']
There are 125 players with metric and 2869 NaNs.
['Powerball Toss', 'Wing

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss ['Height' 'Wingspan' 'Weight' 'Shuttle']
There are 7 players with metric and 2987 NaNs.
['Height', 'Powerball Toss']
There are 2987 players with a Height (predictor) value and 0 players with NaNs for Height.
True
[1.50314465]
The RMSE of the model on the training data is 2.737326545483549. The average Powerball Toss of the training data is 35.857142857142854. On average, predictions are 0.07633978413699141 from the actual values in the training data
Wingspan
There are 2869 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 2915 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1785 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 2220 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1933 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1958 NaN values for this metric
There are 0 NaN values f

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[metric] = completed_metrics[metric]


Wingspan ['Height' 'Arm Length']
There are 117 players with metric and 2754 NaNs.
['Height', 'Wingspan']
There are 2754 players with a Height (predictor) value and 0 players with NaNs for Height.
True
[0.85510438]
The RMSE of the model on the training data is 2.1250495958460647. The average Wingspan of the training data is 73.78418803418803. On average, predictions are 0.028800880682747627 from the actual values in the training data
Arm Length ['Wingspan' 'Height']
There are 87 players with metric and 2784 NaNs.
['Wingspan', 'Arm Length']
There are 30 players with a Wingspan (predictor) value and 2754 players with NaNs for Wingspan.
True
[0.28949406]
The RMSE of the model on the training data is 1.2548922810492908. The average Arm Length of the training data is 31.597701149425287. On average, predictions are 0.0397146702260052 from the actual values in the training data
Forty Yard Dash ['3 Cone']
There are 1575 players with metric and 1296 NaNs.
['3 Cone', 'Forty Yard Dash']
There are 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

3 Cone ['Shuttle' 'Forty Yard Dash']
There are 774 players with metric and 2097 NaNs.
['Shuttle', '3 Cone']
There are 405 players with a Shuttle (predictor) value and 1692 players with NaNs for Shuttle.
True
[1.08639262]
The RMSE of the model on the training data is 0.36549165785317933. The average 3 Cone of the training data is 7.733736979166664. On average, predictions are 0.0472593855774705 from the actual values in the training data
Vertical Jump ['Weight']
There are 1100 players with metric and 1771 NaNs.
['Weight', 'Vertical Jump']
There are 1771 players with a Weight (predictor) value and 0 players with NaNs for Weight.
True
[0.10556776]
The RMSE of the model on the training data is 4.738640346020219. The average Vertical Jump of the training data is 26.54094727272729. On average, predictions are 0.1785407392331286 from the actual values in the training data
Wingspan
There are 2754 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 2784 NaN va

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[metric] = completed_metrics[metric]


Wingspan ['Arm Length']
There are 123 players with metric and 2476 NaNs.
['Arm Length', 'Wingspan']
There are 0 players with a Arm Length (predictor) value and 2476 players with NaNs for Arm Length.
True
[1.03006815]
The RMSE of the model on the training data is 2.2260404014498616. The average Wingspan of the training data is 79.46220930232558. On average, predictions are 0.028013824697228917 from the actual values in the training data
Arm Length ['Wingspan']
There are 86 players with metric and 2513 NaNs.
['Wingspan', 'Arm Length']
There are 37 players with a Wingspan (predictor) value and 2476 players with NaNs for Wingspan.
True
[0.35796096]
The RMSE of the model on the training data is 1.3122539700270295. The average Arm Length of the training data is 34.122093023255815. On average, predictions are 0.03845760484659211 from the actual values in the training data
Vertical Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(l

Powerball Toss ['Weight']
There are 7 players with metric and 2592 NaNs.
['Weight', 'Powerball Toss']
There are 2592 players with a Weight (predictor) value and 0 players with NaNs for Weight.
True
[0.19540902]
The RMSE of the model on the training data is 4.958749545785558. The average Powerball Toss of the training data is 30.071428571428573. On average, predictions are 0.16489903477671689 from the actual values in the training data
Wingspan
There are 2476 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 2513 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1855 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 2592 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 2421 players', 'Weight': 'There are 0 NaNs out of 2421 players', 'Powerball Toss': 'There are 2416 NaNs out of 2421 players'}
Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[metric] = completed_metrics[metric]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percen

Powerball Toss
There are 2416 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 2301 players', 'Weight': 'There are 0 NaNs out of 2301 players', 'Forty Yard Dash': 'There are 1343 NaNs out of 2301 players', 'Shuttle': 'There are 1781 NaNs out of 2301 players', '3 Cone': 'There are 1986 NaNs out of 2301 players', 'Vertical Jump': 'There are 1808 NaNs out of 2301 players'}
Forty Yard Dash ['3 Cone' 'Shuttle']
There are 958 players with metric and 1343 NaNs.
['3 Cone', 'Forty Yard Dash']
There are 9 players with a 3 Cone (predictor) value and 1334 players with NaNs for 3 Cone.
True
[0.44688351]
The RMSE of the model on the training data is 0.29906932973768674. The average Forty Yard Dash of the training data is 5.341457516339868. On average, predictions are 0.05599021031671863 from the actual values in the training data
Shuttle ['3 Cone' 'Forty Yard Dash']
There are 520 players with metric and 1781 NaNs.
['3 Cone', 'Shuttle']
There are 7

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

{'Height': 'There are 0 NaNs out of 2247 players', 'Weight': 'There are 0 NaNs out of 2247 players', 'Forty Yard Dash': 'There are 1202 NaNs out of 2247 players', 'Shuttle': 'There are 1497 NaNs out of 2247 players', '3 Cone': 'There are 1689 NaNs out of 2247 players', 'Vertical Jump': 'There are 1487 NaNs out of 2247 players'}
Forty Yard Dash ['3 Cone' 'Shuttle']
There are 1045 players with metric and 1202 NaNs.
['3 Cone', 'Forty Yard Dash']
There are 27 players with a 3 Cone (predictor) value and 1175 players with NaNs for 3 Cone.
True
[0.51127988]
The RMSE of the model on the training data is 0.33975283400619066. The average Forty Yard Dash of the training data is 5.111137476459507. On average, predictions are 0.06647303766940309 from the actual values in the training data
Shuttle ['3 Cone' 'Forty Yard Dash']
There are 750 players with metric and 1497 NaNs.
['3 Cone', 'Shuttle']
There are 5 players with a 3 Cone (predictor) value and 1492 players with NaNs for 3 Cone.
True
[0.459848

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Forty Yard Dash
There are 1202 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1497 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 1689 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1487 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 2172 players', 'Weight': 'There are 0 NaNs out of 2172 players', 'Forty Yard Dash': 'There are 1263 NaNs out of 2172 players', 'Vertical Jump': 'There are 1938 NaNs out of 2172 players'}
Forty Yard Dash []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Vertical Jump ['Weight']
There are 234 players with metric and 1938 NaNs.
['Weight', 'Vertical Jump']
There are 1938 players with a Weight (predictor) value and 0 players with NaNs for Weight.
True
[0.09552159]
The RMSE of the model on the training data is 4.178918444258799. The average Vertical Jump of the training data is 27.628931623931617. On average, predictions are 0.15125153955063195 from the actual values in the training data
Forty Yard Dash
There are 1263 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1938 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 2060 players', 'Weight': 'There are 0 NaNs out of 2060 players', 'Vertical Jump': 'There are 1862 NaNs out of 2060 players', 'Powerball Toss': 'There are 2060 NaNs out of 2060 players'}
Vertical Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)


Vertical Jump
There are 1862 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 2060 NaN values for this metric
There are 2060 NaN values for this metric
{'Height': 'There are 0 NaNs out of 1304 players', 'Weight': 'There are 0 NaNs out of 1304 players', 'Wingspan': 'There are 1276 NaNs out of 1304 players', 'Arm Length': 'There are 1282 NaNs out of 1304 players', 'Hand Size': 'There are 1277 NaNs out of 1304 players', 'Forty Yard Dash': 'There are 555 NaNs out of 1304 players', 'Powerball Toss': 'There are 1303 NaNs out of 1304 players'}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[metric] = completed_metrics[metric]


Wingspan ['Arm Length']
There are 28 players with metric and 1276 NaNs.
['Arm Length', 'Wingspan']
There are 0 players with a Arm Length (predictor) value and 1276 players with NaNs for Arm Length.
True
[1.34850954]
The RMSE of the model on the training data is 2.28312197464914. The average Wingspan of the training data is 76.5909090909091. On average, predictions are 0.02980930768087898 from the actual values in the training data
Arm Length ['Wingspan' 'Height']
There are 22 players with metric and 1282 NaNs.
['Wingspan', 'Arm Length']
There are 6 players with a Wingspan (predictor) value and 1276 players with NaNs for Wingspan.
True
[0.34220076]
The RMSE of the model on the training data is 1.1501184902910302. The average Arm Length of the training data is 32.28409090909091. On average, predictions are 0.035624930357483514 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(l

Forty Yard Dash []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan
There are 1276 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 1282 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 1277 NaN values for this metric
There are 0 NaN values for this metric
Forty Yard Dash
There are 555 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 1303 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 783 players', 'Weight': 'There are 0 NaNs out of 783 players', 'Vertical Jump': 'There are 702 NaNs out of 783 players'}
Vertical Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Vertical Jump
There are 702 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 326 players', 'Weight': 'There are 0 NaNs out of 326 players', 'Forty Yard Dash': 'There are 124 NaNs out of 326 players', 'Vertical Jump': 'There are 188 NaNs out of 326 players'}
Forty Yard Dash []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Vertical Jump []
Forty Yard Dash
There are 124 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 188 NaN values for this metric
There are 0 NaN values for this metric
{'Height': 'There are 0 NaNs out of 277 players', 'Weight': 'There are 0 NaNs out of 277 players'}
{'Height': 'There are 0 NaNs out of 165 players', 'Weight': 'There are 0 NaNs out of 165 players', 'Vertical Jump': 'There are 150 NaNs out of 165 players'}
Vertical Jump []
Vertical Jump
There are 150 NaN values for this metric
There are 0 NaN values for this metric
Wall time: 1min 4s


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

In [216]:
cleaned_chosen_metrics_position_dataframes['WR']

Unnamed: 0,Full Name,Position,Height,Weight,Wingspan,Arm Length,Hand Size,Forty Yard Dash,Shuttle,3 Cone,Vertical Jump,Broad Jump
3,Emeka Egbuka,WR,72.50,190.0,75.000000,32.193956,9.00000,4.85900,4.200,7.083000,35.1000,112.0000
20,Sage Ryan,WR,70.75,197.0,73.000000,35.000000,20.00000,4.79400,4.133,7.348000,31.5000,117.0000
27,Jacorey Brooks,WR,74.00,180.0,76.000000,32.591408,8.66000,4.83000,4.340,7.408431,32.8000,106.7080
33,Kevin Coleman,WR,70.00,157.0,72.818846,31.500000,9.39296,4.61000,4.160,7.231957,32.9000,105.7148
37,Luther Burden,WR,74.00,195.0,76.634506,31.500000,22.33400,4.52664,4.650,7.710000,33.1680,114.3360
...,...,...,...,...,...,...,...,...,...,...,...,...
71363,Tyler Grake,WR,61.00,91.0,64.233611,31.500000,7.81336,6.34040,4.650,7.710000,19.2688,81.2572
71365,Carl Collins III,WR,56.00,86.0,59.464035,31.500000,7.81336,6.34040,4.650,7.710000,19.2688,81.2572
71366,Antrone\t Coston,WR,61.00,85.0,64.233611,31.500000,7.81336,6.34040,4.650,7.710000,19.2688,81.2572
71367,Gerald\t Lampkin,WR,59.00,84.0,62.325780,31.500000,7.81336,6.34040,4.650,7.710000,19.2688,81.2572


In [217]:
column_names = cleaned_combine_metrics_position_dataframes['DE'].columns
df = pd.DataFrame(columns = column_names)

In [218]:
for position in positions:
    df = df.append(cleaned_combine_metrics_position_dataframes[position])

In [219]:
df = df.sort_index()

In [220]:
df.to_csv("zcruit_and_combine.csv")

In [222]:
still_nan_events_for_positions_zcruit = {}
for position in positions:
    series = df[df['Position'] == position].isna().sum() > 0
    still_nan_events_for_positions_zcruit[position] = series[series].index.values

In [223]:
still_nan_events_for_positions_zcruit

{'WR': array([], dtype=object),
 'RB': array([], dtype=object),
 'LB': array([], dtype=object),
 'QB': array([], dtype=object),
 'DE': array([], dtype=object),
 'CB': array([], dtype=object),
 'OL': array(['Arm Length', 'Powerball Toss'], dtype=object),
 'DT': array([], dtype=object),
 'S': array([], dtype=object),
 'OT': array([], dtype=object),
 'OG': array([], dtype=object),
 'TE': array([], dtype=object),
 'ATH': array(['Arm Length'], dtype=object),
 'DB': array(['Arm Length', 'Powerball Toss'], dtype=object),
 'DL': array(['Arm Length', 'Powerball Toss'], dtype=object),
 'OC': array([], dtype=object),
 'K': array(['Wingspan', 'Arm Length', 'Hand Size', 'Powerball Toss'],
       dtype=object),
 'FB': array(['Powerball Toss'], dtype=object),
 'LS': array(['Arm Length', 'Powerball Toss'], dtype=object),
 'P': array(['Wingspan', 'Arm Length', 'Hand Size', 'Powerball Toss'],
       dtype=object)}

In [227]:
data.isna().sum()

Full Name                     0
Last Name                     0
Grad Year                     0
Position                    248
Height                        0
Height Source                 0
Weight                        0
Weight Source                 0
Zcruit Rating             49474
Committed To              66941
Commitment Date           66941
Signed To                 68217
Offers                    57754
Visits                    69586
Forty Yard Dash           35994
Forty Yard Dash Source    35994
Shuttle                   48741
Shuttle Source            48741
Vertical Jump             50106
Vertical Jump Source      50106
Broad Jump                51022
Broad Jump Source         51022
3 Cone                    56172
3 Cone Source             56172
Wingspan                  69035
Wingspan Source           69035
Arm Length                69927
Arm Length Source         69927
Hand Size                 69140
Hand Size Source          69140
Powerball Toss            71221
Powerbal

### Handling NaNs - Track and Field Data

In [24]:
zcruit_data = pd.read_csv("zcruit_and_combine.csv")

In [33]:
zcruit_data = zcruit_data.drop(columns = 'Unnamed: 0')

In [37]:
original_data = pd.read_csv("ZCruit_Data.csv", error_bad_lines=False)

b'Skipping line 1845: expected 70 fields, saw 72\nSkipping line 3345: expected 70 fields, saw 72\n'
b'Skipping line 11921: expected 70 fields, saw 72\nSkipping line 12609: expected 70 fields, saw 72\nSkipping line 13420: expected 70 fields, saw 72\nSkipping line 14108: expected 70 fields, saw 72\nSkipping line 14512: expected 70 fields, saw 72\n'
b'Skipping line 16512: expected 70 fields, saw 72\nSkipping line 21242: expected 70 fields, saw 72\nSkipping line 23743: expected 70 fields, saw 72\n'
b'Skipping line 58041: expected 70 fields, saw 72\nSkipping line 60541: expected 70 fields, saw 72\n'
b'Skipping line 70025: expected 70 fields, saw 72\nSkipping line 72525: expected 70 fields, saw 72\n'
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [40]:
original_data.head(5)

Unnamed: 0,Zcruit ID,Full Name,First Name,Last Name,Grad Year,Position,Height,Height Source,Weight,Weight Source,...,GPA,ACT,SAT,Twitter,Instagram,Snapchat,247Sports,Rivals,Hudl,Zcruit
0,c4fcba47-0337-4047-bc4d-0187da6f698e,J.T. Tuimoloau,J.T.,Tuimoloau,2021,DE,"6' 4""",national_preps,275.0,national_preps,...,3.0,,,JT_tuimoloau,,,https://247sports.com/player/46048440/,https://n.rivals.com/content/prospects/208999/,,https://zcruit.com/prospects/c4fcba47-0337-404...
1,1db04d99-cca6-4bfc-8a5a-3d40ae1795e0,JC Latham,JC,Latham,2021,OT,"6' 6""",national_preps,279.0,nike_opening,...,2.35,,,TKJaayy,,,https://247sports.com/player/46049894/,https://n.rivals.com/content/prospects/209086/,www.hudl.com/profiles/feed-user/3-9698914,https://zcruit.com/prospects/1db04d99-cca6-4bf...
2,2481c4e2-f105-4e9f-b85d-0925a343910b,Korey Foreman,Korey,Foreman,2021,DE,"6' 4""",nike_opening,248.0,nike_opening,...,3.03,,,koreyforeman54,Koreyforeman_54,Korey4Man,https://247sports.com/player/46056100/,https://n.rivals.com/content/prospects/216686/,www.hudl.com/profiles/feed-user/3-10337933,https://zcruit.com/prospects/2481c4e2-f105-4e9...
3,90d817c7-b0d4-40c7-92d5-585d2bb09425,Caleb Williams,Caleb,Williams,2021,QB,"6' 1""",national_preps,209.0,nike_opening,...,,,,CALEBcsw,,,https://247sports.com/player/46040610/,https://n.rivals.com/content/prospects/210175/,www.hudl.com/profiles/feed-user/3-6269274,https://zcruit.com/prospects/90d817c7-b0d4-40c...
4,7351f453-1854-48ba-aa2c-e6eb9f4b9859,Dallas Turner,Dallas,Turner,2021,DE,"6' 2.5""",rivals_camp,223.0,rivals_camp,...,3.7,,,UnoErra,,,https://247sports.com/player/46051516/,https://n.rivals.com/content/prospects/211051/,www.hudl.com/profiles/feed-user/3-6475053,https://zcruit.com/prospects/7351f453-1854-48b...


In [45]:
data.head(5)

Unnamed: 0,Full Name,Last Name,Grad Year,Position,Height,Height Source,Weight,Weight Source,Zcruit Rating,Committed To,...,Hand Size Source,Powerball Toss,Powerball Toss Source,SPARQ Rating,SPARQ Rating Source,GPA,ACT,SAT,Unnamed: 37,Unnamed: 38
0,Korey Foreman,Foreman,2021,DE,76.0,nike_opening,248.0,nike_opening,96.697,USC,...,,44.5,nike_opening,98.76,nike_opening,3.03,,,,
1,J.T. Tuimoloau,Tuimoloau,2021,DE,76.0,national_preps,275.0,national_preps,96.697,,...,,,,,,3.0,,,,
2,JC Latham,Latham,2021,OT,78.0,national_preps,279.0,nike_opening,96.697,Alabama,...,,,,,,2.35,,,,
3,Emeka Egbuka,Egbuka,2021,WR,72.5,nike_opening,190.0,nike_opening,96.3939,Ohio State,...,national_combine,41.0,nike_opening,,,4.0,,,,
4,Dallas Turner,Turner,2021,DE,74.5,rivals_camp,223.0,rivals_camp,96.3939,Alabama,...,rivals_camp,,,,,3.7,,,,


In [46]:
data['Grad Year'].value_counts()

2021    36641
2022    22121
2023     9684
2024     2924
Name: Grad Year, dtype: int64

In [60]:
data

Unnamed: 0,Full Name,Last Name,Grad Year,Position,Height,Height Source,Weight,Weight Source,Zcruit Rating,Committed To,...,Hand Size Source,Powerball Toss,Powerball Toss Source,SPARQ Rating,SPARQ Rating Source,GPA,ACT,SAT,Unnamed: 37,Unnamed: 38
0,Korey Foreman,Foreman,2021,DE,76.0,nike_opening,248.0,nike_opening,96.6970,USC,...,,44.5,nike_opening,98.76,nike_opening,3.03,,,,
1,J.T. Tuimoloau,Tuimoloau,2021,DE,76.0,national_preps,275.0,national_preps,96.6970,,...,,,,,,3.00,,,,
2,JC Latham,Latham,2021,OT,78.0,national_preps,279.0,nike_opening,96.6970,Alabama,...,,,,,,2.35,,,,
3,Emeka Egbuka,Egbuka,2021,WR,72.5,nike_opening,190.0,nike_opening,96.3939,Ohio State,...,national_combine,41.0,nike_opening,,,4.00,,,,
4,Dallas Turner,Turner,2021,DE,74.5,rivals_camp,223.0,rivals_camp,96.3939,Alabama,...,rivals_camp,,,,,3.70,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71365,Carl Collins III,Collins III,2022,WR,56.0,rivals,86.0,rivals,,,...,,,,,,,,,,
71366,Antrone\t Coston,Coston,2024,WR,61.0,rivals,85.0,rivals,,,...,,,,,,,,,,
71367,Gerald\t Lampkin,Lampkin,2024,WR,59.0,rivals,84.0,rivals,,,...,,,,,,,,,,
71368,Jaeden\t Hamm,Hamm,2024,WR,58.0,rivals,80.0,rivals,,,...,,,,,,,,,,


In [59]:
original_data

Unnamed: 0,Zcruit ID,Full Name,First Name,Last Name,Grad Year,Position,Height,Height Source,Weight,Weight Source,...,GPA,ACT,SAT,Twitter,Instagram,Snapchat,247Sports,Rivals,Hudl,Zcruit
0,c4fcba47-0337-4047-bc4d-0187da6f698e,J.T. Tuimoloau,J.T.,Tuimoloau,2021,DE,"6' 4""",national_preps,275.0,national_preps,...,3.00,,,JT_tuimoloau,,,https://247sports.com/player/46048440/,https://n.rivals.com/content/prospects/208999/,,https://zcruit.com/prospects/c4fcba47-0337-404...
1,1db04d99-cca6-4bfc-8a5a-3d40ae1795e0,JC Latham,JC,Latham,2021,OT,"6' 6""",national_preps,279.0,nike_opening,...,2.35,,,TKJaayy,,,https://247sports.com/player/46049894/,https://n.rivals.com/content/prospects/209086/,www.hudl.com/profiles/feed-user/3-9698914,https://zcruit.com/prospects/1db04d99-cca6-4bf...
2,2481c4e2-f105-4e9f-b85d-0925a343910b,Korey Foreman,Korey,Foreman,2021,DE,"6' 4""",nike_opening,248.0,nike_opening,...,3.03,,,koreyforeman54,Koreyforeman_54,Korey4Man,https://247sports.com/player/46056100/,https://n.rivals.com/content/prospects/216686/,www.hudl.com/profiles/feed-user/3-10337933,https://zcruit.com/prospects/2481c4e2-f105-4e9...
3,90d817c7-b0d4-40c7-92d5-585d2bb09425,Caleb Williams,Caleb,Williams,2021,QB,"6' 1""",national_preps,209.0,nike_opening,...,,,,CALEBcsw,,,https://247sports.com/player/46040610/,https://n.rivals.com/content/prospects/210175/,www.hudl.com/profiles/feed-user/3-6269274,https://zcruit.com/prospects/90d817c7-b0d4-40c...
4,7351f453-1854-48ba-aa2c-e6eb9f4b9859,Dallas Turner,Dallas,Turner,2021,DE,"6' 2.5""",rivals_camp,223.0,rivals_camp,...,3.70,,,UnoErra,,,https://247sports.com/player/46051516/,https://n.rivals.com/content/prospects/211051/,www.hudl.com/profiles/feed-user/3-6475053,https://zcruit.com/prospects/7351f453-1854-48b...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110485,555493d1-4238-4a54-b006-eb4022a22807,Zyreon Rogers,Zyreon,Rogers,2022,DL,"5' 11""",national_preps,280.0,national_preps,...,1.70,,,,,,,,,https://zcruit.com/prospects/555493d1-4238-4a5...
110486,e0c678c7-8ef9-4d95-a988-456b45811eff,Zyshawn Witcher,Zyshawn,Witcher,2022,QB,"6' 0""",ncsa,156.0,ncsa,...,3.00,,,,,,,,,https://zcruit.com/prospects/e0c678c7-8ef9-4d9...
110487,5c88da3d-0953-4a48-beb3-3f8f398c31b2,Zyterius Watts,Zyterius,Watts,2022,RB,"5' 8""",national_preps,175.0,national_preps,...,2.00,,,atkwavy,,,,,,https://zcruit.com/prospects/5c88da3d-0953-4a4...
110488,82a03440-f03d-4d70-89f4-d378fc21a401,Zytwan Kitchen,Zytwan,Kitchen,2021,RB,"5' 6""",rivals_combine,157.0,rivals_combine,...,3.30,,,,,,,https://n.rivals.com/content/prospects/260969/,,https://zcruit.com/prospects/82a03440-f03d-4d7...


In [61]:
idk = data.merge(original_data, how = "left", on = ["Full Name", "Grad Year", "Position", "Height Source", "Weight Source", "Committed To"], copy = False)

In [76]:
data_ryan_fill = data.fillna(-1000)
original_data_ryan_fill = original_data.fillna(-1000)

In [77]:
data_ryan_fill.shape[0]

71370

In [96]:
data_ryan_fill.drop_duplicates().shape[0]

71362

In [102]:
original_data.columns

Index(['Zcruit ID', 'Full Name', 'First Name', 'Last Name', 'Grad Year',
       'Position', 'Height', 'Height Source', 'Weight', 'Weight Source',
       'Zcruit Rating', 'Status', 'Committed To', 'Commitment Date',
       'Signed To', 'Offers', 'Visits', 'Athlete Address', 'Athlete City',
       'Athlete State', 'Athlete Zip Code', 'Athlete Cell Phone',
       'Athlete Home Phone', 'DOB', 'Jersey Number', 'Parent Name',
       'Parent Phone', 'Parent Email', 'HS/JuCo', 'School Name',
       'School County', 'School Address', 'School City', 'School State',
       'School Zip', 'ETS #', 'Coach Name', 'Coach Cell Phone',
       'Coach Home Phone', 'Coach Email', 'Forty Yard Dash',
       'Forty Yard Dash Source', 'Shuttle', 'Shuttle Source', 'Vertical Jump',
       'Vertical Jump Source', 'Broad Jump', 'Broad Jump Source', '3 Cone',
       '3 Cone Source', 'Wingspan', 'Wingspan Source', 'Arm Length',
       'Arm Length Source', 'Hand Size', 'Hand Size Source', 'Powerball Toss',
       'Po

In [103]:
good_columns = ["Zcruit ID", "Full Name", "Grad Year", "Position", "Height Source", "Weight Source", "Committed To"]
original_data = original_data[good_columns]

In [108]:
data.shape[0]

71370

In [109]:
original_data["Full Name"].value_counts()

Connor Smith        20
Jordan Smith        16
Donovan Williams    16
Jalen Johnson       15
Elijah Smith        14
                    ..
Micheal Graham       1
Luke Morley          1
Ryan Harden          1
John Alcalar         1
Lee Thompson         1
Name: Full Name, Length: 67747, dtype: int64

In [111]:
connor_smith_original_data = original_data[original_data["Full Name"] == "Connor Smith"]
connor_smith_data = data[data["Full Name"] == "Connor Smith"]

In [118]:
connor_smith_merge = connor_smith_data.merge(connor_smith_original_data, how = "left", on = ["Full Name", "Grad Year", "Position", "Height Source", "Weight Source", "Committed To"]).drop_duplicates()

In [125]:
connor_smith_merge['Grad Year'].values == connor_smith_data['Grad Year'].values

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [123]:
connor_smith_data

Unnamed: 0,Full Name,Last Name,Grad Year,Position,Height,Height Source,Weight,Weight Source,Zcruit Rating,Committed To,Commitment Date,Signed To,Offers,Visits,Forty Yard Dash,Forty Yard Dash Source,Shuttle,Shuttle Source,Vertical Jump,Vertical Jump Source,Broad Jump,Broad Jump Source,3 Cone,3 Cone Source,Wingspan,Wingspan Source,Arm Length,Arm Length Source,Hand Size,Hand Size Source,Powerball Toss,Powerball Toss Source,SPARQ Rating,SPARQ Rating Source,GPA,ACT,SAT,Unnamed: 37,Unnamed: 38
7562,Connor Smith,Smith,2021,OT,77.0,twoFourSeven,275.0,twoFourSeven,54.25,Yale,7/2/2020,,Massachusetts; Bryant; Yale,,,,,,,,,,,,,,,,,,,,,,,,,,
14278,Connor Smith,Smith,2021,OC,72.0,rivals,246.0,rivals,20.0,,,,,,5.1,ncsa,,,,,,,,,,,,,,,,,,,3.3,,,,
25947,Connor Smith,Smith,2022,QB,72.0,rivals_combine,163.0,rivals_combine,,,,,,,5.597,rivals_combine,4.537,rivals_combine,19.3,rivals_combine,98.0,rivals_combine,7.734,rivals_combine,,,,,,,,,,,3.5,,,,
45936,Connor Smith,Smith,2021,TE,72.0,rivals_combine,187.0,rivals_combine,,Morehead State,12/16/2020,,,,,,,,27.9,rivals_combine,104.0,rivals_combine,,,,,,,,,,,,,3.4,,,,
51845,Connor Smith,Smith,2022,OT,71.0,ncsa,235.0,ncsa,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0,,,,
51970,Connor Smith,Smith,2023,QB,72.0,ncsa,210.0,ncsa,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0,,,,
52767,Connor Smith,Smith,2023,WR,67.0,rivals_combine,123.0,rivals_combine,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0,,,,
54601,Connor Smith,Smith,2021,RB,70.0,national_preps,206.0,national_preps,,,,,,,,,,,,,,,,,,,,,,,,,,,2.7,,,,
67430,Connor Smith,Smith,2022,QB,74.0,rivals,169.0,rivals,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
70777,Connor Smith,Smith,2024,WR,68.0,rivals,133.0,rivals,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [110]:
data["Full Name"].value_counts()

Jordan Smith       14
Connor Smith       10
Jayden Williams     9
Jacob Smith         9
Jordan Johnson      9
                   ..
Ashton Stroman      1
Johnny Lindsay      1
Tanner Cassell      1
Jarred PArks        1
Simeon Smith        1
Name: Full Name, Length: 67648, dtype: int64

In [105]:
idk_2 = data.merge(original_data, how = "left", on = ["Full Name", "Grad Year", "Position", "Height Source", "Weight Source", "Committed To"])

In [107]:
idk_2 = idk_2.drop_duplicates()
idk_2

Unnamed: 0,Full Name,Last Name,Grad Year,Position,Height,Height Source,Weight,Weight Source,Zcruit Rating,Committed To,Commitment Date,Signed To,Offers,Visits,Forty Yard Dash,Forty Yard Dash Source,Shuttle,Shuttle Source,Vertical Jump,Vertical Jump Source,Broad Jump,Broad Jump Source,3 Cone,3 Cone Source,Wingspan,Wingspan Source,Arm Length,Arm Length Source,Hand Size,Hand Size Source,Powerball Toss,Powerball Toss Source,SPARQ Rating,SPARQ Rating Source,GPA,ACT,SAT,Unnamed: 37,Unnamed: 38,Zcruit ID
0,Korey Foreman,Foreman,2021,DE,76.0,nike_opening,248.0,nike_opening,96.6970,USC,1/2/2021,USC,Sam Houston State; USC; Utah; Arizona; Oregon;...,Alabama,4.880,nike_opening,4.700,nike_opening,30.4,nike_opening,104.0,rivals_combine,7.758,rivals_combine,,,,,,,44.5,nike_opening,98.76,nike_opening,3.03,,,,,2481c4e2-f105-4e9f-b85d-0925a343910b
2,J.T. Tuimoloau,Tuimoloau,2021,DE,76.0,national_preps,275.0,national_preps,96.6970,,,,Alabama; Texas A&M; Penn State; Ohio State; US...,Alabama; Washington; Washington,4.750,national_preps,,,,,,,,,,,,,,,,,,,3.00,,,,,c4fcba47-0337-4047-bc4d-0187da6f698e
4,JC Latham,Latham,2021,OT,78.0,national_preps,279.0,nike_opening,96.6970,Alabama,6/12/2020,Alabama,Iowa; Minnesota; Michigan; Penn State; Oklahom...,Minnesota; Iowa; Ohio State; Ohio State,5.460,nike_opening,4.870,nike_opening,26.8,nike_opening,,,,,,,,,,,,,,,2.35,,,,,1db04d99-cca6-4bfc-8a5a-3d40ae1795e0
6,Emeka Egbuka,Egbuka,2021,WR,72.5,nike_opening,190.0,nike_opening,96.3939,Ohio State,12/11/2020,Ohio State,Washington; Florida; Oklahoma; Washington Stat...,Florida State; Ohio State; Notre Dame; Michiga...,4.859,national_combine,4.200,nike_opening,35.1,nike_opening,112.0,national_combine,7.083,national_combine,75.0,national_combine,,,9.00,national_combine,41.0,nike_opening,,,4.00,,,,,abda246f-bb45-4b10-baf1-a9e5d508cc84
8,Dallas Turner,Turner,2021,DE,74.5,rivals_camp,223.0,rivals_camp,96.3939,Alabama,7/1/2020,Alabama,LSU; Kentucky; Baylor; Tennessee; Syracuse; Ma...,Miami (FL); Miami (FL); Florida State; Ohio St...,4.991,national_combine,4.487,national_combine,26.0,national_combine,115.0,national_combine,7.752,national_combine,83.5,rivals_camp,34.25,rivals_camp,9.84,rivals_camp,,,,,3.70,,,,,7351f453-1854-48ba-aa2c-e6eb9f4b9859
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105124,Carl Collins III,Collins III,2022,WR,56.0,rivals,86.0,rivals,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1a456275-5dc4-49dd-a0dc-be45d36f0653
105126,Antrone\t Coston,Coston,2024,WR,61.0,rivals,85.0,rivals,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,cb489357-272f-49cd-99a4-903bcbd6ed86
105128,Gerald\t Lampkin,Lampkin,2024,WR,59.0,rivals,84.0,rivals,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,f5cbe094-2e5a-42b4-9917-d0a5121a0a33
105129,Jaeden\t Hamm,Hamm,2024,WR,58.0,rivals,80.0,rivals,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2d1ba563-d8b0-42de-aca2-38702763aa64


In [127]:
full_names = data["Full Name"].values

In [130]:
len(full_names)

71370

In [136]:
idk_2[idk_2['Full Name'].isin(full_names) == False]

Unnamed: 0,Full Name,Last Name,Grad Year,Position,Height,Height Source,Weight,Weight Source,Zcruit Rating,Committed To,Commitment Date,Signed To,Offers,Visits,Forty Yard Dash,Forty Yard Dash Source,Shuttle,Shuttle Source,Vertical Jump,Vertical Jump Source,Broad Jump,Broad Jump Source,3 Cone,3 Cone Source,Wingspan,Wingspan Source,Arm Length,Arm Length Source,Hand Size,Hand Size Source,Powerball Toss,Powerball Toss Source,SPARQ Rating,SPARQ Rating Source,GPA,ACT,SAT,Unnamed: 37,Unnamed: 38,Zcruit ID


In [99]:
idk_ryan = data_ryan_fill.merge(original_data_ryan_fill, how = "left", on = ["Full Name", "Grad Year", "Position", "Height Source", "Weight Source", "Committed To"])

In [100]:
idk_ryan.drop_duplicates().shape[0]

73415

In [83]:
s = idk_ryan.iloc[0] == idk_ryan.iloc[1]
s.values

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True])

In [63]:
pd.set_option('display.max_columns', None)
idk

Unnamed: 0,Full Name,Last Name_x,Grad Year,Position,Height_x,Height Source,Weight_x,Weight Source,Zcruit Rating_x,Committed To,Commitment Date_x,Signed To_x,Offers_x,Visits_x,Forty Yard Dash_x,Forty Yard Dash Source_x,Shuttle_x,Shuttle Source_x,Vertical Jump_x,Vertical Jump Source_x,Broad Jump_x,Broad Jump Source_x,3 Cone_x,3 Cone Source_x,Wingspan_x,Wingspan Source_x,Arm Length_x,Arm Length Source_x,Hand Size_x,Hand Size Source_x,Powerball Toss_x,Powerball Toss Source_x,SPARQ Rating_x,SPARQ Rating Source_x,GPA_x,ACT_x,SAT_x,Unnamed: 37,Unnamed: 38,Zcruit ID,First Name,Last Name_y,Height_y,Weight_y,Zcruit Rating_y,Status,Commitment Date_y,Signed To_y,Offers_y,Visits_y,Athlete Address,Athlete City,Athlete State,Athlete Zip Code,Athlete Cell Phone,Athlete Home Phone,DOB,Jersey Number,Parent Name,Parent Phone,Parent Email,HS/JuCo,School Name,School County,School Address,School City,School State,School Zip,ETS #,Coach Name,Coach Cell Phone,Coach Home Phone,Coach Email,Forty Yard Dash_y,Forty Yard Dash Source_y,Shuttle_y,Shuttle Source_y,Vertical Jump_y,Vertical Jump Source_y,Broad Jump_y,Broad Jump Source_y,3 Cone_y,3 Cone Source_y,Wingspan_y,Wingspan Source_y,Arm Length_y,Arm Length Source_y,Hand Size_y,Hand Size Source_y,Powerball Toss_y,Powerball Toss Source_y,SPARQ Rating_y,SPARQ Rating Source_y,GPA_y,ACT_y,SAT_y,Twitter,Instagram,Snapchat,247Sports,Rivals,Hudl,Zcruit
0,Korey Foreman,Foreman,2021,DE,76.0,nike_opening,248.0,nike_opening,96.697,USC,1/2/2021,USC,Sam Houston State; USC; Utah; Arizona; Oregon;...,Alabama,4.88,nike_opening,4.70,nike_opening,30.4,nike_opening,104.0,rivals_combine,7.758,rivals_combine,,,,,,,44.5,nike_opening,98.76,nike_opening,3.03,,,,,2481c4e2-f105-4e9f-b85d-0925a343910b,Korey,Foreman,"6' 4""",248.0,96.697,,2021-1-2,USC,Sam Houston State; USC; Utah; Arizona; Oregon;...,Alabama,"1880 Baywood Dr, Apt 201",Corona,CA,92881.0,(323) 316-3855,(323) 316-3855,,2,Kevin/Mariessa Foreman,(323) 316-3855,Mariessaforeman@Gmail.Com,HS,Centennial High School,Riverside,1820 RIMPAU AVE,Corona,CA,92881.0,50676.0,Matt Logan,(951) 532-9105,(951) 739-5670,huskycoach@sbcglobal.net,4.88,nike_opening,4.70,nike_opening,30.4,nike_opening,104.0,rivals_combine,7.758,rivals_combine,,,,,,,44.5,nike_opening,98.76,nike_opening,3.03,,,koreyforeman54,Koreyforeman_54,Korey4Man,https://247sports.com/player/46056100/,https://n.rivals.com/content/prospects/216686/,www.hudl.com/profiles/feed-user/3-10337933,https://zcruit.com/prospects/2481c4e2-f105-4e9...
1,Korey Foreman,Foreman,2021,DE,76.0,nike_opening,248.0,nike_opening,96.697,USC,1/2/2021,USC,Sam Houston State; USC; Utah; Arizona; Oregon;...,Alabama,4.88,nike_opening,4.70,nike_opening,30.4,nike_opening,104.0,rivals_combine,7.758,rivals_combine,,,,,,,44.5,nike_opening,98.76,nike_opening,3.03,,,,,2481c4e2-f105-4e9f-b85d-0925a343910b,Korey,Foreman,"6' 4""",248.0,96.697,,2021-1-2,USC,Sam Houston State; USC; Utah; Arizona; Oregon;...,Alabama,"1880 Baywood Dr, Apt 201",Corona,CA,92881.0,(323) 316-3855,(323) 316-3855,,2,Kevin/Mariessa Foreman,(323) 316-3855,Mariessaforeman@Gmail.Com,HS,Centennial High School,Riverside,1820 RIMPAU AVE,Corona,CA,92881.0,50676.0,Matt Logan,(951) 532-9105,(951) 739-5670,huskycoach@sbcglobal.net,4.88,nike_opening,4.70,nike_opening,30.4,nike_opening,104.0,rivals_combine,7.758,rivals_combine,,,,,,,44.5,nike_opening,98.76,nike_opening,3.03,,,koreyforeman54,Koreyforeman_54,Korey4Man,https://247sports.com/player/46056100/,https://n.rivals.com/content/prospects/216686/,www.hudl.com/profiles/feed-user/3-10337933,https://zcruit.com/prospects/2481c4e2-f105-4e9...
2,J.T. Tuimoloau,Tuimoloau,2021,DE,76.0,national_preps,275.0,national_preps,96.697,,,,Alabama; Texas A&M; Penn State; Ohio State; US...,Alabama; Washington; Washington,4.75,national_preps,,,,,,,,,,,,,,,,,,,3.00,,,,,c4fcba47-0337-4047-bc4d-0187da6f698e,J.T.,Tuimoloau,"6' 4""",275.0,96.697,,,,Alabama; Texas A&M; Penn State; Ohio State; US...,Alabama; Washington; Washington,,Sammamish,WA,98074.0,(253) 431-8348,,,33,,,,HS,Eastside Catholic High School,King,232 228TH AVE SE,Sammamish,WA,98074.0,480067.0,Dominic Daste,(425) 681-1965,(425) 295-3148,ddaste@eastsidecatholic.org,4.75,national_preps,,,,,,,,,,,,,,,,,,,3.00,,,JT_tuimoloau,,,https://247sports.com/player/46048440/,https://n.rivals.com/content/prospects/208999/,,https://zcruit.com/prospects/c4fcba47-0337-404...
3,J.T. Tuimoloau,Tuimoloau,2021,DE,76.0,national_preps,275.0,national_preps,96.697,,,,Alabama; Texas A&M; Penn State; Ohio State; US...,Alabama; Washington; Washington,4.75,national_preps,,,,,,,,,,,,,,,,,,,3.00,,,,,c4fcba47-0337-4047-bc4d-0187da6f698e,J.T.,Tuimoloau,"6' 4""",275.0,96.697,,,,Alabama; Texas A&M; Penn State; Ohio State; US...,Alabama; Washington; Washington,,Sammamish,WA,98074.0,(253) 431-8348,,,33,,,,HS,Eastside Catholic High School,King,232 228TH AVE SE,Sammamish,WA,98074.0,480067.0,Dominic Daste,(425) 681-1965,(425) 295-3148,ddaste@eastsidecatholic.org,4.75,national_preps,,,,,,,,,,,,,,,,,,,3.00,,,JT_tuimoloau,,,https://247sports.com/player/46048440/,https://n.rivals.com/content/prospects/208999/,,https://zcruit.com/prospects/c4fcba47-0337-404...
4,JC Latham,Latham,2021,OT,78.0,national_preps,279.0,nike_opening,96.697,Alabama,6/12/2020,Alabama,Iowa; Minnesota; Michigan; Penn State; Oklahom...,Minnesota; Iowa; Ohio State; Ohio State,5.46,nike_opening,4.87,nike_opening,26.8,nike_opening,,,,,,,,,,,,,,,2.35,,,,,1db04d99-cca6-4bfc-8a5a-3d40ae1795e0,JC,Latham,"6' 6""",279.0,96.697,,2020-6-12,Alabama,Iowa; Minnesota; Michigan; Penn State; Oklahom...,Minnesota; Iowa; Ohio State; Ohio State,8323 S. Verdev Dr,Oak Creek,WI,53154.0,(262) 617-8925,(262) 930-0593,,55,,,,HS,IMG Academy,Manatee,5500 34TH ST W,Bradenton,FL,34210.0,100179.0,Bobby Acosta,,(941) 739-7399,,5.46,nike_opening,4.87,nike_opening,26.8,nike_opening,,,,,,,,,,,,,,,2.35,,,TKJaayy,,,https://247sports.com/player/46049894/,https://n.rivals.com/content/prospects/209086/,www.hudl.com/profiles/feed-user/3-9698914,https://zcruit.com/prospects/1db04d99-cca6-4bf...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105127,Antrone\t Coston,Coston,2024,WR,61.0,rivals,85.0,rivals,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,cb489357-272f-49cd-99a4-903bcbd6ed86,Antrone\t,Coston,"5' 1""",85.0,,,,,,,,,,,,,,,,,,HS,Saint Brendan High School,Miami-Dade,2950 SW 87TH AVE,Miami,FL,33165.0,101097.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://n.rivals.com/content/prospects/266438/,,https://zcruit.com/prospects/cb489357-272f-49c...
105128,Gerald\t Lampkin,Lampkin,2024,WR,59.0,rivals,84.0,rivals,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,f5cbe094-2e5a-42b4-9917-d0a5121a0a33,Gerald\t,Lampkin,"4' 11""",84.0,,,,,,,,,,,,,,,,,,HS,John Ehret High School,Jefferson,4300 PATRIOT ST,Marrero,LA,70072.0,191724.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://n.rivals.com/content/prospects/266052/,,https://zcruit.com/prospects/f5cbe094-2e5a-42b...
105129,Jaeden\t Hamm,Hamm,2024,WR,58.0,rivals,80.0,rivals,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2d1ba563-d8b0-42de-aca2-38702763aa64,Jaeden\t,Hamm,"4' 10""",80.0,,,,,,,,,,,,,,,,,,HS,Middle School,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://n.rivals.com/content/prospects/262899/,,https://zcruit.com/prospects/2d1ba563-d8b0-42d...
105130,Eugene Thomas,Thomas,2022,RB,57.0,rivals,71.0,rivals,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7d27d80b-1156-45ec-afe0-6f638d45fea6,Eugene,Thomas,"4' 9""",71.0,,,,,,,,,,,,,,,,,,HS,Arlington Country Day School,Duval,5725 FORT CAROLINE RD,Jacksonville,FL,32277.0,100731.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,eugene_thomas45,,,,https://n.rivals.com/content/prospects/213073/,,https://zcruit.com/prospects/7d27d80b-1156-45e...


In [58]:
data.join(original_data, how = "left",  on = ["Full Name", "Grad Year", "Position", "Height Source", "Weight Source", "Committed To"])

ValueError: len(left_on) must equal the number of levels in the index of "right"

In [54]:
idk.iloc[1]

Full Name                                          Korey Foreman
Last Name_x                                              Foreman
Grad Year                                                   2021
Position                                                      DE
Height_x                                                    76.0
                                     ...                        
Snapchat                                               Korey4Man
247Sports                 https://247sports.com/player/46056100/
Rivals            https://n.rivals.com/content/prospects/216686/
Hudl                  www.hudl.com/profiles/feed-user/3-10337933
Zcruit         https://zcruit.com/prospects/2481c4e2-f105-4e9...
Name: 1, Length: 103, dtype: object

In [51]:
zcruit_data

Unnamed: 0,Full Name,Position,Height,Weight,Forty Yard Dash,3 Cone,Shuttle,Vertical Jump,Broad Jump,Wingspan,Arm Length,Hand Size,Powerball Toss
0,Korey Foreman,DE,76.0,248.0,4.8800,7.7580,4.7000,30.40000,104.0000,79.386800,33.250000,9.691052,44.500000
1,J.T. Tuimoloau,DE,76.0,275.0,4.7500,8.1930,4.9085,22.82900,93.0000,79.386800,33.250000,8.753886,39.000000
2,JC Latham,OT,78.0,279.0,5.4600,8.2187,4.8700,26.80000,90.9524,79.500000,34.000000,10.220960,29.176265
3,Emeka Egbuka,WR,72.5,190.0,4.8590,7.0830,4.2000,35.10000,112.0000,75.000000,32.193956,9.000000,41.000000
4,Dallas Turner,DE,74.5,223.0,4.9910,7.7520,4.4870,26.00000,115.0000,83.500000,34.250000,9.840000,38.378617
...,...,...,...,...,...,...,...,...,...,...,...,...,...
71117,Carl Collins III,WR,56.0,86.0,6.3404,7.0215,4.6500,31.90000,81.2572,59.464035,31.500000,7.813360,38.750000
71118,Antrone\t Coston,WR,61.0,85.0,6.3404,7.0215,4.6500,31.90000,81.2572,64.233611,31.500000,7.813360,38.750000
71119,Gerald\t Lampkin,WR,59.0,84.0,6.3404,7.0215,4.6500,31.90000,81.2572,62.325780,31.500000,7.813360,38.750000
71120,Jaeden\t Hamm,WR,58.0,80.0,6.3404,7.0215,4.6500,31.90000,81.2572,61.371865,31.500000,7.813360,38.750000


In [170]:
tf_data = pd.read_csv("clean_tf_data.csv", sep=",")

In [171]:
tf_data = tf_data.drop(columns = 'Unnamed: 0')

In [172]:
tf_data

Unnamed: 0,Zcruit ID,Player Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus
0,7b8c6df6-b548-4ed4-ae9b-4a741d7746c6,Bryson Lightfoot,11.25,,,,,,,,
1,45be7538-29ad-4e4a-9d2e-be05b0d3f595,Jalik Lewis,,24.95,,,66.0,217.00,,,
2,ae1749c5-a0f1-4fbc-852b-1ac51f5b1404,Jaiden Brown,,25.90,,,,,,490.5,
3,eb32fcc6-a8f5-41df-b524-a4fc824a14f3,Mark Phifer,12.51,24.94,,,,,,,
4,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,11.74,25.77,,47.84,,207.50,,,
...,...,...,...,...,...,...,...,...,...,...,...
7667,f1873c0b-a931-4604-a33c-4439e32789eb,Jerod Thomas,12.26,,,,64.0,,,,
7668,1f7b327a-510e-4070-a3cd-bc0d2433010f,Tyler Bradley,11.51,23.05,,,,199.25,433.5,,
7669,6bcf9f7b-4c2d-4db2-a19e-bdea9c358d04,Jesse James Garcia,12.23,24.16,,,,176.50,,,
7670,fb194f5e-bc21-4852-838b-443eebac9aaf,Isaiah Robinson,11.53,24.10,,,,234.00,,,


In [147]:
original_data_grouped = original_data.groupby(by = "Zcruit ID").to_frame

AttributeError: 'DataFrameGroupBy' object has no attribute 'to_frame'

In [145]:
original_data_grouped

Unnamed: 0_level_0,Full Name,Position,Height Source,Weight Source,Committed To
Zcruit ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0000a62d-eb96-4913-8be4-f4b2e168f2f8,Jd Horton,RB,ncsa,ncsa,0
000132c6-192b-410b-bd61-6522eb17a4e0,Aiden GreenAiden Green,CBCB,national_combinenational_combine,national_combinenational_combine,0
0001b513-8d0f-4687-9357-d40fd1d4700a,Haseem KhanHaseem Khan,LBLB,national_prepsnational_preps,national_prepsnational_preps,0
0001c4f3-c4ca-4391-b818-d2db22771bf2,Yusef LeakYusef Leak,WRWR,national_prepsnational_preps,national_prepsnational_preps,St. ThomasSt. Thomas
0002dad6-4d51-4674-86c3-3ca21bfe1c04,Kendrick Bradley,RB,twoFourSeven,twoFourSeven,0
...,...,...,...,...,...
fffc21a9-aa87-4e06-bc5d-0258e43ad0c1,Landon EaglerLandon Eagler,OLOL,national_prepsnational_preps,national_prepsnational_preps,0
fffcd947-1cdc-4ff6-b7a7-15c705ecfc17,Jadon\t Haynes,DL,national_preps,national_preps,0
fffdcf58-94a6-4407-b82f-7c7ef0459499,Taylor Savage,WR,ncsa,ncsa,0
fffebbbd-88d6-4721-a22a-5c4165a162c3,Owen Plane,LB,national_preps,national_preps,0


In [142]:
data.merge(original_data_grouped, how = "left", on = ["Full Name", "Grad Year", "Position", "Height Source", "Weight Source", "Committed To"])

TypeError: Can only merge Series or DataFrame objects, a <class 'pandas.core.groupby.generic.DataFrameGroupBy'> was passed

#### Merge and Handle Duplicates

In [13]:
original_data = pd.read_csv("ZCruit_Data.csv", error_bad_lines=False)
good_columns = ["Zcruit ID", "Full Name", "Grad Year", "Position", "Height Source", "Weight Source", "Committed To"]
original_data = original_data[good_columns]

b'Skipping line 1845: expected 70 fields, saw 72\nSkipping line 3345: expected 70 fields, saw 72\n'
b'Skipping line 11921: expected 70 fields, saw 72\nSkipping line 12609: expected 70 fields, saw 72\nSkipping line 13420: expected 70 fields, saw 72\nSkipping line 14108: expected 70 fields, saw 72\nSkipping line 14512: expected 70 fields, saw 72\n'
b'Skipping line 16512: expected 70 fields, saw 72\nSkipping line 21242: expected 70 fields, saw 72\nSkipping line 23743: expected 70 fields, saw 72\n'
b'Skipping line 58041: expected 70 fields, saw 72\nSkipping line 60541: expected 70 fields, saw 72\n'
b'Skipping line 70025: expected 70 fields, saw 72\nSkipping line 72525: expected 70 fields, saw 72\n'
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [14]:
data_with_zcruitid = data.merge(original_data, how = "left", on = ["Full Name", "Grad Year", "Position", "Height Source", "Weight Source", "Committed To"])
data_with_zcruitid = data_with_zcruitid.drop_duplicates()

In [15]:
data_with_zcruitid

Unnamed: 0,Full Name,Last Name,Grad Year,Position,Height,Height Source,Weight,Weight Source,Zcruit Rating,Committed To,...,Powerball Toss,Powerball Toss Source,SPARQ Rating,SPARQ Rating Source,GPA,ACT,SAT,Unnamed: 37,Unnamed: 38,Zcruit ID
0,Korey Foreman,Foreman,2021,DE,76.0,nike_opening,248.0,nike_opening,96.6970,USC,...,44.5,nike_opening,98.76,nike_opening,3.03,,,,,2481c4e2-f105-4e9f-b85d-0925a343910b
2,J.T. Tuimoloau,Tuimoloau,2021,DE,76.0,national_preps,275.0,national_preps,96.6970,,...,,,,,3.00,,,,,c4fcba47-0337-4047-bc4d-0187da6f698e
4,JC Latham,Latham,2021,OT,78.0,national_preps,279.0,nike_opening,96.6970,Alabama,...,,,,,2.35,,,,,1db04d99-cca6-4bfc-8a5a-3d40ae1795e0
6,Emeka Egbuka,Egbuka,2021,WR,72.5,nike_opening,190.0,nike_opening,96.3939,Ohio State,...,41.0,nike_opening,,,4.00,,,,,abda246f-bb45-4b10-baf1-a9e5d508cc84
8,Dallas Turner,Turner,2021,DE,74.5,rivals_camp,223.0,rivals_camp,96.3939,Alabama,...,,,,,3.70,,,,,7351f453-1854-48ba-aa2c-e6eb9f4b9859
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105124,Carl Collins III,Collins III,2022,WR,56.0,rivals,86.0,rivals,,,...,,,,,,,,,,1a456275-5dc4-49dd-a0dc-be45d36f0653
105126,Antrone\t Coston,Coston,2024,WR,61.0,rivals,85.0,rivals,,,...,,,,,,,,,,cb489357-272f-49cd-99a4-903bcbd6ed86
105128,Gerald\t Lampkin,Lampkin,2024,WR,59.0,rivals,84.0,rivals,,,...,,,,,,,,,,f5cbe094-2e5a-42b4-9917-d0a5121a0a33
105129,Jaeden\t Hamm,Hamm,2024,WR,58.0,rivals,80.0,rivals,,,...,,,,,,,,,,2d1ba563-d8b0-42de-aca2-38702763aa64


In [16]:
merge_with_tf = data_with_zcruitid[["Zcruit ID", "Position"]]

In [17]:
merge_with_tf

Unnamed: 0,Zcruit ID,Position
0,2481c4e2-f105-4e9f-b85d-0925a343910b,DE
2,c4fcba47-0337-4047-bc4d-0187da6f698e,DE
4,1db04d99-cca6-4bfc-8a5a-3d40ae1795e0,OT
6,abda246f-bb45-4b10-baf1-a9e5d508cc84,WR
8,7351f453-1854-48ba-aa2c-e6eb9f4b9859,DE
...,...,...
105124,1a456275-5dc4-49dd-a0dc-be45d36f0653,WR
105126,cb489357-272f-49cd-99a4-903bcbd6ed86,WR
105128,f5cbe094-2e5a-42b4-9917-d0a5121a0a33,WR
105129,2d1ba563-d8b0-42de-aca2-38702763aa64,WR


In [173]:
tf_data_with_pos = tf_data.merge(merge_with_tf, how = "left", on = "Zcruit ID")

In [174]:
bryson = tf_data_with_pos.iloc[0]['Zcruit ID']

In [175]:
original_data[original_data['Zcruit ID'] == bryson]

Unnamed: 0,Zcruit ID,Full Name,Grad Year,Position,Height Source,Weight Source,Committed To


In [176]:
tf_data_with_pos['Position'].isna().sum()

575

In [177]:
tf_data_with_pos

Unnamed: 0,Zcruit ID,Player Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus,Position
0,7b8c6df6-b548-4ed4-ae9b-4a741d7746c6,Bryson Lightfoot,11.25,,,,,,,,,
1,45be7538-29ad-4e4a-9d2e-be05b0d3f595,Jalik Lewis,,24.95,,,66.0,217.00,,,,
2,ae1749c5-a0f1-4fbc-852b-1ac51f5b1404,Jaiden Brown,,25.90,,,,,,490.5,,
3,eb32fcc6-a8f5-41df-b524-a4fc824a14f3,Mark Phifer,12.51,24.94,,,,,,,,CB
4,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,11.74,25.77,,47.84,,207.50,,,,RB
...,...,...,...,...,...,...,...,...,...,...,...,...
7681,f1873c0b-a931-4604-a33c-4439e32789eb,Jerod Thomas,12.26,,,,64.0,,,,,QB
7682,1f7b327a-510e-4070-a3cd-bc0d2433010f,Tyler Bradley,11.51,23.05,,,,199.25,433.5,,,RB
7683,6bcf9f7b-4c2d-4db2-a19e-bdea9c358d04,Jesse James Garcia,12.23,24.16,,,,176.50,,,,
7684,fb194f5e-bc21-4852-838b-443eebac9aaf,Isaiah Robinson,11.53,24.10,,,,234.00,,,,WR


In [178]:
tf_data_with_pos = tf_data_with_pos.rename(columns = {'Player Name': 'Full Name'})

In [179]:
tf_data_with_pos

Unnamed: 0,Zcruit ID,Full Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus,Position
0,7b8c6df6-b548-4ed4-ae9b-4a741d7746c6,Bryson Lightfoot,11.25,,,,,,,,,
1,45be7538-29ad-4e4a-9d2e-be05b0d3f595,Jalik Lewis,,24.95,,,66.0,217.00,,,,
2,ae1749c5-a0f1-4fbc-852b-1ac51f5b1404,Jaiden Brown,,25.90,,,,,,490.5,,
3,eb32fcc6-a8f5-41df-b524-a4fc824a14f3,Mark Phifer,12.51,24.94,,,,,,,,CB
4,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,11.74,25.77,,47.84,,207.50,,,,RB
...,...,...,...,...,...,...,...,...,...,...,...,...
7681,f1873c0b-a931-4604-a33c-4439e32789eb,Jerod Thomas,12.26,,,,64.0,,,,,QB
7682,1f7b327a-510e-4070-a3cd-bc0d2433010f,Tyler Bradley,11.51,23.05,,,,199.25,433.5,,,RB
7683,6bcf9f7b-4c2d-4db2-a19e-bdea9c358d04,Jesse James Garcia,12.23,24.16,,,,176.50,,,,
7684,fb194f5e-bc21-4852-838b-443eebac9aaf,Isaiah Robinson,11.53,24.10,,,,234.00,,,,WR


In [186]:
master_fillna(tf_data_with_pos, "RB", ["100 Meter", "110 Meter Hurdles", "Long Jump"])

{'100 Meter': 'There are 245 NaNs out of 1251 players', '110 Meter Hurdles': 'There are 1176 NaNs out of 1251 players', 'Long Jump': 'There are 777 NaNs out of 1251 players'}


KeyError: '100 Meter'

In [2]:
combine_speed_events = ["Forty Yard Dash", "Shuttle", "3 Cone"]
"Shuttle" in combine_speed_events

True

In [150]:
combine_speed_events = ["Forty Yard Dash", "Shuttle", "3 Cone"]
track_events = ["100 Meter", "200 Meter", "110 Meter Hurdles", "300 Meter Hurdles"]
field_events = ['High Jump', 'Long Jump', 'Triple Jump', 'Shotput', 'Discus']
combine_metrics = ["Height", "Weight", "Forty Yard Dash", "3 Cone", "Shuttle", "Vertical Jump", "Broad Jump"]
zcruit_metrics = ["Wingspan","Arm Length","Hand Size", "Powerball Toss"]

def create_percentile(position, event, data):
    """Take in a position - CB, WR, QB, etc. and an event - Fourty Yard Dash, Weight, 3 Cone and return a list of percentile values of 0, 10th, ... 100th percentile. 
        Reverse for all speed events (lower is better) such as 40, 3 Cone, and Shuttle."""
    sorted_athletes = data[data['Position'] == position]     
    event_values = (sorted_athletes[event].astype(float)).dropna()
    percentile = []
    for i in np.arange(0, 1.01, 0.05):
        percentile.append(event_values.quantile(i))
    if event in combine_speed_events or event in track_events:
        return percentile[::-1]
    return percentile

def value_to_percentile(event, value, metric_dict):
    if event in combine_speed_events or event in track_events:
        return round((100 - scipy.stats.percentileofscore(metric_dict[event], value, kind='rank')), 2)
    else:
        return round((scipy.stats.percentileofscore(metric_dict[event], value, kind='rank')), 2)
    
def percentile_to_value(event, percentile, metric_dict):
    if event in combine_speed_events or event in track_events:
        return scipy.stats.scoreatpercentile(metric_dict[event], 100 - percentile)
    else:
        return scipy.stats.scoreatpercentile(metric_dict[event], percentile)
        
def add_score(metric, data, metric_dict):
    # data = data.dropna()
    data[metric + " " + "Percentile"] = data.apply(lambda x: value_to_percentile(metric, x[metric], metric_dict), axis = 1)
    return data

def nan_counter(data, metrics):
    nan_dict, readable_dict = {}, {}
    for metric in metrics:
        nan_dict[metric] = data[metric].isna().sum() / data.shape[0]
        readable_dict[metric] = "There are {} NaNs out of {} players".format(data[metric].isna().sum(), data.shape[0])
    print(readable_dict)
    return nan_dict

def master_fillna(data, position, metrics):
    # Filtering data    
    print(position)
    data1 = data[data["Position"] == position]
    data_with_metrics = data1[metrics]
    
    # Getting NaN information
    nan_info = nan_counter(data_with_metrics, metrics)
    
    # Classifying metrics into complete, incomplete, and drop
    complete_metric, incomplete_metric, drop_metric = [], [], []
    for metric in nan_info.keys():
        if nan_info[metric] == 0:
            complete_metric.append(metric)
        elif metric in combine_metrics or metric in zcruit_metrics:
            incomplete_metric.append(metric)
        elif nan_info[metric] > 1:
            drop_metric.append(metric)
        else:
            incomplete_metric.append(metric)
    
    data_with_metrics = data_with_metrics.drop(columns = drop_metric)
    
    metric_dict = {}
    for metric in data_with_metrics.columns:
        metric_dict[metric] = create_percentile(position, metric, data)
    # We want to fill in the incomplete metrics
    completed_metrics = {}
    for metric in incomplete_metric:
        correlations = data_with_metrics.corr()[metric]
        correlations = correlations[correlations != 1].sort_values(ascending = False)
        metric_filter = correlations > 0.4 
        best_predictors = metric_filter[metric_filter].index.values

        print(metric, best_predictors)
    
    #IF there are no good predictors for linear regression (ie: no metrics with correlation > 0.4 with incomplete metric),
    #then we use the average approach. 
    
    #For T&F data, percentile method is a little tricky because some players will have NaNs for everything! No ht, wt guarantee like in ZCruit data. 
        if len(best_predictors) == 0:
            #For example, if we are predicting Hand Size with the average approach. We want the percentiles for Height, Weight, and Wingspan
            all_other_metrics = incomplete_metric + complete_metric
            all_other_metrics.remove(metric)
            
            all_other_percentiles = data_with_metrics.copy()
            for percentile_metrics in all_other_metrics:
                all_other_percentiles = add_score(percentile_metrics, all_other_percentiles, metric_dict)
                
            empty_metric = all_other_percentiles[all_other_percentiles[metric].isna()]
            full_metric = all_other_percentiles[all_other_percentiles[metric].notna()]
            
            metric_percentile_names =  [x + " " + "Percentile" for x in all_other_metrics]
            only_percentiles = empty_metric[metric_percentile_names]
            only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)

            only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
            empty_metric[metric] = only_percentiles['Predicted' + " " + metric]
            
            all_metric = empty_metric.append(full_metric).sort_index()
            
            completed_metrics[metric] = all_metric[metric].values
            #print(completed_metrics, len(completed_metrics[metric]), np.count_nonzero(~np.isnan(completed_metrics[metric])))

        else:
            full_metric = data_with_metrics[data_with_metrics[metric].notna()]
            empty_metric = data_with_metrics[data_with_metrics[metric].isna()]
            print("There are {} players with metric and {} NaNs.".format(full_metric.shape[0], empty_metric.shape[0]))
            labels = [best_predictors[0]] + [metric]
            predictor = best_predictors[0]
            print(labels)
            full_metric_with_no_predictor_nulls = full_metric[labels].dropna()
            X = full_metric_with_no_predictor_nulls[[predictor]]
            y = full_metric_with_no_predictor_nulls[metric]
            
            reg = LinearRegression(normalize = True).fit(X,y)
            metric_mean = np.median(y)
            
            empty_metric_with_no_predictor_nulls = empty_metric[[predictor]].dropna()
            if len(empty_metric_with_no_predictor_nulls) == 0:
                empty_metric_with_no_predictor_nulls[metric] = metric_mean
            else: 
                empty_metric_with_no_predictor_nulls[metric] = reg.predict(empty_metric_with_no_predictor_nulls)

            empty_metric_with_predictor_nulls = empty_metric[empty_metric[predictor].isna()]
            empty_metric_with_predictor_nulls[metric] = metric_mean
            
            print("There are {} players with a {} (predictor) value and {} players with NaNs for {}.".format(empty_metric_with_no_predictor_nulls.shape[0], best_predictors[0], empty_metric_with_predictor_nulls.shape[0], best_predictors[0]))
            print(empty_metric_with_no_predictor_nulls.shape[0] +  empty_metric_with_predictor_nulls.shape[0] == empty_metric.shape[0])

            empty_metric = empty_metric_with_no_predictor_nulls.append(empty_metric_with_predictor_nulls).sort_index()
            
            all_metric = empty_metric.append(full_metric).sort_index()
            
            completed_metrics[metric] = all_metric[metric].values
            
            train_y = reg.predict(X)
            train_error = mean_squared_error(y, train_y)
            print(reg.coef_)
            print("The RMSE of the model on the training data is {}. The average {} of the training data is {}. On average, predictions are {} from the actual values in the training data".format(np.sqrt(train_error), metric, np.mean(y), np.sqrt(train_error)/np.mean(y)))
            #print(completed_metrics, len(completed_metrics[metric]), np.count_nonzero(~np.isnan(completed_metrics[metric])))
    
    for metric in incomplete_metric:
        print(metric)
        print("There are {} NaN values for this metric".format(data1[metric].isna().sum()))
        data1[metric] = completed_metrics[metric]
        print("There are {} NaN values for this metric".format(data1[metric].isna().sum()))
    
    if 'Zcruit ID' in tf_data_with_pos.columns:
        all_metrics = ["Zcruit ID", "Full Name", "Position"] + complete_metric + incomplete_metric
        cleaned_dataset = data1[all_metrics]
        return cleaned_dataset
    
    else:
        all_metrics = ["Full Name", "Position"] + complete_metric + incomplete_metric
        cleaned_dataset = data1[all_metrics]
        return cleaned_dataset

In [95]:
ex_rb = master_fillna(tf_data_with_pos, "RB", ["100 Meter", "200 Meter"])

{'100 Meter': 'There are 230 NaNs out of 1236 players', '200 Meter': 'There are 351 NaNs out of 1236 players'}
100 Meter ['200 Meter']
There are 1006 players with metric and 230 NaNs.
['200 Meter', '100 Meter']
There are 118 players with a 200 Meter (predictor) value and 112 players with NaNs for 200 Meter.
True
[0.25227193]
The RMSE of the model on the training data is 0.6484463647379628. The average 100 Meter of the training data is 12.095449804432844. On average, predictions are 0.05361076894389778 from the actual values in the training data
200 Meter ['100 Meter']
There are 885 players with metric and 351 NaNs.
['100 Meter', '200 Meter']
There are 239 players with a 100 Meter (predictor) value and 112 players with NaNs for 100 Meter.
True
[0.95915796]
The RMSE of the model on the training data is 1.2644005056006562. The average 200 Meter of the training data is 24.94474576271188. On average, predictions are 0.050688049404404766 from the actual values in the training data
100 Meter


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]


In [46]:
ex_rb.tail(15)

Unnamed: 0,Zcruit ID,Full Name,Position,100 Meter,200 Meter
7550,55083d91-ace6-4b6d-9224-63bb9ec745c7,Jaylen Caldwell,RB,11.947935,24.36
7559,3670f8c4-c836-4899-a1c8-5c399c1a926c,Brysen Barrios,RB,12.24,24.48
7571,18cd6c7c-5dfc-4a02-94db-0c5dec353100,Logan Parsons,RB,12.99,25.91
7577,d3537297-dfda-45cf-8541-6eaa110e7282,Divon Woods,RB,12.84,25.658887
7610,046d5dd9-73c3-4485-9717-620a958f0479,Dylan Lesesne,RB,12.75,25.572563
7623,2006b927-4a7d-4937-9466-a0c63d534ecd,Charles Ennis,RB,11.97,24.74
7628,130c872a-fcda-419d-8e86-8528aaf7ba9c,Kenadie Stinson,RB,12.12,24.968293
7647,f5c88916-cb42-4667-8b5d-b014d3ebf8b3,Kazarius Adams,RB,12.02,24.64
7650,f80125f5-f899-49d3-9220-8b27ce97a3f0,Scheider\t Joseph,RB,12.34,25.36
7657,f86dfd47-fbe9-4ccf-8c8b-843e1e76c20f,Garrhet Manning,RB,12.22,25.95


In [40]:
rb =tf_data_with_pos[tf_data_with_pos['Position'] == 'RB']
rb = rb[['Zcruit ID', 'Full Name', 'Position', '100 Meter', '200 Meter']]

In [45]:
rb.tail(15)

Unnamed: 0,Zcruit ID,Full Name,Position,100 Meter,200 Meter
7550,55083d91-ace6-4b6d-9224-63bb9ec745c7,Jaylen Caldwell,RB,,24.36
7559,3670f8c4-c836-4899-a1c8-5c399c1a926c,Brysen Barrios,RB,12.24,24.48
7571,18cd6c7c-5dfc-4a02-94db-0c5dec353100,Logan Parsons,RB,12.99,25.91
7577,d3537297-dfda-45cf-8541-6eaa110e7282,Divon Woods,RB,12.84,
7610,046d5dd9-73c3-4485-9717-620a958f0479,Dylan Lesesne,RB,12.75,
7623,2006b927-4a7d-4937-9466-a0c63d534ecd,Charles Ennis,RB,,
7628,130c872a-fcda-419d-8e86-8528aaf7ba9c,Kenadie Stinson,RB,12.12,
7647,f5c88916-cb42-4667-8b5d-b014d3ebf8b3,Kazarius Adams,RB,12.02,24.64
7650,f80125f5-f899-49d3-9220-8b27ce97a3f0,Scheider\t Joseph,RB,12.34,25.36
7657,f86dfd47-fbe9-4ccf-8c8b-843e1e76c20f,Garrhet Manning,RB,12.22,25.95


Can impute NaNs for Zcruit data -> can do percentile method bc have ht/wt for every player or lin reg method if there's a strongly correlated attribute.

Not sure if we can do the same for T&F data. how to do percentile method for RB - 100M, 110M hurdle, LJ. Try to impute for 100M, But player has no 110M hurdle or LJ. Why would a player have no T&F data? It's not that they have no T&F data, they may just not have anything for the 3 metrics we provided. 

In [48]:
rb1 = tf_data_with_pos[tf_data_with_pos['Position'] == 'RB']
rb1

Unnamed: 0,Zcruit ID,Full Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus,Position
4,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,11.74,25.77,,47.84,,207.50,,,,RB
5,c98e0321-f3a4-4b85-ab2a-bbd16d46ea86,Isaac Jones,11.73,23.28,,,,194.50,,,,RB
8,a6b7e3c4-832d-4f6d-a039-e2af26012e82,Jahbez Hawkins,11.24,23.86,,,68.0,,,,,RB
13,e2936003-f909-4d01-9f48-836d664aaf7a,Alshadee Salaam,11.29,22.26,,,,,,,,RB
14,52031735-d3cc-48cd-ae8f-92c0b4659ba1,Jemel Carter,11.12,24.14,,,,206.50,,,,RB
...,...,...,...,...,...,...,...,...,...,...,...,...
7662,cba269e2-1a78-4b76-a3db-512740123e11,Eric Weatherly,11.28,23.83,,,,,,,,RB
7675,0e019e7e-8b32-4a87-9bcd-9d3996128e34,Aaron Jones,11.22,,,,,,,,,RB
7676,0f2ae21e-54a1-497e-846e-2b1d6f64e234,Zaquan Bryan,11.76,23.21,,,,,469.5,,,RB
7682,1f7b327a-510e-4070-a3cd-bc0d2433010f,Tyler Bradley,11.51,23.05,,,,199.25,433.5,,,RB


In [57]:
rb1[rb1['100 Meter'].isna() & rb1['200 Meter'].isna()]

Unnamed: 0,Zcruit ID,Full Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus,Position
393,708beb5c-057f-45c6-a85a-0e1fe63931c6,Bryant Callahan,,,,,,,,,,RB
647,8cc096f5-9221-495c-a7d0-8f58e3d35fd4,LeQuint Allen,,,,,,,,391.25,,RB
937,d5ec6b73-96e2-4bfa-a3f6-93f0acab0b05,Lajadan Tolbert,,,,,,,,530.00,1272.0,RB
953,f9a1b8c6-2fd1-4743-9135-55e0d7b7f411,Alex Tejada,,,,,,,,534.50,,RB
954,6360a3a6-a30c-4d27-9908-154435f648ae,Carroll Jones,,,,,,,,427.00,827.0,RB
...,...,...,...,...,...,...,...,...,...,...,...,...
7089,dae59625-8aac-4ef7-ae09-2b66ad1a42ce,Louie Adams,,,17.82,45.15,,,,,,RB
7251,93698517-e487-44c7-aee4-cadb89c6703d,Nehemiah Tarble,,,,47.74,,196.75,,,,RB
7284,2f3399aa-1973-4ce3-924d-ca8085c6f7b4,Elijah Uribe,,,,,,,,,1539.0,RB
7455,9d711813-74eb-4f74-b194-0c8196ccdd98,Kenneth Burgess,,,,,,,,567.50,1538.0,RB


In [58]:
desired = track_events + field_events

In [59]:
rb1.dropna(subset = desired)

Unnamed: 0,Zcruit ID,Full Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus,Position


Drop position-filtered athletes with NO track or field metrics. Threshold is 4 because should have 3 vars (ID, Name, Pos) and nothing else if completely NaN.

In [67]:
no_nan_rb = rb1.dropna(thresh = 4)

In [71]:
no_nan_rb[no_nan_rb["Zcruit ID"] == 'f9a1b8c6-2fd1-4743-9135-55e0d7b7f411']

Unnamed: 0,Zcruit ID,Full Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus,Position
953,f9a1b8c6-2fd1-4743-9135-55e0d7b7f411,Alex Tejada,,,,,,,,534.5,,RB


For example, some RBs only throw Shot and Discus! Should we use ANY events to fill in NaNs. Or should we not fill in NaNs and use flexible weights for each position depending on what values a player has.

For example -- if position calls for 100M, 110M Hurdles, and LJ. Should we just use 100% 100M if they only have 100M. What if they have shot and discus but nothing else? 

In [68]:
rb1[rb1['100 Meter'].isna() & rb1['200 Meter'].isna()].head(25)

Unnamed: 0,Zcruit ID,Full Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus,Position
393,708beb5c-057f-45c6-a85a-0e1fe63931c6,Bryant Callahan,,,,,,,,,,RB
647,8cc096f5-9221-495c-a7d0-8f58e3d35fd4,LeQuint Allen,,,,,,,,391.25,,RB
937,d5ec6b73-96e2-4bfa-a3f6-93f0acab0b05,Lajadan Tolbert,,,,,,,,530.0,1272.0,RB
953,f9a1b8c6-2fd1-4743-9135-55e0d7b7f411,Alex Tejada,,,,,,,,534.5,,RB
954,6360a3a6-a30c-4d27-9908-154435f648ae,Carroll Jones,,,,,,,,427.0,827.0,RB
965,f2a57823-b2ef-4a2b-ae72-7a78e42ea236,Ty Fuselier,,,,,,,,495.0,,RB
1021,6947283d-4907-4002-ace2-82cd4a0dafbe,Carter Miller,,,,,,,,510.0,1411.0,RB
1112,49b6ddc5-7d84-4435-b6d5-acc9ed8f267a,Lewis Richardson,,,,,,,,479.0,,RB
1145,867c4106-129a-49bc-81f5-20a4647ccbd4,Austin Love,,,,,,,,421.0,1416.0,RB
1284,36b46004-f31b-4c05-86fc-c552231f09b7,LaVaughn Luellen,,,,,,,,472.0,830.0,RB


In [73]:
no_nan_rb

Unnamed: 0,Zcruit ID,Full Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus,Position
4,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,11.74,25.77,,47.84,,207.50,,,,RB
5,c98e0321-f3a4-4b85-ab2a-bbd16d46ea86,Isaac Jones,11.73,23.28,,,,194.50,,,,RB
8,a6b7e3c4-832d-4f6d-a039-e2af26012e82,Jahbez Hawkins,11.24,23.86,,,68.0,,,,,RB
13,e2936003-f909-4d01-9f48-836d664aaf7a,Alshadee Salaam,11.29,22.26,,,,,,,,RB
14,52031735-d3cc-48cd-ae8f-92c0b4659ba1,Jemel Carter,11.12,24.14,,,,206.50,,,,RB
...,...,...,...,...,...,...,...,...,...,...,...,...
7662,cba269e2-1a78-4b76-a3db-512740123e11,Eric Weatherly,11.28,23.83,,,,,,,,RB
7675,0e019e7e-8b32-4a87-9bcd-9d3996128e34,Aaron Jones,11.22,,,,,,,,,RB
7676,0f2ae21e-54a1-497e-846e-2b1d6f64e234,Zaquan Bryan,11.76,23.21,,,,,469.5,,,RB
7682,1f7b327a-510e-4070-a3cd-bc0d2433010f,Tyler Bradley,11.51,23.05,,,,199.25,433.5,,,RB


Metrics argument is kinda not needed for master fillna function. We will use ALL T&F metrics to fill because that guarantees our methods will work (percentile or lin reg). This will be the same for all positions. Filtering for chosen metrics will be done AFTER filling not DURING filling for the sake of data imputing. This is also a source for improvement -> can handle different cases with more nuance (only throws RB, dash RBs, jumps RBs, etc.)

In [82]:
ex_rb_FILLED = master_fillna(no_nan_rb, "RB", track_events + field_events)

{'100 Meter': 'There are 230 NaNs out of 1236 players', '200 Meter': 'There are 351 NaNs out of 1236 players', '110 Meter Hurdles': 'There are 1161 NaNs out of 1236 players', '300 Meter Hurdles': 'There are 1165 NaNs out of 1236 players', 'High Jump': 'There are 1119 NaNs out of 1236 players', 'Long Jump': 'There are 762 NaNs out of 1236 players', 'Triple Jump': 'There are 1090 NaNs out of 1236 players', 'Shotput': 'There are 1079 NaNs out of 1236 players', 'Discus': 'There are 1130 NaNs out of 1236 players'}
100 Meter ['200 Meter']
There are 1006 players with metric and 230 NaNs.
['200 Meter', '100 Meter']
There are 118 players with a 200 Meter (predictor) value and 112 players with NaNs for 200 Meter.
True
[0.25227193]
The RMSE of the model on the training data is 0.6484463647379628. The average 100 Meter of the training data is 12.095449804432844. On average, predictions are 0.05361076894389778 from the actual values in the training data
200 Meter ['300 Meter Hurdles' '100 Meter']
T

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

There are 32 players with a 300 Meter Hurdles (predictor) value and 319 players with NaNs for 300 Meter Hurdles.
True
[0.28429891]
The RMSE of the model on the training data is 1.6264207462721445. The average 200 Meter of the training data is 25.518205128205125. On average, predictions are 0.06373570312257076 from the actual values in the training data
110 Meter Hurdles ['Discus' '300 Meter Hurdles']
There are 75 players with metric and 1161 NaNs.
['Discus', '110 Meter Hurdles']
There are 103 players with a Discus (predictor) value and 1058 players with NaNs for Discus.
True
[0.00391856]
The RMSE of the model on the training data is 0.01585359919964183. The average 110 Meter Hurdles of the training data is 17.61. On average, predictions are 0.0009002611697695531 from the actual values in the training data
300 Meter Hurdles ['110 Meter Hurdles' 'Discus' '200 Meter']
There are 71 players with metric and 1165 NaNs.
['110 Meter Hurdles', '300 Meter Hurdles']
There are 31 players with a 110

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean


In [86]:
ex_rb_FILLED[["100 Meter", "110 Meter Hurdles", "Long Jump"]]

Unnamed: 0,100 Meter,110 Meter Hurdles,Long Jump
4,11.74,17.52,207.500000
5,11.73,17.52,194.500000
8,11.24,17.52,219.250000
13,11.29,17.52,219.250000
14,11.12,17.52,206.500000
...,...,...,...
7662,11.28,17.52,219.250000
7675,11.22,17.52,219.250000
7676,11.76,17.52,224.075318
7682,11.51,17.52,199.250000


In [181]:
tf_data_with_pos.dropna(thresh = 4)

Unnamed: 0,Zcruit ID,Full Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus,Position
1,45be7538-29ad-4e4a-9d2e-be05b0d3f595,Jalik Lewis,,24.95,,,66.0,217.00,,,,
2,ae1749c5-a0f1-4fbc-852b-1ac51f5b1404,Jaiden Brown,,25.90,,,,,,490.5,,
3,eb32fcc6-a8f5-41df-b524-a4fc824a14f3,Mark Phifer,12.51,24.94,,,,,,,,CB
4,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,11.74,25.77,,47.84,,207.50,,,,RB
5,c98e0321-f3a4-4b85-ab2a-bbd16d46ea86,Isaac Jones,11.73,23.28,,,,194.50,,,,RB
...,...,...,...,...,...,...,...,...,...,...,...,...
7681,f1873c0b-a931-4604-a33c-4439e32789eb,Jerod Thomas,12.26,,,,64.0,,,,,QB
7682,1f7b327a-510e-4070-a3cd-bc0d2433010f,Tyler Bradley,11.51,23.05,,,,199.25,433.5,,,RB
7683,6bcf9f7b-4c2d-4db2-a19e-bdea9c358d04,Jesse James Garcia,12.23,24.16,,,,176.50,,,,
7684,fb194f5e-bc21-4852-838b-443eebac9aaf,Isaiah Robinson,11.53,24.10,,,,234.00,,,,WR


In [180]:
tf_data_with_pos

Unnamed: 0,Zcruit ID,Full Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus,Position
0,7b8c6df6-b548-4ed4-ae9b-4a741d7746c6,Bryson Lightfoot,11.25,,,,,,,,,
1,45be7538-29ad-4e4a-9d2e-be05b0d3f595,Jalik Lewis,,24.95,,,66.0,217.00,,,,
2,ae1749c5-a0f1-4fbc-852b-1ac51f5b1404,Jaiden Brown,,25.90,,,,,,490.5,,
3,eb32fcc6-a8f5-41df-b524-a4fc824a14f3,Mark Phifer,12.51,24.94,,,,,,,,CB
4,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,11.74,25.77,,47.84,,207.50,,,,RB
...,...,...,...,...,...,...,...,...,...,...,...,...
7681,f1873c0b-a931-4604-a33c-4439e32789eb,Jerod Thomas,12.26,,,,64.0,,,,,QB
7682,1f7b327a-510e-4070-a3cd-bc0d2433010f,Tyler Bradley,11.51,23.05,,,,199.25,433.5,,,RB
7683,6bcf9f7b-4c2d-4db2-a19e-bdea9c358d04,Jesse James Garcia,12.23,24.16,,,,176.50,,,,
7684,fb194f5e-bc21-4852-838b-443eebac9aaf,Isaiah Robinson,11.53,24.10,,,,234.00,,,,WR


In [161]:
tf_data

Unnamed: 0,Zcruit ID,Player Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus
0,7b8c6df6-b548-4ed4-ae9b-4a741d7746c6,Bryson Lightfoot,11.25,,,,,,,,
1,45be7538-29ad-4e4a-9d2e-be05b0d3f595,Jalik Lewis,,24.95,,,66.0,217.00,,,
2,ae1749c5-a0f1-4fbc-852b-1ac51f5b1404,Jaiden Brown,,25.90,,,,,,490.5,
3,eb32fcc6-a8f5-41df-b524-a4fc824a14f3,Mark Phifer,12.51,24.94,,,,,,,
4,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,11.74,25.77,,47.84,,207.50,,,
...,...,...,...,...,...,...,...,...,...,...,...
7667,f1873c0b-a931-4604-a33c-4439e32789eb,Jerod Thomas,12.26,,,,64.0,,,,
7668,1f7b327a-510e-4070-a3cd-bc0d2433010f,Tyler Bradley,11.51,23.05,,,,199.25,433.5,,
7669,6bcf9f7b-4c2d-4db2-a19e-bdea9c358d04,Jesse James Garcia,12.23,24.16,,,,176.50,,,
7670,fb194f5e-bc21-4852-838b-443eebac9aaf,Isaiah Robinson,11.53,24.10,,,,234.00,,,


In [182]:
tf_data_with_pos = tf_data_with_pos.dropna(thresh = 4)

In [183]:
tf_data_with_pos

Unnamed: 0,Zcruit ID,Full Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus,Position
1,45be7538-29ad-4e4a-9d2e-be05b0d3f595,Jalik Lewis,,24.95,,,66.0,217.00,,,,
2,ae1749c5-a0f1-4fbc-852b-1ac51f5b1404,Jaiden Brown,,25.90,,,,,,490.5,,
3,eb32fcc6-a8f5-41df-b524-a4fc824a14f3,Mark Phifer,12.51,24.94,,,,,,,,CB
4,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,11.74,25.77,,47.84,,207.50,,,,RB
5,c98e0321-f3a4-4b85-ab2a-bbd16d46ea86,Isaac Jones,11.73,23.28,,,,194.50,,,,RB
...,...,...,...,...,...,...,...,...,...,...,...,...
7681,f1873c0b-a931-4604-a33c-4439e32789eb,Jerod Thomas,12.26,,,,64.0,,,,,QB
7682,1f7b327a-510e-4070-a3cd-bc0d2433010f,Tyler Bradley,11.51,23.05,,,,199.25,433.5,,,RB
7683,6bcf9f7b-4c2d-4db2-a19e-bdea9c358d04,Jesse James Garcia,12.23,24.16,,,,176.50,,,,
7684,fb194f5e-bc21-4852-838b-443eebac9aaf,Isaiah Robinson,11.53,24.10,,,,234.00,,,,WR


In [184]:
%%time
cleaned_tf_metrics_position_dataframes = {}
for position in positions:
    cleaned_tf_metrics_position_dataframes[position] = master_fillna(tf_data_with_pos, position, track_events + field_events)

WR
{'100 Meter': 'There are 470 NaNs out of 1474 players', '200 Meter': 'There are 403 NaNs out of 1474 players', '110 Meter Hurdles': 'There are 1312 NaNs out of 1474 players', '300 Meter Hurdles': 'There are 1311 NaNs out of 1474 players', 'High Jump': 'There are 1167 NaNs out of 1474 players', 'Long Jump': 'There are 768 NaNs out of 1474 players', 'Triple Jump': 'There are 1177 NaNs out of 1474 players', 'Shotput': 'There are 1427 NaNs out of 1474 players', 'Discus': 'There are 1444 NaNs out of 1474 players'}
100 Meter ['200 Meter' '300 Meter Hurdles']
There are 1004 players with metric and 470 NaNs.
['200 Meter', '100 Meter']
There are 273 players with a 200 Meter (predictor) value and 197 players with NaNs for 200 Meter.
True
[0.31758863]
The RMSE of the model on the training data is 0.5621377260998385. The average 100 Meter of the training data is 12.094736842105252. On average, predictions are 0.04647787987770644 from the actual values in the training data
200 Meter ['100 Meter'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[0.0299289]
The RMSE of the model on the training data is 1.551026895201722. The average 300 Meter Hurdles of the training data is 44.14666666666667. On average, predictions are 0.03513349958928697 from the actual values in the training data
High Jump ['Triple Jump' 'Long Jump']
There are 307 players with metric and 1167 NaNs.
['Triple Jump', 'High Jump']
There are 196 players with a Triple Jump (predictor) value and 971 players with NaNs for Triple Jump.
True
[0.06761343]
The RMSE of the model on the training data is 4.676301701219336. The average High Jump of the training data is 64.29455445544555. On average, predictions are 0.0727324691931708 from the actual values in the training data
Long Jump ['Triple Jump' 'High Jump']
There are 706 players with metric and 768 NaNs.
['Triple Jump', 'Long Jump']
There are 51 players with a Triple Jump (predictor) value and 717 players with NaNs for Triple Jump.
True
[0.37867905]
The RMSE of the model on the training data is 18.3803524455509. The

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Discus ['300 Meter Hurdles' '110 Meter Hurdles']
There are 30 players with metric and 1444 NaNs.
['300 Meter Hurdles', 'Discus']
There are 160 players with a 300 Meter Hurdles (predictor) value and 1284 players with NaNs for 300 Meter Hurdles.
True
[23.80155095]
The RMSE of the model on the training data is 43.73978091057082. The average Discus of the training data is 1027.25. On average, predictions are 0.042579489813162154 from the actual values in the training data
100 Meter
There are 470 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 403 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 1312 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 1311 NaN values for this metric
There are 0 NaN values for this metric
High Jump
There are 1167 NaN values for this metric
There are 0 NaN values for this metric
Long Jump
There are 768 NaN values for this metric
There a

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]


100 Meter ['200 Meter']
There are 1006 players with metric and 230 NaNs.
['200 Meter', '100 Meter']
There are 118 players with a 200 Meter (predictor) value and 112 players with NaNs for 200 Meter.
True
[0.25227193]
The RMSE of the model on the training data is 0.6484463647379628. The average 100 Meter of the training data is 12.095449804432844. On average, predictions are 0.05361076894389778 from the actual values in the training data
200 Meter ['300 Meter Hurdles' '100 Meter']
There are 885 players with metric and 351 NaNs.
['300 Meter Hurdles', '200 Meter']
There are 32 players with a 300 Meter Hurdles (predictor) value and 319 players with NaNs for 300 Meter Hurdles.
True
[0.28429891]
The RMSE of the model on the training data is 1.6264207462721445. The average 200 Meter of the training data is 25.518205128205125. On average, predictions are 0.06373570312257076 from the actual values in the training data
110 Meter Hurdles ['Discus' '300 Meter Hurdles']
There are 75 players with met

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

High Jump ['Shotput' 'Long Jump']
There are 117 players with metric and 1119 NaNs.
['Shotput', 'High Jump']
There are 147 players with a Shotput (predictor) value and 972 players with NaNs for Shotput.
True
[0.04761492]
The RMSE of the model on the training data is 4.184827739251241. The average High Jump of the training data is 61.8716572. On average, predictions are 0.06763723372923072 from the actual values in the training data
Long Jump ['Triple Jump' 'High Jump']
There are 474 players with metric and 762 NaNs.
['Triple Jump', 'Long Jump']
There are 33 players with a Triple Jump (predictor) value and 729 players with NaNs for Triple Jump.
True
[0.37025793]
The RMSE of the model on the training data is 17.88873358609338. The average Long Jump of the training data is 216.0287610619469. On average, predictions are 0.08280718501627536 from the actual values in the training data
Triple Jump ['Long Jump' 'Shotput']
There are 146 players with metric and 1090 NaNs.
['Long Jump', 'Triple Ju

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Discus ['110 Meter Hurdles' 'Shotput' '300 Meter Hurdles']
There are 106 players with metric and 1130 NaNs.
['110 Meter Hurdles', 'Discus']
There are 72 players with a 110 Meter Hurdles (predictor) value and 1058 players with NaNs for 110 Meter Hurdles.
True
[255.11575803]
The RMSE of the model on the training data is 4.045135769322116. The average Discus of the training data is 859.6666666666666. On average, predictions are 0.004705470069005952 from the actual values in the training data
100 Meter
There are 230 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 351 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 1161 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 1165 NaN values for this metric
There are 0 NaN values for this metric
High Jump
There are 1119 NaN values for this metric
There are 0 NaN values for this metric
Long Jump
There are 762 NaN values fo

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

High Jump ['Triple Jump' 'Long Jump']
There are 73 players with metric and 532 NaNs.
['Triple Jump', 'High Jump']
There are 52 players with a Triple Jump (predictor) value and 480 players with NaNs for Triple Jump.
True
[0.07792292]
The RMSE of the model on the training data is 3.3450940238049514. The average High Jump of the training data is 62.890625. On average, predictions are 0.053189072676650156 from the actual values in the training data
Long Jump ['Triple Jump' 'High Jump']
There are 146 players with metric and 459 NaNs.
['Triple Jump', 'Long Jump']
There are 20 players with a Triple Jump (predictor) value and 439 players with NaNs for Triple Jump.
True
[0.33660167]
The RMSE of the model on the training data is 18.910794685996315. The average Long Jump of the training data is 207.25520833333334. On average, predictions are 0.09124400220418899 from the actual values in the training data
Triple Jump ['High Jump' 'Long Jump']
There are 68 players with metric and 537 NaNs.
['High J

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

There are 101 players with a Shotput (predictor) value and 291 players with NaNs for Shotput.
True
[2.18909926]
The RMSE of the model on the training data is 223.07499449897426. The average Discus of the training data is 1132.8563224550264. On average, predictions are 0.1969137569145095 from the actual values in the training data
100 Meter
There are 298 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 317 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 556 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 562 NaN values for this metric
There are 0 NaN values for this metric
High Jump
There are 532 NaN values for this metric
There are 0 NaN values for this metric
Long Jump
There are 459 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 537 NaN values for this metric
There are 0 NaN values for this metric
Shotput
There are 31

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]


100 Meter ['200 Meter' '300 Meter Hurdles' '110 Meter Hurdles']
There are 203 players with metric and 147 NaNs.
['200 Meter', '100 Meter']
There are 68 players with a 200 Meter (predictor) value and 79 players with NaNs for 200 Meter.
True
[0.26479812]
The RMSE of the model on the training data is 0.4744841116237274. The average 100 Meter of the training data is 12.371360544217689. On average, predictions are 0.038353430079725456 from the actual values in the training data
200 Meter ['100 Meter' '110 Meter Hurdles' '300 Meter Hurdles']
There are 215 players with metric and 135 NaNs.
['100 Meter', '200 Meter']
There are 56 players with a 100 Meter (predictor) value and 79 players with NaNs for 100 Meter.
True
[2.02138766]
The RMSE of the model on the training data is 1.310958760771062. The average 200 Meter of the training data is 25.51748299319727. On average, predictions are 0.05137492444378436 from the actual values in the training data
110 Meter Hurdles ['300 Meter Hurdles' '100 Met

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[1.29451862]
The RMSE of the model on the training data is 2.22326666008439. The average 300 Meter Hurdles of the training data is 45.885806451612915. On average, predictions are 0.04845216488521018 from the actual values in the training data
High Jump ['Shotput' 'Long Jump' 'Triple Jump']
There are 76 players with metric and 274 NaNs.
['Shotput', 'High Jump']
There are 33 players with a Shotput (predictor) value and 241 players with NaNs for Shotput.
True
[0.03302169]
The RMSE of the model on the training data is 2.730364680741326. The average High Jump of the training data is 62.95. On average, predictions are 0.04337354536523155 from the actual values in the training data
Long Jump ['Triple Jump' 'High Jump' 'Discus']
There are 136 players with metric and 214 NaNs.
['Triple Jump', 'Long Jump']
There are 17 players with a Triple Jump (predictor) value and 197 players with NaNs for Triple Jump.
True
[0.26460737]
The RMSE of the model on the training data is 18.26045643927761. The aver

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

There are 12 players with a Shotput (predictor) value and 299 players with NaNs for Shotput.
True
[2.82242605]
The RMSE of the model on the training data is 183.9338454373405. The average Discus of the training data is 1214.2211538461538. On average, predictions are 0.15148298549626946 from the actual values in the training data
100 Meter
There are 147 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 135 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 307 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 298 NaN values for this metric
There are 0 NaN values for this metric
High Jump
There are 274 NaN values for this metric
There are 0 NaN values for this metric
Long Jump
There are 214 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 277 NaN values for this metric
There are 0 NaN values for this metric
Shotput
There are 312

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

High Jump ['Long Jump']
There are 29 players with metric and 247 NaNs.
['Long Jump', 'High Jump']
There are 13 players with a Long Jump (predictor) value and 234 players with NaNs for Long Jump.
True
[0.19201309]
The RMSE of the model on the training data is 4.2562206246584235. The average High Jump of the training data is 62.4375. On average, predictions are 0.068167697692227 from the actual values in the training data
Long Jump ['High Jump' 'Triple Jump' 'Discus']
There are 25 players with metric and 251 NaNs.
['High Jump', 'Long Jump']
There are 17 players with a High Jump (predictor) value and 234 players with NaNs for High Jump.
True
[2.49799903]
The RMSE of the model on the training data is 15.351642556603869. The average Long Jump of the training data is 206.35416666666666. On average, predictions are 0.07439463328793394 from the actual values in the training data
Triple Jump ['Long Jump']
There are 16 players with metric and 260 NaNs.
['Long Jump', 'Triple Jump']
There are 13 p

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]


[2.58717616]
The RMSE of the model on the training data is 229.01820115624517. The average Discus of the training data is 1171.552795031056. On average, predictions are 0.1954826125869763 from the actual values in the training data
100 Meter
There are 210 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 207 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 251 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 255 NaN values for this metric
There are 0 NaN values for this metric
High Jump
There are 247 NaN values for this metric
There are 0 NaN values for this metric
Long Jump
There are 251 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 260 NaN values for this metric
There are 0 NaN values for this metric
Shotput
There are 69 NaN values for this metric
There are 0 NaN values for this metric
Discus
There are 101 NaN values f

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[0.90533603]
The RMSE of the model on the training data is 2.681539177448454. The average 300 Meter Hurdles of the training data is 45.98946428571428. On average, predictions are 0.058307684577257005 from the actual values in the training data
High Jump ['Discus' 'Shotput' 'Triple Jump' 'Long Jump']
There are 96 players with metric and 522 NaNs.
['Discus', 'High Jump']
There are 6 players with a Discus (predictor) value and 516 players with NaNs for Discus.
True
[0.0052283]
The RMSE of the model on the training data is 0.302705868975474. The average High Jump of the training data is 65.66666666666667. On average, predictions are 0.004609734045311787 from the actual values in the training data
Long Jump ['Shotput' 'Triple Jump' 'High Jump']
There are 275 players with metric and 343 NaNs.
['Shotput', 'Long Jump']
There are 6 players with a Shotput (predictor) value and 337 players with NaNs for Shotput.
True
[1.52868012]
The RMSE of the model on the training data is 24.85780224722593. Th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[0.48041291]
The RMSE of the model on the training data is 13.935153509877452. The average Shotput of the training data is 384.5. On average, predictions are 0.036242271807223544 from the actual values in the training data
Discus ['High Jump']
There are 9 players with metric and 609 NaNs.
['High Jump', 'Discus']
There are 93 players with a High Jump (predictor) value and 516 players with NaNs for High Jump.
True
[180.]
The RMSE of the model on the training data is 56.166419386201454. The average Discus of the training data is 965.0. On average, predictions are 0.05820354340539011 from the actual values in the training data
100 Meter
There are 171 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 161 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 547 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 538 NaN values for this metric
There are 0 NaN values for this 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is tryi

300 Meter Hurdles []
High Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Long Jump []
Triple Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Shotput ['Discus']
There are 303 players with metric and 11 NaNs.
['Discus', 'Shotput']
There are 6 players with a Discus (predictor) value and 5 players with NaNs for Discus.
True
[0.18797399]
The RMSE of the model on the training data is 51.4718407953037. The average Shotput of the training data is 432.9621198488889. On average, predictions are 0.11888301178234309 from the actual values in the training data
Discus ['Shotput']
There are 231 players with metric and 83 NaNs.
['Shotput', 'Discus']
There are 78 players with a Shotput (predictor) value and 5 players with NaNs for Shotput.
True
[2.50988192]
The RMSE of the model on the training data is 188.08210205567593. The average Discus of the training data is 1096.068204071111. On average, predictions are 0.17159707886524322 from the actual values in the training data
100 Meter
There are 299 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 299 NaN values for this metric
There are 0 NaN values for th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is tryi

Long Jump []
Triple Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Shotput ['Discus']
There are 226 players with metric and 9 NaNs.
['Discus', 'Shotput']
There are 4 players with a Discus (predictor) value and 5 players with NaNs for Discus.
True
[0.20037025]
The RMSE of the model on the training data is 50.96942748717193. The average Shotput of the training data is 455.18636363636364. On average, predictions are 0.1119748559249241 from the actual values in the training data
Discus ['Shotput']
There are 169 players with metric and 66 NaNs.
['Shotput', 'Discus']
There are 61 players with a Shotput (predictor) value and 5 players with NaNs for Shotput.
True
[2.55145259]
The RMSE of the model on the training data is 181.880811662674. The average Discus of the training data is 1187.9257575757576. On average, predictions are 0.15310789458243979 from the actual values in the training data
100 Meter
There are 221 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 227 NaN values for this metric
There are 0 NaN values for thi

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[1.56924628]
The RMSE of the model on the training data is 2.3681487975800457. The average 300 Meter Hurdles of the training data is 46.8675. On average, predictions are 0.05052859225646868 from the actual values in the training data
High Jump ['Shotput' 'Triple Jump']
There are 58 players with metric and 294 NaNs.
['Shotput', 'High Jump']
There are 11 players with a Shotput (predictor) value and 283 players with NaNs for Shotput.
True
[0.04178299]
The RMSE of the model on the training data is 1.0223423631748516. The average High Jump of the training data is 57.916666666666664. On average, predictions are 0.01765195447208377 from the actual values in the training data
Long Jump ['Shotput' 'Discus']
There are 170 players with metric and 182 NaNs.
['Shotput', 'Long Jump']
There are 5 players with a Shotput (predictor) value and 177 players with NaNs for Shotput.
True
[0.20876699]
The RMSE of the model on the training data is 6.550049271166856. The average Long Jump of the training data i

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[13.29496488]
The RMSE of the model on the training data is 167.24754240327607. The average Discus of the training data is 1112.2. On average, predictions are 0.15037542025110237 from the actual values in the training data
100 Meter
There are 128 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 103 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 313 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 303 NaN values for this metric
There are 0 NaN values for this metric
High Jump
There are 294 NaN values for this metric
There are 0 NaN values for this metric
Long Jump
There are 182 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 273 NaN values for this metric
There are 0 NaN values for this metric
Shotput
There are 338 NaN values for this metric
There are 0 NaN values for this metric
Discus
There are 344 NaN values for this 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is tryi

300 Meter Hurdles []
High Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Long Jump []
Triple Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Shotput ['Discus']
There are 248 players with metric and 6 NaNs.
['Discus', 'Shotput']
There are 4 players with a Discus (predictor) value and 2 players with NaNs for Discus.
True
[0.13865638]
The RMSE of the model on the training data is 61.65389993127462. The average Shotput of the training data is 446.9789393419689. On average, predictions are 0.13793468663655595 from the actual values in the training data
Discus ['100 Meter' 'Shotput']
There are 197 players with metric and 57 NaNs.
['100 Meter', 'Discus']
There are 2 players with a 100 Meter (predictor) value and 55 players with NaNs for 100 Meter.
True
[55.41511002]
The RMSE of the model on the training data is 166.94228893401942. The average Discus of the training data is 1022.0909090909091. On average, predictions are 0.1633340903917294 from the actual values in the training data
100 Meter
There are 241 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 247 NaN values for this metric
There are 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is tryi

300 Meter Hurdles []
High Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Long Jump []
Triple Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Shotput ['Discus']
There are 216 players with metric and 8 NaNs.
['Discus', 'Shotput']
There are 8 players with a Discus (predictor) value and 0 players with NaNs for Discus.
True
[0.18010766]
The RMSE of the model on the training data is 63.00736922266051. The average Shotput of the training data is 438.7752976190476. On average, predictions are 0.14359825989421265 from the actual values in the training data
Discus ['Shotput']
There are 176 players with metric and 48 NaNs.
['Shotput', 'Discus']
There are 48 players with a Shotput (predictor) value and 0 players with NaNs for Shotput.
True
[1.82442589]
The RMSE of the model on the training data is 200.53416465789383. The average Discus of the training data is 1131.438988095238. On average, predictions are 0.17723816022593522 from the actual values in the training data
100 Meter
There are 214 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 217 NaN values for this metric
There are 0 NaN values for th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]


100 Meter ['110 Meter Hurdles' '200 Meter']
There are 48 players with metric and 88 NaNs.
['110 Meter Hurdles', '100 Meter']
There are 8 players with a 110 Meter Hurdles (predictor) value and 80 players with NaNs for 110 Meter Hurdles.
True
[0.44200003]
The RMSE of the model on the training data is 0.10196264196044504. The average 100 Meter of the training data is 12.270000000000001. On average, predictions are 0.008309913770207419 from the actual values in the training data
200 Meter ['100 Meter']
There are 48 players with metric and 88 NaNs.
['100 Meter', '200 Meter']
There are 18 players with a 100 Meter (predictor) value and 70 players with NaNs for 100 Meter.
True
[2.02083821]
The RMSE of the model on the training data is 1.092561538962829. The average 200 Meter of the training data is 26.14666666666667. On average, predictions are 0.04178588241826219 from the actual values in the training data
110 Meter Hurdles ['100 Meter' '300 Meter Hurdles']
There are 11 players with metric an

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

There are 7 players with a Triple Jump (predictor) value and 106 players with NaNs for Triple Jump.
True
[0.06228189]
The RMSE of the model on the training data is 4.456809256987529. The average High Jump of the training data is 61.27777777777778. On average, predictions are 0.07273124807413918 from the actual values in the training data
Long Jump ['Triple Jump' 'Discus']
There are 19 players with metric and 117 NaNs.
['Triple Jump', 'Long Jump']
There are 6 players with a Triple Jump (predictor) value and 111 players with NaNs for Triple Jump.
True
[0.4415887]
The RMSE of the model on the training data is 15.855182387907535. The average Long Jump of the training data is 199.95. On average, predictions are 0.07929573587350605 from the actual values in the training data
Triple Jump ['Long Jump' 'High Jump']
There are 16 players with metric and 120 NaNs.
['Long Jump', 'Triple Jump']
There are 9 players with a Long Jump (predictor) value and 111 players with NaNs for Long Jump.
True
[1.43

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

100 Meter ['200 Meter' 'Discus' '300 Meter Hurdles' '110 Meter Hurdles']
There are 223 players with metric and 94 NaNs.
['200 Meter', '100 Meter']
There are 47 players with a 200 Meter (predictor) value and 47 players with NaNs for 200 Meter.
True
[0.33954367]
The RMSE of the model on the training data is 0.4899407497766905. The average 100 Meter of the training data is 12.021195652173919. On average, predictions are 0.04075640759478774 from the actual values in the training data
200 Meter ['100 Meter' '110 Meter Hurdles']
There are 231 players with metric and 86 NaNs.
['100 Meter', '200 Meter']
There are 39 players with a 100 Meter (predictor) value and 47 players with NaNs for 100 Meter.
True
[1.62968859]
The RMSE of the model on the training data is 1.0733665428788888. The average 200 Meter of the training data is 24.7936956521739. On average, predictions are 0.04329191411949822 from the actual values in the training data
110 Meter Hurdles ['300 Meter Hurdles' '200 Meter' '100 Meter

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

There are 111 players with a Long Jump (predictor) value and 144 players with NaNs for Long Jump.
True
[0.09758503]
The RMSE of the model on the training data is 4.604040535862899. The average High Jump of the training data is 65.63829787234043. On average, predictions are 0.07014259487376216 from the actual values in the training data
Long Jump ['Triple Jump' 'High Jump']
There are 158 players with metric and 159 NaNs.
['Triple Jump', 'Long Jump']
There are 3 players with a Triple Jump (predictor) value and 156 players with NaNs for Triple Jump.
True
[0.36627122]
The RMSE of the model on the training data is 15.578913732866782. The average Long Jump of the training data is 217.91176470588235. On average, predictions are 0.0714918432875517 from the actual values in the training data
Triple Jump ['Long Jump' 'High Jump']
There are 54 players with metric and 263 NaNs.
['Long Jump', 'Triple Jump']
There are 107 players with a Long Jump (predictor) value and 156 players with NaNs for Long 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[0.09249924]
The RMSE of the model on the training data is 3.9754857421779386. The average High Jump of the training data is 65.89393939393939. On average, predictions are 0.060331584038570696 from the actual values in the training data
Long Jump ['Triple Jump' 'High Jump']
There are 124 players with metric and 120 NaNs.
['Triple Jump', 'Long Jump']
There are 19 players with a Triple Jump (predictor) value and 101 players with NaNs for Triple Jump.
True
[0.34513389]
The RMSE of the model on the training data is 18.565654537460297. The average Long Jump of the training data is 219.9875. On average, predictions are 0.08439413392788361 from the actual values in the training data
Triple Jump ['Long Jump']
There are 59 players with metric and 185 NaNs.
['Long Jump', 'Triple Jump']
There are 84 players with a Long Jump (predictor) value and 101 players with NaNs for Long Jump.
True
[0.952559]
The RMSE of the model on the training data is 30.84341103265686. The average Triple Jump of the trai

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy


Long Jump
There are 120 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 185 NaN values for this metric
There are 0 NaN values for this metric
Shotput
There are 236 NaN values for this metric
There are 0 NaN values for this metric
Discus
There are 237 NaN values for this metric
There are 0 NaN values for this metric
DL
{'100 Meter': 'There are 127 NaNs out of 159 players', '200 Meter': 'There are 133 NaNs out of 159 players', '110 Meter Hurdles': 'There are 157 NaNs out of 159 players', '300 Meter Hurdles': 'There are 157 NaNs out of 159 players', 'High Jump': 'There are 152 NaNs out of 159 players', 'Long Jump': 'There are 146 NaNs out of 159 players', 'Triple Jump': 'There are 154 NaNs out of 159 players', 'Shotput': 'There are 21 NaNs out of 159 players', 'Discus': 'There are 48 NaNs out of 159 players'}
100 Meter ['Triple Jump' '200 Meter']
There are 32 players with metric and 127 NaNs.
['Triple Jump', '100 Meter']
There are 3 players with a 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is tryi

300 Meter Hurdles []
High Jump ['Long Jump' 'Shotput' 'Triple Jump']
There are 7 players with metric and 152 NaNs.
['Long Jump', 'High Jump']
There are 10 players with a Long Jump (predictor) value and 142 players with NaNs for Long Jump.
True
[0.21561682]
The RMSE of the model on the training data is 0.34451694390375986. The average High Jump of the training data is 65.33333333333333. On average, predictions are 0.005273218529139182 from the actual values in the training data
Long Jump ['High Jump' 'Triple Jump']
There are 13 players with metric and 146 NaNs.
['High Jump', 'Long Jump']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

There are 4 players with a High Jump (predictor) value and 142 players with NaNs for High Jump.
True
[4.60526316]
The RMSE of the model on the training data is 1.5921958796888795. The average Long Jump of the training data is 208.16666666666666. On average, predictions are 0.007648659149826483 from the actual values in the training data
Triple Jump ['100 Meter' 'Discus' 'Shotput' 'High Jump' 'Long Jump']
There are 5 players with metric and 154 NaNs.
['100 Meter', 'Triple Jump']
There are 30 players with a 100 Meter (predictor) value and 124 players with NaNs for 100 Meter.
True
[39.12037037]
The RMSE of the model on the training data is 5.684341886080802e-14. The average Triple Jump of the training data is 455.125. On average, predictions are 1.248962787383862e-16 from the actual values in the training data
Shotput ['Triple Jump' 'High Jump' 'Discus']
There are 138 players with metric and 21 NaNs.
['Triple Jump', 'Shotput']
There are 2 players with a Triple Jump (predictor) value and 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

110 Meter Hurdles []
300 Meter Hurdles []
High Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric[metric] = only_percentiles['Predicted' + " " + metric]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percent

Long Jump []
Triple Jump []
Shotput ['Discus']
There are 95 players with metric and 3 NaNs.
['Discus', 'Shotput']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

There are 3 players with a Discus (predictor) value and 0 players with NaNs for Discus.
True
[0.12181053]
The RMSE of the model on the training data is 64.09689909530566. The average Shotput of the training data is 429.39. On average, predictions are 0.149274317276382 from the actual values in the training data
Discus ['Shotput']
There are 78 players with metric and 20 NaNs.
['Shotput', 'Discus']
There are 20 players with a Shotput (predictor) value and 0 players with NaNs for Shotput.
True
[1.62103801]
The RMSE of the model on the training data is 233.8251396112259. The average Discus of the training data is 1046.6633333333334. On average, predictions are 0.22340052638182847 from the actual values in the training data
100 Meter
There are 94 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 97 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 97 NaN values for this metric
There are 0 NaN values for this met

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]


100 Meter []
200 Meter []
110 Meter Hurdles []
300 Meter Hurdles []
High Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Long Jump ['Triple Jump']
There are 7 players with metric and 7 NaNs.
['Triple Jump', 'Long Jump']
There are 1 players with a Triple Jump (predictor) value and 6 players with NaNs for Triple Jump.
True
[0.39130609]
The RMSE of the model on the training data is 19.124705444162398. The average Long Jump of the training data is 213.66666666666666. On average, predictions are 0.0895072017667507 from the actual values in the training data
Triple Jump ['Long Jump']
There are 4 players with metric and 10 NaNs.
['Long Jump', 'Triple Jump']
There are 4 players with a Long Jump (predictor) value and 6 players with NaNs for Long Jump.
True
[0.57407844]
The RMSE of the model on the training data is 23.164453927613643. The average Triple Jump of the training data is 502.1666666666667. On average, predictions are 0.046129015454922616 from the actual values in the training data
Shotput []
Discus []
100 Meter
There are 4 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric[metric] = only_percentiles['Predicted' + " " + metric]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

100 Meter ['200 Meter']
There are 8 players with metric and 16 NaNs.
['200 Meter', '100 Meter']
There are 3 players with a 200 Meter (predictor) value and 13 players with NaNs for 200 Meter.
True
[0.2922722]
The RMSE of the model on the training data is 0.3908311892622584. The average 100 Meter of the training data is 13.25. On average, predictions are 0.02949669352922705 from the actual values in the training data
200 Meter ['100 Meter']
There are 8 players with metric and 16 NaNs.
['100 Meter', '200 Meter']
There are 3 players with a 100 Meter (predictor) value and 13 players with NaNs for 100 Meter.
True
[1.96471179]
The RMSE of the model on the training data is 1.0133159680961183. The average 200 Meter of the training data is 27.488. On average, predictions are 0.03686393946799034 from the actual values in the training data
110 Meter Hurdles []
300 Meter Hurdles []
High Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is tryi

Long Jump []
Triple Jump []
Shotput ['Discus']
There are 17 players with metric and 7 NaNs.
['Discus', 'Shotput']
There are 0 players with a Discus (predictor) value and 7 players with NaNs for Discus.
True
[0.11964887]
The RMSE of the model on the training data is 45.91363572953311. The average Shotput of the training data is 417.13461538461536. On average, predictions are 0.11006910967386115 from the actual values in the training data
Discus ['Shotput']
There are 13 players with metric and 11 NaNs.
['Shotput', 'Discus']
There are 4 players with a Shotput (predictor) value and 7 players with NaNs for Shotput.
True
[2.76699012]
The RMSE of the model on the training data is 220.79599481856448. The average Discus of the training data is 1052.6538461538462. On average, predictions are 0.20975175802121657 from the actual values in the training data
100 Meter
There are 16 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 16 NaN values for this metric
Ther

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

High Jump []
Long Jump []
Triple Jump []
Shotput ['Discus']
There are 11 players with metric and 2 NaNs.
['Discus', 'Shotput']
There are 0 players with a Discus (predictor) value and 2 players with NaNs for Discus.
True
[0.25712263]
The RMSE of the model on the training data is 58.74507746150331. The average Shotput of the training data is 423.65. On average, predictions are 0.1386641743455761 from the actual values in the training data
Discus ['Shotput']
There are 10 players with metric and 3 NaNs.
['Shotput', 'Discus']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

There are 1 players with a Shotput (predictor) value and 2 players with NaNs for Shotput.
True
[2.52123532]
The RMSE of the model on the training data is 183.95348440201985. The average Discus of the training data is 1162.6. On average, predictions are 0.15822594564082218 from the actual values in the training data
100 Meter
There are 12 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 11 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 13 NaN values for this metric
There are 13 NaN values for this metric
300 Meter Hurdles
There are 13 NaN values for this metric
There are 13 NaN values for this metric
High Jump
There are 13 NaN values for this metric
There are 13 NaN values for this metric
Long Jump
There are 12 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 11 NaN values for this metric
There are 0 NaN values for this metric
Shotput
There are 2 NaN values for this

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Triple Jump ['Long Jump' '200 Meter' '100 Meter']
There are 4 players with metric and 1 NaNs.
['Long Jump', 'Triple Jump']
There are 1 players with a Long Jump (predictor) value and 0 players with NaNs for Long Jump.
True
[2.29574556]
The RMSE of the model on the training data is 18.150519181307494. The average Triple Jump of the training data is 471.75. On average, predictions are 0.03847486842884471 from the actual values in the training data
Shotput []
Discus []
100 Meter
There are 1 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 1 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 5 NaN values for this metric
There are 5 NaN values for this metric
300 Meter Hurdles
There are 5 NaN values for this metric
There are 5 NaN values for this metric
High Jump
There are 4 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 1 NaN values for this metric
There are 0 NaN values 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

ANY NaNs come from events in which NO player in that position has a recorded time/value. THis is because we can't generate the metric dict (the percentile values) for that event since no players have anything recorded. THus, while we get a percentile, we don't get a percentile to value mapping (all NaNs) because no values exist!

In [185]:
cleaned_tf_metrics_position_dataframes['OL'].isna().sum()

Zcruit ID              0
Full Name              0
Position               0
100 Meter              0
200 Meter              0
110 Meter Hurdles    314
300 Meter Hurdles      0
High Jump              0
Long Jump              0
Triple Jump          314
Shotput                0
Discus                 0
dtype: int64

In [186]:
cleaned_tf_metrics_position_dataframes['OL']

Unnamed: 0,Zcruit ID,Full Name,Position,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus
839,02059551-326b-4e7c-a43e-451a7485086c,Jared Oakes,OL,13.7,27.35,,49.39,60.0,124.5,,544.75,1376.642583
880,e0cf4e50-355c-4cf7-b70e-afbf61d0b9c2,Cody Breidenbach,OL,13.7,27.35,,49.39,60.0,124.5,,559.00,1251.000000
888,3d2bc3f0-4833-42d5-9ffd-25cb8d191193,Anthony Dantzler,OL,13.7,27.35,,49.39,60.0,124.5,,501.75,1268.717661
904,aba375f5-4307-447c-982e-0128e1c07f3f,Jeremiah Burch,OL,13.7,27.35,,49.39,60.0,124.5,,485.00,1413.000000
918,f496799b-8459-455c-9e06-5856cb03788b,Micah Harrison,OL,13.7,27.35,,49.39,60.0,124.5,,467.50,927.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
7602,6f112ede-a5c4-4a51-83f7-9f9b8fa54883,Gage Stanaland,OL,13.7,27.35,,49.39,60.0,124.5,,421.75,1265.000000
7604,638f03cf-eefa-4aa3-8db0-dff7823ea283,Matthew Miller,OL,13.7,27.35,,49.39,60.0,124.5,,400.00,811.000000
7615,73a3e865-ff9a-4190-9899-8cddaa22fd30,Sylvester Gethers,OL,13.7,27.35,,49.39,60.0,124.5,,332.00,871.000000
7616,8ca463da-49bc-4b1e-8065-08ed82a6306f,Danny Dill,OL,13.7,27.35,,49.39,60.0,124.5,,366.50,876.750000


In [187]:
column_names = cleaned_tf_metrics_position_dataframes['QB'].columns
df = pd.DataFrame(columns = column_names)
for position in positions:
    df = df.append(cleaned_tf_metrics_position_dataframes[position])

In [188]:
df = df.sort_index()

In [189]:
df

Unnamed: 0,Zcruit ID,Full Name,Position,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus
3,eb32fcc6-a8f5-41df-b524-a4fc824a14f3,Mark Phifer,CB,12.51,24.94000,18.455,45.845,66.00,212.50,450.875000,378.750000,1103.0
4,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,RB,11.74,25.77000,17.520,47.840,63.00,207.50,438.017729,436.750000,831.0
5,c98e0321-f3a4-4b85-ab2a-bbd16d46ea86,Isaac Jones,RB,11.73,23.28000,17.520,44.910,63.00,194.50,423.156296,436.750000,831.0
6,7e546367-e3bd-44b4-a1e7-1b2cf182d224,David Godsey,WR,11.17,23.18000,17.910,44.810,64.00,222.75,461.500000,471.597175,982.0
7,20396bae-d68b-4b18-858e-e341d20f2b85,Hunt Young,QB,12.22,24.68000,17.650,45.240,64.00,217.75,451.750000,430.250000,1173.5
...,...,...,...,...,...,...,...,...,...,...,...,...
7679,74467a51-ab05-4f1b-bd3d-8ec717c90b43,Savonte Sanford-Paige,S,11.58,23.44000,17.675,46.575,59.75,217.00,468.250000,453.000000,1226.0
7681,f1873c0b-a931-4604-a33c-4439e32789eb,Jerod Thomas,QB,12.26,25.29238,17.650,45.240,64.00,217.75,451.750000,430.250000,1173.5
7682,1f7b327a-510e-4070-a3cd-bc0d2433010f,Tyler Bradley,RB,11.51,23.05000,17.520,44.910,63.00,199.25,433.500000,436.750000,831.0
7684,fb194f5e-bc21-4852-838b-443eebac9aaf,Isaiah Robinson,WR,11.53,24.10000,17.910,44.810,64.00,234.00,471.143387,432.601133,982.0


Lost 400 rows?

In [191]:
df.to_csv('filled_tf_data.csv')

In [198]:
still_nan_events_for_positions = {}
for position in positions:
    series = df[df['Position'] == position].isna().sum() > 0
    still_nan_events_for_positions[position] = series[series].index.values

In [199]:
still_nan_events_for_positions

{'WR': array([], dtype=object),
 'RB': array([], dtype=object),
 'LB': array([], dtype=object),
 'QB': array([], dtype=object),
 'DE': array([], dtype=object),
 'CB': array([], dtype=object),
 'OL': array(['110 Meter Hurdles', 'Triple Jump'], dtype=object),
 'DT': array(['High Jump'], dtype=object),
 'S': array([], dtype=object),
 'OT': array(['110 Meter Hurdles', '300 Meter Hurdles', 'Long Jump',
        'Triple Jump'], dtype=object),
 'OG': array(['110 Meter Hurdles', '300 Meter Hurdles', 'High Jump', 'Long Jump',
        'Triple Jump'], dtype=object),
 'TE': array([], dtype=object),
 'ATH': array([], dtype=object),
 'DB': array([], dtype=object),
 'DL': array([], dtype=object),
 'OC': array(['300 Meter Hurdles', 'High Jump', 'Long Jump', 'Triple Jump'],
       dtype=object),
 'K': array(['300 Meter Hurdles'], dtype=object),
 'FB': array(['300 Meter Hurdles'], dtype=object),
 'LS': array(['110 Meter Hurdles', '300 Meter Hurdles', 'High Jump'],
       dtype=object),
 'P': array(['110 

WR 	"100 Meter"	"200 Meter"	"110 Meter Hurdles"	"300 Meter Hurdles"	"High Jump"	"Long Jump"		
RB 	"100 Meter"	"110 Meter Hurdles"	"Long Jump"					
LB 	"Shotput"	"Discus"	"Hammer"					
QB 	"High Jump"							
DE 	"100 Meter"	"Shotput"	"Discus"	"Hammer"				
CB 	"100 Meter"	"200 Meter"	"110 Meter Hurdles"	"300 Meter Hurdles"	"High Jump"	"Long Jump"	"Triple Jump"	
OL	"High Jump"	"Long Jump"	"Triple Jump"	"Shotput"	"Discus"	"Hammer"		
DT 	"110 Meter Hurdles"	"300 Meter Hurdles"	"High Jump"	"Long Jump"	"Triple Jump"	"Shotput"	"Discus"	"Hammer"

S 	"100 Meter"	"200 Meter"	"110 Meter Hurdles"	"300 Meter Hurdles"	"High Jump"	"Long Jump"	"Triple Jump"	
OT 	"High Jump"	"Long Jump"	"Triple Jump"	"Shotput"	"Discus"	"Hammer"		
OG 								
TE 	"100 Meter"	"200 Meter"						
ATH	"100 Meter"	"200 Meter"	"110 Meter Hurdles"	"300 Meter Hurdles"	"High Jump"			
DB 	"100 Meter"	"200 Meter"						
DL	"High Jump"	"Shotput"	"Discus"					
OC 	"Shotput"	"Discus"	"Hammer"					
K 								
FB 								
LS								
P 								

In [201]:
df[df['Position'] == 'OC'].isna().sum()

Zcruit ID             0
Full Name             0
Position              0
100 Meter             0
200 Meter             0
110 Meter Hurdles     0
300 Meter Hurdles    98
High Jump            98
Long Jump            98
Triple Jump          98
Shotput               0
Discus                0
dtype: int64