In [13]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import re
import scipy.stats
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

### Read ZCruit-like data in
Load in the cleaned (but not filled-in) Zcruit data, `cleaned_zcruit_data.csv`. Show the first 5 rows. Data saved as `data` to be used throughout the notebook.

In [14]:
data = pd.read_csv("cleaned_zcruit_data.csv")
data.head(5)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,Full Name,Last Name,Grad Year,Position,Height,Height Source,Weight,Weight Source,Zcruit Rating,Committed To,...,Hand Size Source,Powerball Toss,Powerball Toss Source,SPARQ Rating,SPARQ Rating Source,GPA,ACT,SAT,Unnamed: 37,Unnamed: 38
0,Korey Foreman,Foreman,2021,DE,"6' 4""",nike_opening,248.0,nike_opening,96.697,USC,...,,44.5,nike_opening,98.76,nike_opening,3.03,,,,
1,J.T. Tuimoloau,Tuimoloau,2021,DE,"6' 4""",national_preps,275.0,national_preps,96.697,,...,,,,,,3.0,,,,
2,JC Latham,Latham,2021,OT,"6' 6""",national_preps,279.0,nike_opening,96.697,Alabama,...,,,,,,2.35,,,,
3,Emeka Egbuka,Egbuka,2021,WR,"6' 0.5""",nike_opening,190.0,nike_opening,96.3939,Ohio State,...,national_combine,41.0,nike_opening,,,4.0,,,,
4,Dallas Turner,Turner,2021,DE,"6' 2.5""",rivals_camp,223.0,rivals_camp,96.3939,Alabama,...,rivals_camp,,,,,3.7,,,,


### Define function to parse heights. 

In [15]:
def height_str_to_val(height_str):
    """Takes in a height string like "6' 4"" and converts string into corresponding float in inches such as 76.0 
        for stated example."""
    match = re.match("^(\d)'\s*(\d*\.*\d*)", height_str)
    groups = match.groups()
    feet, inches = int(groups[0]), float(groups[1])
    return feet * 12 + inches 

### Apply height function to data.
Only run this cell once! If you want to rerun this cell, you'll have to reload in the `data` cell. To verify the `height_str_to_val` function worked, check that the values in the height column are expressed as floats (76.0, 77.0, etc.) instead of 6' 4".

In [16]:
data['Height'] = data['Height'].apply(height_str_to_val)
data.head(5)

Unnamed: 0,Full Name,Last Name,Grad Year,Position,Height,Height Source,Weight,Weight Source,Zcruit Rating,Committed To,...,Hand Size Source,Powerball Toss,Powerball Toss Source,SPARQ Rating,SPARQ Rating Source,GPA,ACT,SAT,Unnamed: 37,Unnamed: 38
0,Korey Foreman,Foreman,2021,DE,76.0,nike_opening,248.0,nike_opening,96.697,USC,...,,44.5,nike_opening,98.76,nike_opening,3.03,,,,
1,J.T. Tuimoloau,Tuimoloau,2021,DE,76.0,national_preps,275.0,national_preps,96.697,,...,,,,,,3.0,,,,
2,JC Latham,Latham,2021,OT,78.0,national_preps,279.0,nike_opening,96.697,Alabama,...,,,,,,2.35,,,,
3,Emeka Egbuka,Egbuka,2021,WR,72.5,nike_opening,190.0,nike_opening,96.3939,Ohio State,...,national_combine,41.0,nike_opening,,,4.0,,,,
4,Dallas Turner,Turner,2021,DE,74.5,rivals_camp,223.0,rivals_camp,96.3939,Alabama,...,rivals_camp,,,,,3.7,,,,


### Define master function to fill in empty values called master_fillna. 
Define the `master_fillna` function. Before that, we define needed sets of metrics, a list of all positions, and 5 helper/supporting functions: `create_percentile`, `value_to_percentile`, `percentile_to_value`, `add_score`, and `nan_counter`. Descriptions of each function are located in the docstring of the function in the code. 

In [17]:
combine_speed_events = ["Forty Yard Dash", "Shuttle", "3 Cone"]
track_events = ["100 Meter", "200 Meter", "110 Meter Hurdles", "300 Meter Hurdles"]
field_events = ['High Jump', 'Long Jump', 'Triple Jump', 'Shotput', 'Discus']
combine_metrics = ["Height", "Weight", "Forty Yard Dash", "3 Cone", "Shuttle", "Vertical Jump", "Broad Jump"]
zcruit_metrics = ["Wingspan","Arm Length","Hand Size", "Powerball Toss"]
positions = ["WR", "RB", "LB", "QB", "DE", "CB", "OL", "DT", "S", "OT", "OG", 
             "TE", "ATH", "DB", "DL", "OC", "K", "FB", "LS", "P"]

def create_percentile(position, event, data):
    """Take in a football position: CB, WR, QB, etc. and an event/measurable: Forty Yard Dash, Weight, 3 Cone, etc.
    
        Returns a list of percentile values in intervals of 5 (0th percentile, 5th percentile, 10th percentile, etc.). 
        Function also reverse values for all speed events (since lower is better) such as 40, 3 Cone, and Shuttle.
        
        For example:
        Inputs: position = 'WR', event = 'Forty Yard Dash', data = data
        Outputs: [24.0,
                 5.45,
                 5.2737,
                 5.18,
                 5.101,
                 5.03625,
                 4.988,
                 4.92195,
                 4.896800000000001,
                 4.839,
                 4.8,
                 4.764,
                 4.7,
                 4.7,
                 4.68,
                 4.63,
                 4.6,
                 4.6,
                 4.54,
                 4.5,
                 4.1] where each value represents a perentile in an interval of 5 (0th percentile, 5th percentile, 10th percentile, etc.)."""
    sorted_athletes = data[data['Position'] == position]     
    event_values = (sorted_athletes[event].astype(float)).dropna()
    percentile = []
    for i in np.arange(0, 1.01, 0.05):
        percentile.append(event_values.quantile(i))
    if event in combine_speed_events or event in track_events:
        return percentile[::-1]
    return percentile

def value_to_percentile(event, value, metric_dict):
    """Take in an event/measurable, a player's value for said event/measurable, and dictionary where the 
        keys are events and the values are percentile lists returned from the create_percentile function.
        
        Returns the corresponding percentile for the specific value.
        
        For example: 
        Inputs: event = 'Weight', value = 200, 
        metric_dict = {'Weight': [150, 165, 170, 175, 180, 185, 190, 195, 200, 220, 230]} 
        
        Outputs: 80 (since 200 is the 80th percentile from as can be seen from the list of percentiles)
        """
    if event in combine_speed_events or event in track_events:
        return round((100 - scipy.stats.percentileofscore(metric_dict[event], value, kind='rank')), 2)
    else:
        return round((scipy.stats.percentileofscore(metric_dict[event], value, kind='rank')), 2)
    
def percentile_to_value(event, percentile, metric_dict):
    """Take in an event/measurable, a percentile, and dictionary where the 
        keys are events and the values are percentile lists returned from the create_percentile function.
        
        Returns the corresponding value for the specific percentile. Is the reverse of the value_to_percentile functionality.
        
        For example: 
        Inputs: event = 'Weight', percentile = 90, 
        metric_dict = {'Weight': [150, 165, 170, 175, 180, 185, 190, 195, 200, 220, 230]} 
        
        Outputs: 200 (since the 80th percentile for Weight is 200 as can be seen from the list of percentiles)
        """
    if event in combine_speed_events or event in track_events:
        return scipy.stats.scoreatpercentile(metric_dict[event], 100 - percentile)
    else:
        return scipy.stats.scoreatpercentile(metric_dict[event], percentile)
        
def add_score(metric, data, metric_dict):
    """Take in a metric (another name for event/measurable), a ZCruit-like datset, and dictionary where the 
        keys are events and the values are percentile lists returned from the create_percentile function.
        
        Returns a dataset with an additional column called "[name of metric] Percentile"
        with the values being the percentiles for the values in the metric column.
        
        For example:
        Inputs: event = 'Weight', data = data, 
        metric_dict = {'Weight': [150, 165, 170, 175, 180, 185, 190, 195, 200, 220, 230]} 
        
        Outputs: a dataframe with an additional column called "Weight Percentile" where the values
        are the percentiles corresponding to the weight values for each player in the dataset. 
        """
    data[metric + " " + "Percentile"] = data.apply(lambda x: value_to_percentile(metric, x[metric], metric_dict), axis = 1)
    return data

def nan_counter(data, metrics):
    """Take in a ZCruit-like datset and a list of metrics.
        
        Prints a statement that describes the number of missing values (NaNs) for that metric. 
        
        Returns a dictionary where the keys are metrics and 
        the values are the percentage of players with missing values for each metric.
        
        For example:
        Inputs: data = data, metrics = ['Forty Yard Dash', 'Weight']
         
        Printed: {'Forty Yard Dash': 'There are 35994 NaNs out of 71370 players', 'Weight': 'There are 0 NaNs out of 71370 players'}
        Outputs: {'Forty Yard Dash': 0.5043295502311895, 'Weight': 0.0}
        """
    nan_dict, readable_dict = {}, {}
    for metric in metrics:
        nan_dict[metric] = data[metric].isna().sum() / data.shape[0]
        readable_dict[metric] = "There are {} NaNs out of {} players".format(data[metric].isna().sum(), data.shape[0])
    print(readable_dict)
    return nan_dict

def master_fillna(data, position, metrics):
    """Take in a ZCruit-like datset, a football position, and a list of metrics.
        
        Returns a dataset that fills in all possible empty values for given metrics. We fill in values using two methods:
        1) Linear Regression Method (use one metric to predict another metric) if the metric has a strong correlation (> 0.4) with another metric in that position.
            Common Ex: Shuttle is used to predict/fill-in 3 Cone and vice-versa.
        2) Mean Percentile Method (used if a metric doesn't have a correlation > 0.4 with any other metrics).
            This method fills in empty values using the mean percentile of all other events the player has values for.
            
            Common Ex: Broad Jump doesn't have many correlated metrics. We decide to use the mean percentile method.
            For a player who is missing the Broad Jump, we consider the other metrics they have values for. 
            Say the player has values for [Forty Yard Dash, Weight, Height] with their percentiles being [80, 60, 40], 
            we would use the average of those 3 percentiles (60) to fill-in for the empty value for the Broad Jump.
            We then convert this percentile to a value using the percentile_to_value function to get an actual value. 
                    
        For example:
        Inputs: data = data, position = 'CB', metrics = ['Forty Yard Dash', 'Weight', 'Height', 'Broad Jump']
        
        Prints: 
        1) Position
        2) Empty Values Information from nan_counter function, 
        3) Metric we are filling in and Best Predictors (metrics with correlation > 0.4 with that metric) for that Metric,
        4, 5) The linear regression coefficient (slope) and intercept if we use the regression method and the RMSE of the linear regression model on the training data:
        a smaller dataset comprised of players who actually have the metric we want to fill-in.
        6, 7) Number of empty values for metric before our function. The number of empty values for metric after function (should be 0).
         
        Outputs: a dataframe with Full Name, Position, Forty Yard Dash, Weight, Height, and Broad Jump with no missing values 
        as long as there are some players with values for given metrics at that position. 
        
        If there are no players with a value for that metric, the metric will remain NaN.
        This happens very infrequently and for less value-rich positions. 
        For example, Powerball Toss for Kickers may be empty because there are no kickers with a Powerball Toss and we can't generate percentile lists with no data.
        """
    # Filtering data    
    print(position)
    data1 = data[data["Position"] == position]
    data_with_metrics = data1[metrics]
    
    # Getting NaN information
    nan_info = nan_counter(data_with_metrics, metrics)
    
    # Classifying metrics into complete, incomplete, and drop
    complete_metric, incomplete_metric, drop_metric = [], [], []
    for metric in nan_info.keys():
        if nan_info[metric] == 0:
            complete_metric.append(metric)
        elif metric in combine_metrics or metric in zcruit_metrics:
            incomplete_metric.append(metric)
        elif nan_info[metric] > 1:
            drop_metric.append(metric)
        else:
            incomplete_metric.append(metric)
    
    data_with_metrics = data_with_metrics.drop(columns = drop_metric)
    
    metric_dict = {}
    for metric in data_with_metrics.columns:
        metric_dict[metric] = create_percentile(position, metric, data)
    # We want to fill in the incomplete metrics
    completed_metrics = {}
    for metric in incomplete_metric:
        correlations = data_with_metrics.corr()[metric]
        correlations = correlations[correlations != 1].sort_values(ascending = False)
        metric_filter = correlations > 0.4 
        best_predictors = metric_filter[metric_filter].index.values

        print(metric, best_predictors)
    
    #IF there are no good predictors for linear regression (ie: no metrics with correlation > 0.4 with incomplete metric),
    #then we use the average approach. 
    
    #For T&F data, percentile method is a little tricky because some players will have NaNs for everything! No ht, wt guarantee like in ZCruit data. 
        if len(best_predictors) == 0:
            #For example, if we are predicting Hand Size with the average approach. We want the percentiles for Height, Weight, and Wingspan
            all_other_metrics = incomplete_metric + complete_metric
            all_other_metrics.remove(metric)
            
            all_other_percentiles = data_with_metrics.copy()
            for percentile_metrics in all_other_metrics:
                all_other_percentiles = add_score(percentile_metrics, all_other_percentiles, metric_dict)
                
            empty_metric = all_other_percentiles[all_other_percentiles[metric].isna()]
            full_metric = all_other_percentiles[all_other_percentiles[metric].notna()]
            
            metric_percentile_names =  [x + " " + "Percentile" for x in all_other_metrics]
            only_percentiles = empty_metric[metric_percentile_names]
            only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)

            only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
            empty_metric[metric] = only_percentiles['Predicted' + " " + metric]
            
            all_metric = empty_metric.append(full_metric).sort_index()
            
            completed_metrics[metric] = all_metric[metric].values
            #print(completed_metrics, len(completed_metrics[metric]), np.count_nonzero(~np.isnan(completed_metrics[metric])))

        else:
            full_metric = data_with_metrics[data_with_metrics[metric].notna()]
            empty_metric = data_with_metrics[data_with_metrics[metric].isna()]
            labels = [best_predictors[0]] + [metric]
            predictor = best_predictors[0]
            full_metric_with_no_predictor_nulls = full_metric[labels].dropna()
            X = full_metric_with_no_predictor_nulls[[predictor]]
            y = full_metric_with_no_predictor_nulls[metric]
            
            reg = LinearRegression(normalize = True).fit(X,y)
            metric_mean = np.median(y)
            
            empty_metric_with_no_predictor_nulls = empty_metric[[predictor]].dropna()
            if len(empty_metric_with_no_predictor_nulls) == 0:
                empty_metric_with_no_predictor_nulls[metric] = metric_mean
            else: 
                empty_metric_with_no_predictor_nulls[metric] = reg.predict(empty_metric_with_no_predictor_nulls)

            empty_metric_with_predictor_nulls = empty_metric[empty_metric[predictor].isna()]
            empty_metric_with_predictor_nulls[metric] = metric_mean
            
            empty_metric = empty_metric_with_no_predictor_nulls.append(empty_metric_with_predictor_nulls).sort_index()
            
            all_metric = empty_metric.append(full_metric).sort_index()
            
            completed_metrics[metric] = all_metric[metric].values
            
            train_y = reg.predict(X)
            train_error = mean_squared_error(y, train_y)
            print(reg.coef_, reg.intercept_)
            print("The RMSE of the model on the training data is {}. The average {} of the training data is {}. On average, predictions are {} from the actual values in the training data".format(np.sqrt(train_error), metric, np.mean(y), np.sqrt(train_error)/np.mean(y)))
            #print(completed_metrics, len(completed_metrics[metric]), np.count_nonzero(~np.isnan(completed_metrics[metric])))
    
    for metric in incomplete_metric:
        print(metric)
        print("There are {} NaN values for this metric".format(data1[metric].isna().sum()))
        data1[metric] = completed_metrics[metric]
        print("There are {} NaN values for this metric".format(data1[metric].isna().sum()))
    
    if 'Zcruit ID' in data.columns:
        all_metrics = ["Zcruit ID", "Full Name", "Position"] + complete_metric + incomplete_metric
        cleaned_dataset = data1[all_metrics]
        return cleaned_dataset
    
    else:
        all_metrics = ["Full Name", "Position"] + complete_metric + incomplete_metric
        cleaned_dataset = data1[all_metrics]
        return cleaned_dataset

### Option 1: Fill in Dataset Yourself

Code below iterates through all the positions and creates a dictionary called `cleaned_combine_metrics_position_dataframes` where the keys are the positions and the values are the cleaned datasets for each position using the combine and zcruit metrics. Has all the print statements associated with the `master_fillna` function which is why you see many outputs.

In [18]:
%%time
cleaned_combine_metrics_position_dataframes = {}
for position in positions:
    cleaned_combine_metrics_position_dataframes[position] = master_fillna(data, position, combine_metrics + zcruit_metrics)

WR
{'Height': 'There are 0 NaNs out of 12359 players', 'Weight': 'There are 0 NaNs out of 12359 players', 'Forty Yard Dash': 'There are 6235 NaNs out of 12359 players', '3 Cone': 'There are 9546 NaNs out of 12359 players', 'Shuttle': 'There are 8483 NaNs out of 12359 players', 'Vertical Jump': 'There are 8608 NaNs out of 12359 players', 'Broad Jump': 'There are 8801 NaNs out of 12359 players', 'Wingspan': 'There are 11947 NaNs out of 12359 players', 'Arm Length': 'There are 12082 NaNs out of 12359 players', 'Hand Size': 'There are 11963 NaNs out of 12359 players', 'Powerball Toss': 'There are 12340 NaNs out of 12359 players'}
Forty Yard Dash []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

3 Cone ['Powerball Toss' 'Shuttle']
[0.02733333] 5.962333333333332
The RMSE of the model on the training data is 0.0. The average 3 Cone of the training data is 7.0215. On average, predictions are 0.0 from the actual values in the training data
Shuttle ['3 Cone']
[0.38332496] 1.7086494219744428
The RMSE of the model on the training data is 0.24604069736686787. The average Shuttle of the training data is 4.6740147588193075. On average, predictions are 0.05264011991032302 from the actual values in the training data
Vertical Jump ['Powerball Toss']
[0.47905169] 14.928799067236692
The RMSE of the model on the training data is 2.2896714453272. The average Vertical Jump of the training data is 31.72222222222222. On average, predictions are 0.072178784616269 from the actual values in the training data
Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Height' 'Arm Length' 'Weight']
[0.95391507] 6.044791501455151
The RMSE of the model on the training data is 2.3533220050715538. The average Wingspan of the training data is 73.49029126213593. On average, predictions are 0.03202221633164278 from the actual values in the training data
Arm Length ['Wingspan' 'Height' 'Weight']
[0.3974528] 2.384995730331898
The RMSE of the model on the training data is 1.4588901732158246. The average Arm Length of the training data is 31.645126353790616. On average, predictions are 0.04610157522853661 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss ['3 Cone' 'Vertical Jump']
[36.58536585] -218.13414634146284
The RMSE of the model on the training data is 4.4938668397781776e-14. The average Powerball Toss of the training data is 38.75. On average, predictions are 1.1597075715556587e-15 from the actual values in the training data
Forty Yard Dash
There are 6235 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 9546 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 8483 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 8608 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 8801 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 11947 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 12082 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 11963 NaN values for this metric
The

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Height' 'Arm Length' 'Weight']
[0.97635539] 4.348980282972306
The RMSE of the model on the training data is 2.3419878324285506. The average Wingspan of the training data is 71.59907407407407. On average, predictions are 0.03270975026863624 from the actual values in the training data
Arm Length ['Wingspan' 'Height']
[0.34679] 6.0850852558839605
The RMSE of the model on the training data is 1.2674899811916. The average Arm Length of the training data is 30.94904458598726. On average, predictions are 0.04095409076910501 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Forty Yard Dash
There are 4197 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 6916 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 6000 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 6080 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 6206 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 8810 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 8923 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 8822 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 9063 NaN values for this metric
There are 0 NaN values for this metric
LB
{'Height': 'There are 0 NaNs out of 8832 players', 'Weight': 'There are 0 NaNs out of 8832 players', 'Forty Yard Dash': 'There are 4132 NaNs out of 8832 players', '3 C

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Height' 'Arm Length']
[0.79324485] 17.728476988862838
The RMSE of the model on the training data is 2.3552396022791497. The average Wingspan of the training data is 74.41271186440679. On average, predictions are 0.031651038421645156 from the actual values in the training data
Arm Length ['Wingspan']
[0.2739041] 11.440570931551356
The RMSE of the model on the training data is 1.2760844311014996. The average Arm Length of the training data is 31.989682539682537. On average, predictions are 0.03989049999225667 from the actual values in the training data
Hand Size ['Forty Yard Dash']
[9.60058488] -34.79493847318083
The RMSE of the model on the training data is 5.873787450068883. The average Hand Size of the training data is 13.8467231638418. On average, predictions are 0.4242005404865182 from the actual values in the training data
Powerball Toss ['3 Cone']
[4.26783421] 1.3521462861997335
The RMSE of the model on the training data is 2.3722216512990872. The average Powerball Toss

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Arm Length' 'Height' 'Weight']
[1.15778665] 37.51672230010552
The RMSE of the model on the training data is 2.2360715206828625. The average Wingspan of the training data is 74.26020408163265. On average, predictions are 0.030111303198477574 from the actual values in the training data
Arm Length ['Wingspan' 'Height']
[0.39120008] 2.6853715443079125
The RMSE of the model on the training data is 1.2997831090287304. The average Arm Length of the training data is 31.7359693877551. On average, predictions are 0.04095614957109942 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss ['3 Cone' 'Height' 'Weight']
[15.0841432] -83.5407939774525
The RMSE of the model on the training data is 2.5211271378479276. The average Powerball Toss of the training data is 32.357142857142854. On average, predictions are 0.07791562898426267 from the actual values in the training data
Forty Yard Dash
There are 3160 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 5056 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 4329 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 4447 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 4570 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 6025 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 6149 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 6042 NaN values for this metric
T

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[7.208972] -25.488731308360876
The RMSE of the model on the training data is 4.93594780040838. The average Hand Size of the training data is 11.869921259842522. On average, predictions are 0.4158366085466236 from the actual values in the training data
Powerball Toss ['Vertical Jump']
[0.53618629] 24.43777338541789
The RMSE of the model on the training data is 3.2954989810234254. The average Powerball Toss of the training data is 38.77777777777778. On average, predictions are 0.08498421441034622 from the actual values in the training data
Forty Yard Dash
There are 1971 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 3086 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 2536 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 2634 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 2682 NaN values for this metric
There are 0 NaN values for this me

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]


Forty Yard Dash ['Shuttle' '3 Cone']
[0.72924326] 1.6043172405103614
The RMSE of the model on the training data is 0.3043254448264179. The average Forty Yard Dash of the training data is 4.993793517406971. On average, predictions are 0.060940734486843375 from the actual values in the training data
3 Cone ['Shuttle' 'Forty Yard Dash']
[0.96738252] 3.2412168711678353
The RMSE of the model on the training data is 0.3879621482020245. The average 3 Cone of the training data is 7.768277819833468. On average, predictions are 0.049941847755689744 from the actual values in the training data
Shuttle ['Forty Yard Dash' '3 Cone']
[0.72947825] 1.0050726035983315
The RMSE of the model on the training data is 0.30437447401069945. The average Shuttle of the training data is 4.647936374549824. On average, predictions are 0.06548593816329504 from the actual values in the training data
Vertical Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Height' 'Arm Length' 'Powerball Toss' 'Weight']
[0.89487783] 10.077214155017856
The RMSE of the model on the training data is 2.2169774236918944. The average Wingspan of the training data is 72.64982517482517. On average, predictions are 0.030515936113499804 from the actual values in the training data
Arm Length ['Wingspan' 'Height']
[0.33647196] 6.8407823080074195
The RMSE of the model on the training data is 1.3359766373674677. The average Arm Length of the training data is 31.323453608247423. On average, predictions are 0.04265100055939256 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss ['Height' 'Wingspan' 'Weight']
[1.12397921] -47.14922048997776
The RMSE of the model on the training data is 2.315556954365124. The average Powerball Toss of the training data is 32.85. On average, predictions are 0.07048879617549846 from the actual values in the training data
Forty Yard Dash
There are 1743 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 2645 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 2229 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 2229 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 2305 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 3690 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 3782 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 3697 NaN values for this metric
There are 0 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Hand Size ['Broad Jump' 'Vertical Jump']
[0.28582265] -10.916661780872122
The RMSE of the model on the training data is 4.139409295137822. The average Hand Size of the training data is 11.5. On average, predictions are 0.3599486343598106 from the actual values in the training data
Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(l

Forty Yard Dash
There are 2692 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 3623 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 3499 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 3573 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 3553 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 3773 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 3781 NaN values for this metric
There are 3781 NaN values for this metric
Hand Size
There are 3773 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 3781 NaN values for this metric
There are 3781 NaN values for this metric
DT
{'Height': 'There are 0 NaNs out of 2994 players', 'Weight': 'There are 0 NaNs out of 2994 players', 'Forty Yard Dash': 'There are 1380 NaNs out of 2994 players'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]


Forty Yard Dash ['Shuttle' '3 Cone' 'Powerball Toss']
[0.92075159] 0.8937457826673247
The RMSE of the model on the training data is 1.2981026311442778. The average Forty Yard Dash of the training data is 5.76255260906758. On average, predictions are 0.2252652113061263 from the actual values in the training data
3 Cone ['Forty Yard Dash' 'Shuttle']
[1.06196738] 2.4975429519561363
The RMSE of the model on the training data is 0.6226738370604947. The average 3 Cone of the training data is 8.696946194225731. On average, predictions are 0.07159683676942995 from the actual values in the training data
Shuttle ['Forty Yard Dash' '3 Cone' 'Powerball Toss']
[0.60790126] 1.7847984268484072
The RMSE of the model on the training data is 1.054761605451129. The average Shuttle of the training data is 5.28786142001711. On average, predictions are 0.19946846592052256 from the actual values in the training data
Vertical Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Powerball Toss' 'Height' 'Arm Length']
[0.47816781] 59.157320205479465
The RMSE of the model on the training data is 2.3020771994174374. The average Wingspan of the training data is 76.29166666666667. On average, predictions are 0.030174687485537133 from the actual values in the training data
Arm Length ['Wingspan' 'Height' 'Hand Size']
[0.37559427] 3.8681934798122377
The RMSE of the model on the training data is 1.4186239566353043. The average Arm Length of the training data is 32.92088607594937. On average, predictions are 0.04309191293826359 from the actual values in the training data
Hand Size ['Arm Length']
[0.15492629] 4.227094353679956
The RMSE of the model on the training data is 0.6116415148675179. The average Hand Size of the training data is 9.32485294117647. On average, predictions are 0.06559261778452778 from the actual values in the training data
Powerball Toss ['Height' 'Wingspan' 'Forty Yard Dash' 'Weight' 'Shuttle']
[1.50314465] -74.24820305480688
The RMSE o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Wingspan ['Height' 'Arm Length']
[0.85510438] 13.168616001790127
The RMSE of the model on the training data is 2.1250495958460647. The average Wingspan of the training data is 73.78418803418803. On average, predictions are 0.028800880682747627 from the actual values in the training data
Arm Length ['Wingspan' 'Height']
[0.28949406] 10.144361255981497
The RMSE of the model on the training data is 1.2548922810492908. The average Arm Length of the training data is 31.597701149425287. On average, predictions are 0.0397146702260052 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Forty Yard Dash
There are 1296 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 2097 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1698 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1771 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1805 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 2754 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 2784 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 2760 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 2865 NaN values for this metric
There are 0 NaN values for this metric
OT
{'Height': 'There are 0 NaNs out of 2599 players', 'Weight': 'There are 0 NaNs out of 2599 players', 'Forty Yard Dash': 'There are 1400 NaNs out of 2599 players', '3 C

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is tryi

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Arm Length']
[1.03006815] 44.31412794299875
The RMSE of the model on the training data is 2.2260404014498616. The average Wingspan of the training data is 79.46220930232558. On average, predictions are 0.028013824697228917 from the actual values in the training data
Arm Length ['Wingspan']
[0.35796096] 5.677724141420551
The RMSE of the model on the training data is 1.3122539700270295. The average Arm Length of the training data is 34.122093023255815. On average, predictions are 0.03845760484659211 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss ['Shuttle' 'Weight']
[28.48246867] -109.53335713152462
The RMSE of the model on the training data is 4.168239883411639. The average Powerball Toss of the training data is 30.071428571428573. On average, predictions are 0.138611302536254 from the actual values in the training data
Forty Yard Dash
There are 1400 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 2098 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1753 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1855 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1883 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 2476 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 2513 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 2480 NaN values for this metric
There are

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Wingspan ['Arm Length' 'Powerball Toss' 'Height' 'Weight']
[1.760422] 19.4269691693127
The RMSE of the model on the training data is 2.015625649082049. The average Wingspan of the training data is 76.63068181818181. On average, predictions are 0.026303115165599515 from the actual values in the training data
Arm Length ['Wingspan' 'Height' 'Weight']
[0.38849945] 2.7233401564717283
The RMSE of the model on the training data is 0.9468838047934871. The average Arm Length of the training data is 32.49431818181818. On average, predictions are 0.029139980703558967 from the actual values in the training data
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss ['Wingspan' 'Vertical Jump']
[0.57055215] -8.814417177914116
The RMSE of the model on the training data is 0.6555177163758565. The average Powerball Toss of the training data is 34.5. On average, predictions are 0.01900051351814077 from the actual values in the training data
Forty Yard Dash
There are 1170 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 1897 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1506 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1647 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1670 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 2329 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 2377 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 2334 NaN values for this metric
There are 0 Na

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Powerball Toss ['3 Cone' 'Hand Size']
[16.12903226] -92.48387096774182
The RMSE of the model on the training data is 1.0048591735576161e-14. The average Powerball Toss of the training data is 35.0. On average, predictions are 2.8710262101646176e-16 from the actual values in the training data
Forty Yard Dash
There are 1343 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 1986 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1781 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1808 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1845 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 2255 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 2278 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 2255 NaN values for this metric
There are 0 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]


Forty Yard Dash ['3 Cone' 'Shuttle' 'Powerball Toss']
[0.51127988] 1.1443624475068934
The RMSE of the model on the training data is 0.33975283400619066. The average Forty Yard Dash of the training data is 5.111137476459507. On average, predictions are 0.06647303766940309 from the actual values in the training data
3 Cone ['Shuttle' 'Forty Yard Dash']
[1.08937538] 2.625700506386579
The RMSE of the model on the training data is 0.43460694632039665. The average 3 Cone of the training data is 7.756059674502714. On average, predictions are 0.05603450264173758 from the actual values in the training data
Shuttle ['3 Cone' 'Forty Yard Dash']
[0.45984859] 1.1428371832798696
The RMSE of the model on the training data is 0.2823678937804641. The average Shuttle of the training data is 4.7094502712477375. On average, predictions are 0.05995771852701878 from the actual values in the training data
Vertical Jump ['Broad Jump' 'Powerball Toss']
[0.26484887] -0.7452289987453256
The RMSE of the model on 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss ['Weight' 'Vertical Jump' 'Height' 'Forty Yard Dash']
[0.11159691] 15.819698299061432
The RMSE of the model on the training data is 2.8985915397489608. The average Powerball Toss of the training data is 36.5. On average, predictions are 0.07941346684243727 from the actual values in the training data
Forty Yard Dash
There are 1202 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 1689 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1497 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1487 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1505 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 2202 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 2247 NaN values for this metric
There are 2247 NaN values for this metric
Hand Size
There are 2200 NaN values fo

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Hand Size ['3 Cone' 'Forty Yard Dash']
[1.15717725] 0.10003226374122143
The RMSE of the model on the training data is 0.4703603351990917. The average Hand Size of the training data is 9.055555555555555. On average, predictions are 0.05194163210787516 from the actual values in the training data
Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Forty Yard Dash
There are 1263 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 1988 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1909 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1938 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1943 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 2160 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 2172 NaN values for this metric
There are 2172 NaN values for this metric
Hand Size
There are 2161 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 2172 NaN values for this metric
There are 2172 NaN values for this metric
DL
{'Height': 'There are 0 NaNs out of 2060 players', 'Weight': 'There are 0 NaNs out of 2060 players', 'Forty Yard Dash': 'There are 1242 NaNs out of 2060 players'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Arm Length []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)


Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Forty Yard Dash
There are 1242 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 1893 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 1834 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 1862 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 1869 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 2043 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 2060 NaN values for this metric
There are 2060 NaN values for this metric
Hand Size
There are 2043 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 2060 NaN values for this metric
There are 2060 NaN values for this metric
OC
{'Height': 'There are 0 NaNs out of 1304 players', 'Weight': 'There are 0 NaNs out of 1304 players', 'Forty Yard Dash': 'There are 555 NaNs out of 1304 players',

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Wingspan ['Arm Length']
[1.34850954] 33.05550444245834
The RMSE of the model on the training data is 2.28312197464914. The average Wingspan of the training data is 76.5909090909091. On average, predictions are 0.02980930768087898 from the actual values in the training data
Arm Length ['Wingspan' 'Height']
[0.34220076] 6.074623779283851
The RMSE of the model on the training data is 1.1501184902910302. The average Arm Length of the training data is 32.28409090909091. On average, predictions are 0.035624930357483514 from the actual values in the training data
Hand Size ['3 Cone']
[5.73550868] -35.15459776209953
The RMSE of the model on the training data is 5.726921094808721. The average Hand Size of the training data is 14.18142857142857. On average, predictions are 0.4038324535475073 from the actual values in the training data
Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Forty Yard Dash
There are 555 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 990 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 748 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 831 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 828 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 1276 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 1282 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 1277 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 1303 NaN values for this metric
There are 0 NaN values for this metric
K
{'Height': 'There are 0 NaNs out of 783 players', 'Weight': 'There are 0 NaNs out of 783 players', 'Forty Yard Dash': 'There are 472 NaNs out of 783 players', '3 Cone': 'The

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric[metric] = only_percentiles['Predicted' + " " + metric]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

Wingspan []
Arm Length []
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]


Forty Yard Dash
There are 472 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 771 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 671 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 702 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 731 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 783 NaN values for this metric
There are 783 NaN values for this metric
Arm Length
There are 783 NaN values for this metric
There are 783 NaN values for this metric
Hand Size
There are 783 NaN values for this metric
There are 783 NaN values for this metric
Powerball Toss
There are 783 NaN values for this metric
There are 783 NaN values for this metric
FB
{'Height': 'There are 0 NaNs out of 326 players', 'Weight': 'There are 0 NaNs out of 326 players', 'Forty Yard Dash': 'There are 124 NaNs out of 326 players', '3 Cone':

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Hand Size ['Wingspan']
[15.16] -1142.3199999999997
The RMSE of the model on the training data is 1.9093976954970456e-13. The average Hand Size of the training data is 17.42. On average, predictions are 1.096095117966157e-14 from the actual values in the training data
Powerball Toss []
Forty Yard Dash
There are 124 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 243 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 178 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 188 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 210 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 324 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 325 NaN values for this metric
There are 0 NaN values for this metric
Hand Size
There are 324 NaN values for this metric
There are 0 NaN values for 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

Arm Length []
Hand Size []
Powerball Toss []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Forty Yard Dash
There are 124 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 245 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 197 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 218 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 223 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 276 NaN values for this metric
There are 0 NaN values for this metric
Arm Length
There are 277 NaN values for this metric
There are 277 NaN values for this metric
Hand Size
There are 276 NaN values for this metric
There are 0 NaN values for this metric
Powerball Toss
There are 277 NaN values for this metric
There are 277 NaN values for this metric
P
{'Height': 'There are 0 NaNs out of 165 players', 'Weight': 'There are 0 NaNs out of 165 players', 'Forty Yard Dash': 'There are 98 NaNs out of 165 players', '3 Cone': 'Ther

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]


 ['Shuttle' '3 Cone']
[1.08110352] -0.24895501777450146
The RMSE of the model on the training data is 0.5386091734241818. The average Forty Yard Dash of the training data is 5.500440000000001. On average, predictions are 0.09792110693402377 from the actual values in the training data
3 Cone ['Forty Yard Dash']
[0.73137473] 4.260968317113438
The RMSE of the model on the training data is 0.4830818842683432. The average 3 Cone of the training data is 7.9136. On average, predictions are 0.061044516309687526 from the actual values in the training data
Shuttle ['Forty Yard Dash']
[0.89255936] 0.4086108130571926
The RMSE of the model on the training data is 0.48939401999631454. The average Shuttle of the training data is 5.318079999999998. On average, predictions are 0.09202456901669676 from the actual values in the training data
Vertical Jump []
Broad Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is tryi

Wingspan []
Arm Length []
Hand Size []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Powerball Toss []
Forty Yard Dash
There are 98 NaN values for this metric
There are 0 NaN values for this metric
3 Cone
There are 160 NaN values for this metric
There are 0 NaN values for this metric
Shuttle
There are 140 NaN values for this metric
There are 0 NaN values for this metric
Vertical Jump
There are 150 NaN values for this metric
There are 0 NaN values for this metric
Broad Jump
There are 149 NaN values for this metric
There are 0 NaN values for this metric
Wingspan
There are 165 NaN values for this metric
There are 165 NaN values for this metric
Arm Length
There are 165 NaN values for this metric
There are 165 NaN values for this metric
Hand Size
There are 165 NaN values for this metric
There are 165 NaN values for this metric
Powerball Toss
There are 165 NaN values for this metric
There are 165 NaN values for this metric
Wall time: 56 s


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

For example, here we access the filled-in dataset for RBs from `cleaned_combine_metrics_position_dataframes` and show the first 5 rows.

In [19]:
cleaned_combine_metrics_position_dataframes['RB'].head(5)

Unnamed: 0,Full Name,Position,Height,Weight,Forty Yard Dash,3 Cone,Shuttle,Vertical Jump,Broad Jump,Wingspan,Arm Length,Hand Size,Powerball Toss
23,Camar Wheaton,RB,71.0,190.0,4.5,7.75,4.654,30.984,109.968,73.670213,31.0,19.925888,37.1744
28,Donovan Edwards,RB,70.5,203.0,4.74,7.597472,4.5,33.1,107.0476,73.182035,31.0,9.731697,34.07616
41,Xavian Sorey,RB,74.0,207.0,4.6,7.75,4.654,31.28608,110.4768,77.0,32.787915,9.45,37.39072
45,Will Shipley,RB,71.0,198.0,4.46,6.94,4.21,36.3,121.0,73.0,32.0,9.45,37.35248
48,TreVeyon Henderson,RB,69.5,177.5,4.3,7.75,4.654,28.83292,105.982664,75.0,32.094335,9.06,34.0


#### Combine all filled-in datasets by position into one master filled-in dataset called df. 
Master filled in-dataset for ZCruit data is saved as `df` variable. 

In [20]:
column_names = cleaned_combine_metrics_position_dataframes['DE'].columns
df = pd.DataFrame(columns = column_names)
for position in positions:
    df = df.append(cleaned_combine_metrics_position_dataframes[position])
df = df.sort_index()

Show first 5 rows of `df`.

In [21]:
df.head(5)

Unnamed: 0,Full Name,Position,Height,Weight,Forty Yard Dash,3 Cone,Shuttle,Vertical Jump,Broad Jump,Wingspan,Arm Length,Hand Size,Powerball Toss
0,Korey Foreman,DE,76.0,248.0,4.88,7.758,4.7,30.4,104.0,79.3868,33.25,9.691052,44.5
1,J.T. Tuimoloau,DE,76.0,275.0,4.75,8.193,4.9085,22.829,93.0,79.3868,33.25,8.753886,39.0
2,JC Latham,OT,78.0,279.0,5.46,8.2187,4.87,26.8,90.9524,79.5,34.0,10.22096,29.176265
3,Emeka Egbuka,WR,72.5,190.0,4.859,7.083,4.2,35.1,112.0,75.0,32.193956,9.0,41.0
4,Dallas Turner,DE,74.5,223.0,4.991,7.752,4.487,26.0,115.0,83.5,34.25,9.84,38.378617


Not every single event for every single position can be filled in. This occurs when there are 0 players with values for that metric at that position. This happens with infrequent metrics (Arm Length, Powerball Toss, Wingspan, Hand Size) at generally less popular positions (ATH, OL, OC, etc). 

Run the following code with any dataset (substitute name of dataset with `df`). The code below shows which metrics are still empty after running the cleaning code for each position. The information is saved in the `still_nan_events_for_positions_zcruit` dictionary.

In [22]:
still_nan_events_for_positions_zcruit = {}
for position in positions:
    series = df[df['Position'] == position].isna().sum() > 0
    still_nan_events_for_positions_zcruit[position] = series[series].index.values
still_nan_events_for_positions_zcruit

{'WR': array([], dtype=object),
 'RB': array([], dtype=object),
 'LB': array([], dtype=object),
 'QB': array([], dtype=object),
 'DE': array([], dtype=object),
 'CB': array([], dtype=object),
 'OL': array(['Arm Length', 'Powerball Toss'], dtype=object),
 'DT': array([], dtype=object),
 'S': array([], dtype=object),
 'OT': array([], dtype=object),
 'OG': array([], dtype=object),
 'TE': array([], dtype=object),
 'ATH': array(['Arm Length'], dtype=object),
 'DB': array(['Arm Length', 'Powerball Toss'], dtype=object),
 'DL': array(['Arm Length', 'Powerball Toss'], dtype=object),
 'OC': array([], dtype=object),
 'K': array(['Wingspan', 'Arm Length', 'Hand Size', 'Powerball Toss'],
       dtype=object),
 'FB': array(['Powerball Toss'], dtype=object),
 'LS': array(['Arm Length', 'Powerball Toss'], dtype=object),
 'P': array(['Wingspan', 'Arm Length', 'Hand Size', 'Powerball Toss'],
       dtype=object)}

### Option 2: Load in Cleaned Data. 

Do this if you don't want to run cells in option 1. Can just load in the already cleaned and filled-in ZCruit dataset called `cleaned_filled_zcruit_data.csv`. This dataset is the end result from all the work in option 1. Save this dataset as `cleaned`.

In [23]:
cleaned = pd.read_csv("cleaned_filled_zcruit_data.csv")

Show the first 5 rows of `cleaned`. Should look exactly like `df` from Option 1.

In [24]:
cleaned = cleaned.drop(columns = 'Unnamed: 0')
cleaned.head(5)

Unnamed: 0,Full Name,Position,Height,Weight,Forty Yard Dash,3 Cone,Shuttle,Vertical Jump,Broad Jump,Wingspan,Arm Length,Hand Size,Powerball Toss
0,Korey Foreman,DE,76.0,248.0,4.88,7.758,4.7,30.4,104.0,79.3868,33.25,9.691052,44.5
1,J.T. Tuimoloau,DE,76.0,275.0,4.75,8.193,4.9085,22.829,93.0,79.3868,33.25,8.753886,39.0
2,JC Latham,OT,78.0,279.0,5.46,8.2187,4.87,26.8,90.9524,79.5,34.0,10.22096,29.176265
3,Emeka Egbuka,WR,72.5,190.0,4.859,7.083,4.2,35.1,112.0,75.0,32.193956,9.0,41.0
4,Dallas Turner,DE,74.5,223.0,4.991,7.752,4.487,26.0,115.0,83.5,34.25,9.84,38.378617


##  Track & Field Data. Need to do some data cleaning (add ZCruit ID, drop duplicates, drop players with no T&F data) before running the master_fillna function.

Load in original uncleaned ZCruit Data as `original_data` from `original_zcruit_data.csv`. Select relevant columns.

In [25]:
original_data = pd.read_csv("original_zcruit_data.csv", error_bad_lines=False)
good_columns = ["Zcruit ID", "Full Name", "Grad Year", "Position", "Height Source", "Weight Source", "Committed To"]
original_data = original_data[good_columns]

b'Skipping line 1845: expected 70 fields, saw 72\nSkipping line 3345: expected 70 fields, saw 72\n'
b'Skipping line 11921: expected 70 fields, saw 72\nSkipping line 12609: expected 70 fields, saw 72\nSkipping line 13420: expected 70 fields, saw 72\nSkipping line 14108: expected 70 fields, saw 72\nSkipping line 14512: expected 70 fields, saw 72\n'
b'Skipping line 16512: expected 70 fields, saw 72\nSkipping line 21242: expected 70 fields, saw 72\nSkipping line 23743: expected 70 fields, saw 72\n'
b'Skipping line 58041: expected 70 fields, saw 72\nSkipping line 60541: expected 70 fields, saw 72\n'
b'Skipping line 70025: expected 70 fields, saw 72\nSkipping line 72525: expected 70 fields, saw 72\n'
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Merge uncleaned ZCruit data, `original_data`, with cleaned ZCruit data, `data` to add ZCruit ID to `data`. Save merged dataset as `data_with_zcruitid`. Show first 5 rows. Scroll all the way to the right to see `Zcruit ID` column - we didn't have this on our cleaned dataset so we have to add it back in.

In [26]:
data_with_zcruitid = data.merge(original_data, how = "left", on = ["Full Name", "Grad Year", "Position", "Height Source", "Weight Source", "Committed To"])
data_with_zcruitid = data_with_zcruitid.drop_duplicates()
data_with_zcruitid.head(5)

Unnamed: 0,Full Name,Last Name,Grad Year,Position,Height,Height Source,Weight,Weight Source,Zcruit Rating,Committed To,...,Powerball Toss,Powerball Toss Source,SPARQ Rating,SPARQ Rating Source,GPA,ACT,SAT,Unnamed: 37,Unnamed: 38,Zcruit ID
0,Korey Foreman,Foreman,2021,DE,76.0,nike_opening,248.0,nike_opening,96.697,USC,...,44.5,nike_opening,98.76,nike_opening,3.03,,,,,2481c4e2-f105-4e9f-b85d-0925a343910b
2,J.T. Tuimoloau,Tuimoloau,2021,DE,76.0,national_preps,275.0,national_preps,96.697,,...,,,,,3.0,,,,,c4fcba47-0337-4047-bc4d-0187da6f698e
4,JC Latham,Latham,2021,OT,78.0,national_preps,279.0,nike_opening,96.697,Alabama,...,,,,,2.35,,,,,1db04d99-cca6-4bfc-8a5a-3d40ae1795e0
6,Emeka Egbuka,Egbuka,2021,WR,72.5,nike_opening,190.0,nike_opening,96.3939,Ohio State,...,41.0,nike_opening,,,4.0,,,,,abda246f-bb45-4b10-baf1-a9e5d508cc84
8,Dallas Turner,Turner,2021,DE,74.5,rivals_camp,223.0,rivals_camp,96.3939,Alabama,...,,,,,3.7,,,,,7351f453-1854-48ba-aa2c-e6eb9f4b9859


Load in cleaned track & field data as `tf_data` from `cleaned_tf_data.csv`. The cleaning process conducted to generate `cleaned_tf_data.csv` can be seen in the `Track and Field Cleaning [FINAL]` notebook.

In [27]:
tf_data = pd.read_csv("cleaned_tf_data.csv", sep=",")
tf_data = tf_data.drop(columns = 'Unnamed: 0')
tf_data.head(5)

Unnamed: 0,Zcruit ID,Player Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus
0,7b8c6df6-b548-4ed4-ae9b-4a741d7746c6,Bryson Lightfoot,11.25,,,,,,,,
1,45be7538-29ad-4e4a-9d2e-be05b0d3f595,Jalik Lewis,,24.95,,,66.0,217.0,,,
2,ae1749c5-a0f1-4fbc-852b-1ac51f5b1404,Jaiden Brown,,25.9,,,,,,490.5,
3,eb32fcc6-a8f5-41df-b524-a4fc824a14f3,Mark Phifer,12.51,24.94,,,,,,,
4,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,11.74,25.77,,47.84,,207.5,,,


Merge dataset with Zcruit ID, `data_with_zcruitid`, with track and field data, `tf_data` to create `tf_data_with_pos`, the dataset ready to be cleaned. `tf_data_with_pos` should be a cleaned track and field dataset with an added Position column to be used by the master cleaning function, `master_fillna`.

In [28]:
merge_with_tf = data_with_zcruitid[["Zcruit ID", "Position"]]
tf_data_with_pos = tf_data.merge(merge_with_tf, how = "left", on = "Zcruit ID")
tf_data_with_pos = tf_data_with_pos.rename(columns = {'Player Name': 'Full Name'})
tf_data_with_pos.head(5)

Unnamed: 0,Zcruit ID,Full Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus,Position
0,7b8c6df6-b548-4ed4-ae9b-4a741d7746c6,Bryson Lightfoot,11.25,,,,,,,,,
1,45be7538-29ad-4e4a-9d2e-be05b0d3f595,Jalik Lewis,,24.95,,,66.0,217.0,,,,
2,ae1749c5-a0f1-4fbc-852b-1ac51f5b1404,Jaiden Brown,,25.9,,,,,,490.5,,
3,eb32fcc6-a8f5-41df-b524-a4fc824a14f3,Mark Phifer,12.51,24.94,,,,,,,,CB
4,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,11.74,25.77,,47.84,,207.5,,,,RB


Drop position-filtered athletes with NO track or field metrics. Threshold is 4 because a player with no track and field metrics should only have 3 values (ID, Name, Pos) and nothing else if completely empty. Continue to save data as `tf_data_with_pos`.

In [29]:
tf_data_with_pos = tf_data_with_pos.dropna(thresh = 4)
tf_data_with_pos.head(5)

Unnamed: 0,Zcruit ID,Full Name,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus,Position
1,45be7538-29ad-4e4a-9d2e-be05b0d3f595,Jalik Lewis,,24.95,,,66.0,217.0,,,,
2,ae1749c5-a0f1-4fbc-852b-1ac51f5b1404,Jaiden Brown,,25.9,,,,,,490.5,,
3,eb32fcc6-a8f5-41df-b524-a4fc824a14f3,Mark Phifer,12.51,24.94,,,,,,,,CB
4,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,11.74,25.77,,47.84,,207.5,,,,RB
5,c98e0321-f3a4-4b85-ab2a-bbd16d46ea86,Isaac Jones,11.73,23.28,,,,194.5,,,,RB


### Option 1: Fill in Track and Field Data Yourself 

Code below iterates through all the positions and creates a dictionary called `cleaned_tf_metrics_position_dataframes`  where the keys are the positions and the values are the cleaned datasets for each position using all of the track and field events (`track_events` and `field_events`) and the `tf_data_with_pos` dataset. 

In [30]:
%%time
cleaned_tf_metrics_position_dataframes = {}
for position in positions:
    cleaned_tf_metrics_position_dataframes[position] = master_fillna(tf_data_with_pos, position, track_events + field_events)

WR
{'100 Meter': 'There are 470 NaNs out of 1474 players', '200 Meter': 'There are 403 NaNs out of 1474 players', '110 Meter Hurdles': 'There are 1312 NaNs out of 1474 players', '300 Meter Hurdles': 'There are 1311 NaNs out of 1474 players', 'High Jump': 'There are 1167 NaNs out of 1474 players', 'Long Jump': 'There are 768 NaNs out of 1474 players', 'Triple Jump': 'There are 1177 NaNs out of 1474 players', 'Shotput': 'There are 1427 NaNs out of 1474 players', 'Discus': 'There are 1444 NaNs out of 1474 players'}
100 Meter ['200 Meter' '300 Meter Hurdles']
[0.31758863] 4.219561642764204
The RMSE of the model on the training data is 0.5621377260998385. The average 100 Meter of the training data is 12.094736842105252. On average, predictions are 0.04647787987770644 from the actual values in the training data
200 Meter ['100 Meter' '110 Meter Hurdles']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[1.5397188] 6.1742857289888775
The RMSE of the model on the training data is 1.2377443899706517. The average 200 Meter of the training data is 24.79677944862156. On average, predictions are 0.04991553006047555 from the actual values in the training data
110 Meter Hurdles ['300 Meter Hurdles' 'Discus' '200 Meter']
[0.31671364] 3.77111980438878
The RMSE of the model on the training data is 1.4796834930099672. The average 110 Meter Hurdles of the training data is 18.078558558558555. On average, predictions are 0.08184742650898301 from the actual values in the training data
300 Meter Hurdles ['Discus' '110 Meter Hurdles' '100 Meter']
[0.0299289] 13.402207489960972
The RMSE of the model on the training data is 1.551026895201722. The average 300 Meter Hurdles of the training data is 44.14666666666667. On average, predictions are 0.03513349958928697 from the actual values in the training data
High Jump ['Triple Jump' 'Long Jump']
[0.06761343] 33.369442824787136
The RMSE of the model on the tr

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Discus ['300 Meter Hurdles' '110 Meter Hurdles']
[23.80155095] -23.509135753591863
The RMSE of the model on the training data is 43.73978091057082. The average Discus of the training data is 1027.25. On average, predictions are 0.042579489813162154 from the actual values in the training data
100 Meter
There are 470 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 403 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 1312 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 1311 NaN values for this metric
There are 0 NaN values for this metric
High Jump
There are 1167 NaN values for this metric
There are 0 NaN values for this metric
Long Jump
There are 768 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 1177 NaN values for this metric
There are 0 NaN values for this metric
Shotput
There are 1427 NaN values for this metric
There

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[0.28429891] 12.307418050731833
The RMSE of the model on the training data is 1.6264207462721445. The average 200 Meter of the training data is 25.518205128205125. On average, predictions are 0.06373570312257076 from the actual values in the training data
110 Meter Hurdles ['Discus' '300 Meter Hurdles']
[0.00391856] 14.241342045782845
The RMSE of the model on the training data is 0.01585359919964183. The average 110 Meter Hurdles of the training data is 17.61. On average, predictions are 0.0009002611697695531 from the actual values in the training data
300 Meter Hurdles ['110 Meter Hurdles' 'Discus' '200 Meter']
[1.40579733] 20.965993998222746
The RMSE of the model on the training data is 2.5453924447680345. The average 300 Meter Hurdles of the training data is 45.7565909090909. On average, predictions are 0.0556289792179932 from the actual values in the training data
High Jump ['Shotput' 'Long Jump']
[0.04761492] 42.92021518259498
The RMSE of the model on the training data is 4.184827

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy


There are 0 NaN values for this metric
110 Meter Hurdles
There are 556 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 562 NaN values for this metric
There are 0 NaN values for this metric
High Jump
There are 532 NaN values for this metric
There are 0 NaN values for this metric
Long Jump
There are 459 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 537 NaN values for this metric
There are 0 NaN values for this metric
Shotput
There are 315 NaN values for this metric
There are 0 NaN values for this metric
Discus
There are 392 NaN values for this metric
There are 0 NaN values for this metric
QB
{'100 Meter': 'There are 147 NaNs out of 350 players', '200 Meter': 'There are 135 NaNs out of 350 players', '110 Meter Hurdles': 'There are 307 NaNs out of 350 players', '300 Meter Hurdles': 'There are 298 NaNs out of 350 players', 'High Jump': 'There are 274 NaNs out of 350 players', 'Long Jump': 'There are 214

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[0.26479812] 5.61437901726127
The RMSE of the model on the training data is 0.4744841116237274. The average 100 Meter of the training data is 12.371360544217689. On average, predictions are 0.038353430079725456 from the actual values in the training data
200 Meter ['100 Meter' '110 Meter Hurdles' '300 Meter Hurdles']
[2.02138766] 0.5101675086123123
The RMSE of the model on the training data is 1.310958760771062. The average 200 Meter of the training data is 25.51748299319727. On average, predictions are 0.05137492444378436 from the actual values in the training data
110 Meter Hurdles ['300 Meter Hurdles' '100 Meter' '200 Meter']
[0.4923124] -4.3569257342784375
The RMSE of the model on the training data is 1.3710642033790537. The average 110 Meter Hurdles of the training data is 18.233225806451614. On average, predictions are 0.0751959207840183 from the actual values in the training data
300 Meter Hurdles ['110 Meter Hurdles' '100 Meter' '200 Meter']
[1.29451862] 22.282556127452057
The 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[0.26460737] 98.92220875578549
The RMSE of the model on the training data is 18.26045643927761. The average Long Jump of the training data is 217.26785714285714. On average, predictions are 0.08404582564309578 from the actual values in the training data
Triple Jump ['Long Jump' 'High Jump']
[1.12009279] 203.8898397626761
The RMSE of the model on the training data is 37.56969399175379. The average Triple Jump of the training data is 447.25. On average, predictions are 0.08400155168642547 from the actual values in the training data
Shotput ['Discus' 'High Jump']
[0.19667434] 190.44385056254404
The RMSE of the model on the training data is 48.553911240466064. The average Shotput of the training data is 429.25. On average, predictions are 0.11311336340236706 from the actual values in the training data
Discus ['Shotput' 'Long Jump']
[2.82242605] 2.6947707551755684
The RMSE of the model on the training data is 183.9338454373405. The average Discus of the training data is 1214.2211538461538. 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is tryi

[2.58717616] 33.1229705703272
The RMSE of the model on the training data is 229.01820115624517. The average Discus of the training data is 1171.552795031056. On average, predictions are 0.1954826125869763 from the actual values in the training data
100 Meter
There are 210 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 207 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 251 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 255 NaN values for this metric
There are 0 NaN values for this metric
High Jump
There are 247 NaN values for this metric
There are 0 NaN values for this metric
Long Jump
There are 251 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 260 NaN values for this metric
There are 0 NaN values for this metric
Shotput
There are 69 NaN values for this metric
There are 0 NaN values for this metric
Discus
There are

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice fr

[0.27350399] 5.242504069617169
The RMSE of the model on the training data is 0.5798483754418956. The average 100 Meter of the training data is 12.00779329608939. On average, predictions are 0.04828933686181426 from the actual values in the training data
200 Meter ['100 Meter']
[1.60317795] 5.4849850757406
The RMSE of the model on the training data is 1.4038589608569285. The average 200 Meter of the training data is 24.735614525139678. On average, predictions are 0.05675456170414271 from the actual values in the training data
110 Meter Hurdles ['300 Meter Hurdles' '100 Meter']
[0.35942971] 1.85448426182516
The RMSE of the model on the training data is 1.6896090735781912. The average 110 Meter Hurdles of the training data is 18.38446428571429. On average, predictions are 0.09190417775138042 from the actual values in the training data
300 Meter Hurdles ['110 Meter Hurdles' '100 Meter']
[0.90533603] 29.345346369315497
The RMSE of the model on the training data is 2.681539177448454. The ave

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean


[180.] -10854.999999999998
The RMSE of the model on the training data is 56.166419386201454. The average Discus of the training data is 965.0. On average, predictions are 0.05820354340539011 from the actual values in the training data
100 Meter
There are 171 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 161 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 547 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 538 NaN values for this metric
There are 0 NaN values for this metric
High Jump
There are 522 NaN values for this metric
There are 0 NaN values for this metric
Long Jump
There are 343 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 504 NaN values for this metric
There are 0 NaN values for this metric
Shotput
There are 608 NaN values for this metric
There are 0 NaN values for this metric
Discus
There are 609 NaN valu

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(l

Long Jump []
Triple Jump []
Shotput ['Discus']
[0.18797399] 226.92981088681137
The RMSE of the model on the training data is 51.4718407953037. The average Shotput of the training data is 432.9621198488889. On average, predictions are 0.11888301178234309 from the actual values in the training data
Discus ['Shotput']
[2.50988192] 9.38440899893294
The RMSE of the model on the training data is 188.08210205567593. The average Discus of the training data is 1096.068204071111. On average, predictions are 0.17159707886524322 from the actual values in the training data
100 Meter
There are 299 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 299 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 314 NaN values for this metric
There are 314 NaN values for this metric
300 Meter Hurdles
There are 313 NaN values for this metric
There are 0 NaN values for this metric
High Jump
There are 313 NaN values for this metric
Ther

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is tryi

Shotput ['Discus']
[0.20037025] 217.16137935856565
The RMSE of the model on the training data is 50.96942748717193. The average Shotput of the training data is 455.18636363636364. On average, predictions are 0.1119748559249241 from the actual values in the training data
Discus ['Shotput']
[2.55145259] 26.539332399011073
The RMSE of the model on the training data is 181.880811662674. The average Discus of the training data is 1187.9257575757576. On average, predictions are 0.15310789458243979 from the actual values in the training data
100 Meter
There are 221 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 227 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 233 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 233 NaN values for this metric
There are 0 NaN values for this metric
High Jump
There are 235 NaN values for this metric
There are 235 NaN values for thi

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[13.29496488] -1713.6447843491221
The RMSE of the model on the training data is 167.24754240327607. The average Discus of the training data is 1112.2. On average, predictions are 0.15037542025110237 from the actual values in the training data
100 Meter
There are 128 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 103 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 313 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 303 NaN values for this metric
There are 0 NaN values for this metric
High Jump
There are 294 NaN values for this metric
There are 0 NaN values for this metric
Long Jump
There are 182 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 273 NaN values for this metric
There are 0 NaN values for this metric
Shotput
There are 338 NaN values for this metric
There are 0 NaN values for this metric
Discus
There are 344 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice fr

300 Meter Hurdles []
High Jump []
Long Jump []
Triple Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric[metric] = only_percentiles['Predicted' + " " + metric]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percent

Shotput ['Discus']
[0.13865638] 286.12387694654313
The RMSE of the model on the training data is 61.65389993127462. The average Shotput of the training data is 446.9789393419689. On average, predictions are 0.13793468663655595 from the actual values in the training data
Discus ['100 Meter' 'Shotput']
[55.41511002] 163.61010005909372
The RMSE of the model on the training data is 166.94228893401942. The average Discus of the training data is 1022.0909090909091. On average, predictions are 0.1633340903917294 from the actual values in the training data
100 Meter
There are 241 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 247 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 254 NaN values for this metric
There are 254 NaN values for this metric
300 Meter Hurdles
There are 254 NaN values for this metric
There are 254 NaN values for this metric
High Jump
There are 252 NaN values for this metric
There are 0 Na

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data1[metric] = completed_metrics[metric]


100 Meter ['200 Meter']
[0.50315108] -0.018093374996515976
The RMSE of the model on the training data is 1.2761593320492757. The average 100 Meter of the training data is 14.846666666666666. On average, predictions are 0.08595594962163959 from the actual values in the training data
200 Meter ['100 Meter']
[1.34599157] 9.55984515037942
The RMSE of the model on the training data is 2.0872615191940183. The average 200 Meter of the training data is 29.543333333333333. On average, predictions are 0.07065084686429036 from the actual values in the training data
110 Meter Hurdles []
300 Meter Hurdles []
High Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is tryi

Long Jump []
Triple Jump []
Shotput ['Discus']
[0.18010766] 234.99447342063993
The RMSE of the model on the training data is 63.00736922266051. The average Shotput of the training data is 438.7752976190476. On average, predictions are 0.14359825989421265 from the actual values in the training data
Discus ['Shotput']
[1.82442589] 330.92597332033813
The RMSE of the model on the training data is 200.53416465789383. The average Discus of the training data is 1131.438988095238. On average, predictions are 0.17723816022593522 from the actual values in the training data
100 Meter
There are 214 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 217 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 224 NaN values for this metric
There are 224 NaN values for this metric
300 Meter Hurdles
There are 224 NaN values for this metric
There are 224 NaN values for this metric
High Jump
There are 224 NaN values for this metric

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

100 Meter ['200 Meter' 'Discus' '300 Meter Hurdles' '110 Meter Hurdles']
[0.33954367] 3.60265317321781
The RMSE of the model on the training data is 0.4899407497766905. The average 100 Meter of the training data is 12.021195652173919. On average, predictions are 0.04075640759478774 from the actual values in the training data
200 Meter ['100 Meter' '110 Meter Hurdles']
[1.62968859] 5.202890255274269
The RMSE of the model on the training data is 1.0733665428788888. The average 200 Meter of the training data is 24.7936956521739. On average, predictions are 0.04329191411949822 from the actual values in the training data
110 Meter Hurdles ['300 Meter Hurdles' '200 Meter' '100 Meter']
[0.34407221] 1.9593400334736586
The RMSE of the model on the training data is 1.7547017488543466. The average 110 Meter Hurdles of the training data is 18.246521739130436. On average, predictions are 0.09616636934651028 from the actual values in the training data
300 Meter Hurdles ['110 Meter Hurdles' '100 Mete

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

[2.6678536] -86.79518425524157
The RMSE of the model on the training data is 215.11373249448798. The average Discus of the training data is 1099.171052631579. On average, predictions are 0.1957054199885211 from the actual values in the training data
100 Meter
There are 94 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 86 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 283 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 276 NaN values for this metric
There are 0 NaN values for this metric
High Jump
There are 255 NaN values for this metric
There are 0 NaN values for this metric
Long Jump
There are 159 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 263 NaN values for this metric
There are 0 NaN values for this metric
Shotput
There are 287 NaN values for this metric
There are 0 NaN values for this metric
Discus
There are

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

100 Meter ['Triple Jump' '200 Meter']
[0.02556213] 0.5660355029585844
The RMSE of the model on the training data is 0.0. The average 100 Meter of the training data is 12.2. On average, predictions are 0.0 from the actual values in the training data
200 Meter ['100 Meter']
[2.52517557] -5.054848528091316
The RMSE of the model on the training data is 2.0535597782775112. The average 200 Meter of the training data is 26.668571428571425. On average, predictions are 0.07700299147173013 from the actual values in the training data
110 Meter Hurdles []
300 Meter Hurdles []
High Jump ['Long Jump' 'Shotput' 'Triple Jump']
[0.21561682] 20.44909902972431
The RMSE of the model on the training data is 0.34451694390375986. The average High Jump of the training data is 65.33333333333333. On average, predictions are 0.005273218529139182 from the actual values in the training data
Long Jump ['High Jump' 'Triple Jump']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is tryi

[4.60526316] -92.71052631578951
The RMSE of the model on the training data is 1.5921958796888795. The average Long Jump of the training data is 208.16666666666666. On average, predictions are 0.007648659149826483 from the actual values in the training data
Triple Jump ['100 Meter' 'Discus' 'Shotput' 'High Jump' 'Long Jump']
[39.12037037] -22.14351851851842
The RMSE of the model on the training data is 5.684341886080802e-14. The average Triple Jump of the training data is 455.125. On average, predictions are 1.248962787383862e-16 from the actual values in the training data
Shotput ['Triple Jump' 'High Jump' 'Discus']
[1.26303855] -27.225056689342466
The RMSE of the model on the training data is 19.50843616573746. The average Shotput of the training data is 468.8333333333333. On average, predictions are 0.041610599713624165 from the actual values in the training data
Discus ['Triple Jump' 'Shotput']
[5.69659864] -1294.839115646259
The RMSE of the model on the training data is 19.45011498

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy

100 Meter []
200 Meter []
110 Meter Hurdles []
300 Meter Hurdles []
High Jump []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Long Jump []
Triple Jump []
Shotput ['Discus']
[0.12181053] 301.8953850112098
The RMSE of the model on the training data is 64.09689909530566. The average Shotput of the training data is 429.39. On average, predictions are 0.149274317276382 from the actual values in the training data
Discus ['Shotput']
[1.62103801] 350.6058241562564
The RMSE of the model on the training data is 233.8251396112259. The average Discus of the training data is 1046.6633333333334. On average, predictions are 0.22340052638182847 from the actual values in the training data
100 Meter
There are 94 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 97 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 97 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 98 NaN values for this metric
There are 98 NaN values for this metric
High Jump
There are 98 NaN values for this metric
There are 98 NaN value

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

100 Meter []
200 Meter []
110 Meter Hurdles []
300 Meter Hurdles []
High Jump []
Long Jump ['Triple Jump']
[0.39130609] 17.16579031869628
The RMSE of the model on the training data is 19.124705444162398. The average Long Jump of the training data is 213.66666666666666. On average, predictions are 0.0895072017667507 from the actual values in the training data
Triple Jump ['Long Jump']
[0.57407844] 379.5052408432458
The RMSE of the model on the training data is 23.164453927613643. The average Triple Jump of the training data is 502.1666666666667. On average, predictions are 0.046129015454922616 from the actual values in the training data
Shotput []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Discus []
100 Meter
There are 4 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 7 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 13 NaN values for this metric
There are 0 NaN values for this metric
300 Meter Hurdles
There are 14 NaN values for this metric
There are 14 NaN values for this metric
High Jump
There are 13 NaN values for this metric
There are 0 NaN values for this metric
Long Jump
There are 7 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 10 NaN values for this metric
There are 0 NaN values for this metric
Shotput
There are 12 NaN values for this metric
There are 0 NaN values for this metric
Discus
There are 12 NaN values for this metric
There are 0 NaN values for this metric
FB
{'100 Meter': 'There are 16 NaNs out of 24 players', '200 Meter': 'There are 16 NaNs out of 24 players', '110 Meter Hurdles': 'There are 23 NaNs out of 24 players', '300 Meter

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty_metric_with_predictor_nulls[metric] = metric_mean
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(l

[0.2922722] 5.2160218206908535
The RMSE of the model on the training data is 0.3908311892622584. The average 100 Meter of the training data is 13.25. On average, predictions are 0.02949669352922705 from the actual values in the training data
200 Meter ['100 Meter']
[1.96471179] 1.4555688482550941
The RMSE of the model on the training data is 1.0133159680961183. The average 200 Meter of the training data is 27.488. On average, predictions are 0.03686393946799034 from the actual values in the training data
110 Meter Hurdles []
300 Meter Hurdles []
High Jump []
Long Jump []
Triple Jump []
Shotput ['Discus']
[0.11964887] 291.18577404971495
The RMSE of the model on the training data is 45.91363572953311. The average Shotput of the training data is 417.13461538461536. On average, predictions are 0.11006910967386115 from the actual values in the training data
Discus ['Shotput']
[2.76699012] -101.55351395098478
The RMSE of the model on the training data is 220.79599481856448. The average Discu

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

Discus ['Shotput']
[2.52123532] 94.47865562029347
The RMSE of the model on the training data is 183.95348440201985. The average Discus of the training data is 1162.6. On average, predictions are 0.15822594564082218 from the actual values in the training data
100 Meter
There are 12 NaN values for this metric
There are 0 NaN values for this metric
200 Meter
There are 11 NaN values for this metric
There are 0 NaN values for this metric
110 Meter Hurdles
There are 13 NaN values for this metric
There are 13 NaN values for this metric
300 Meter Hurdles
There are 13 NaN values for this metric
There are 13 NaN values for this metric
High Jump
There are 13 NaN values for this metric
There are 13 NaN values for this metric
Long Jump
There are 12 NaN values for this metric
There are 0 NaN values for this metric
Triple Jump
There are 11 NaN values for this metric
There are 0 NaN values for this metric
Shotput
There are 2 NaN values for this metric
There are 0 NaN values for this metric
Discus
Ther

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles[metric + " " + "Percentile"] = only_percentiles.mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  only_percentiles['Predicted' + " " + metric] = only_percentiles.apply(lambda x: percentile_to_value(metric, x[metric + " " + "Percentile"], metric_dict), axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

An example of accessing the cleaned, filled-in T&F data for the WR position using the `cleaned_tf_metrics_position_dataframes` dictionary. Show first 5 rows.

In [31]:
cleaned_tf_metrics_position_dataframes['WR'].head(5)

Unnamed: 0,Zcruit ID,Full Name,Position,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus
6,7e546367-e3bd-44b4-a1e7-1b2cf182d224,David Godsey,WR,11.17,23.18,17.91,44.81,64.0,222.75,461.5,471.597175,982.0
9,546e641d-b263-40a2-bcf4-7eea98eed7c7,Nyckoles Harbor,WR,11.311316,22.33,17.91,44.81,64.0,222.75,461.5,508.9174,982.0
16,7f671d52-520b-45ab-93d3-4eb3b59ab5eb,Armond Scott,WR,11.667015,23.45,17.91,44.81,64.0,222.75,461.5,460.90425,982.0
17,1bb159dd-b94a-4684-8493-fde202510adc,Jordan Doucet,WR,11.24,24.31,17.91,44.81,64.0,276.75,517.4765,460.9083,982.0
18,40efd2c2-389a-46d1-af39-065243eeb086,Markevus Jackson,WR,11.71,22.88,17.91,44.81,64.0,222.75,461.5,455.225625,982.0


#### Combine all filled-in track and field datasets by position into one master filled-in track and field dataset called df_tf.
 Master combined cleaned and filled in track and field dataset is saved as `df_tf` variable. 

In [32]:
column_names = cleaned_tf_metrics_position_dataframes['QB'].columns
df_tf = pd.DataFrame(columns = column_names)
for position in positions:
    df_tf = df_tf.append(cleaned_tf_metrics_position_dataframes[position])
df_tf = df_tf.sort_index()

In [33]:
df_tf.head(5)

Unnamed: 0,Zcruit ID,Full Name,Position,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus
3,eb32fcc6-a8f5-41df-b524-a4fc824a14f3,Mark Phifer,CB,12.51,24.94,18.455,45.845,66.0,212.5,450.875,378.75,1103.0
4,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,RB,11.74,25.77,17.52,47.84,63.0,207.5,438.017729,436.75,831.0
5,c98e0321-f3a4-4b85-ab2a-bbd16d46ea86,Isaac Jones,RB,11.73,23.28,17.52,44.91,63.0,194.5,423.156296,436.75,831.0
6,7e546367-e3bd-44b4-a1e7-1b2cf182d224,David Godsey,WR,11.17,23.18,17.91,44.81,64.0,222.75,461.5,471.597175,982.0
7,20396bae-d68b-4b18-858e-e341d20f2b85,Hunt Young,QB,12.22,24.68,17.65,45.24,64.0,217.75,451.75,430.25,1173.5


Like the ZCruit data, some metrics remain completely empty because there are no players at that position with a value for that event. The following code shows which events remain completely empty for each position. For example, at OL, there are no players who have run the 110 Meter Hurdles or done the Triple Jump (which makes sense). The dictionary of metrics and empty values is called `still_nan_events_for_positions_tf`

In [34]:
still_nan_events_for_positions_tf = {}
for position in positions:
    series = df_tf[df_tf['Position'] == position].isna().sum() > 0
    still_nan_events_for_positions_tf[position] = series[series].index.values
still_nan_events_for_positions_tf

{'WR': array([], dtype=object),
 'RB': array([], dtype=object),
 'LB': array([], dtype=object),
 'QB': array([], dtype=object),
 'DE': array([], dtype=object),
 'CB': array([], dtype=object),
 'OL': array(['110 Meter Hurdles', 'Triple Jump'], dtype=object),
 'DT': array(['High Jump'], dtype=object),
 'S': array([], dtype=object),
 'OT': array(['110 Meter Hurdles', '300 Meter Hurdles', 'Long Jump',
        'Triple Jump'], dtype=object),
 'OG': array(['110 Meter Hurdles', '300 Meter Hurdles', 'High Jump', 'Long Jump',
        'Triple Jump'], dtype=object),
 'TE': array([], dtype=object),
 'ATH': array([], dtype=object),
 'DB': array([], dtype=object),
 'DL': array([], dtype=object),
 'OC': array(['300 Meter Hurdles', 'High Jump', 'Long Jump', 'Triple Jump'],
       dtype=object),
 'K': array(['300 Meter Hurdles'], dtype=object),
 'FB': array(['300 Meter Hurdles'], dtype=object),
 'LS': array(['110 Meter Hurdles', '300 Meter Hurdles', 'High Jump'],
       dtype=object),
 'P': array(['110 

### Option 2: Load in Filled In Track & Field Data.
Read the master cleaned and filled in track and field dataset called `cleaned_filled_tf_data.csv`. This dataset is the result of all the work in Option 1. Do this if you don't want to run the cells in Option 1 which can take up to a minute. Save dataset as variable called `filled_tf`. Dataset should look exactly like `df_tf`

In [35]:
filled_tf = pd.read_csv('cleaned_filled_tf_data.csv')

In [36]:
filled_tf = filled_tf.drop(columns = 'Unnamed: 0')
filled_tf.head(5)

Unnamed: 0,Zcruit ID,Full Name,Position,100 Meter,200 Meter,110 Meter Hurdles,300 Meter Hurdles,High Jump,Long Jump,Triple Jump,Shotput,Discus
0,eb32fcc6-a8f5-41df-b524-a4fc824a14f3,Mark Phifer,CB,12.51,24.94,18.455,45.845,66.0,212.5,450.875,378.75,1103.0
1,9712f1c5-6319-49ba-b8df-70b4102d3737,Gerimiah Brown,RB,11.74,25.77,17.52,47.84,63.0,207.5,438.017729,436.75,831.0
2,c98e0321-f3a4-4b85-ab2a-bbd16d46ea86,Isaac Jones,RB,11.73,23.28,17.52,44.91,63.0,194.5,423.156296,436.75,831.0
3,7e546367-e3bd-44b4-a1e7-1b2cf182d224,David Godsey,WR,11.17,23.18,17.91,44.81,64.0,222.75,461.5,471.597175,982.0
4,20396bae-d68b-4b18-858e-e341d20f2b85,Hunt Young,QB,12.22,24.68,17.65,45.24,64.0,217.75,451.75,430.25,1173.5
