# Generate Bat Outcome
 - Given the batter has made contact on the given pitch characteristics, what is the batting outcome?
 - I.e., what is the launch_speed_angle, and what is the hit_location

## Potential Difficulties:
 - How to factor in bunting?

$$\alpha$$

In [1]:
import xgboost as xgb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.features import build_features as f
from src.data import data_utils as du
from importlib import reload
reload(f)

vladdy = 665489
soto = 665742
schneider = 676914
biggio = 624415
batter = soto
X_train, y_train, X_test, y_test, encoders = f.get_hit_outcome_dataset(batter, split=True)

In [16]:
import numpy as np
import pandas as pd
from scipy.stats import norm, gaussian_kde
from scipy.interpolate import interp1d
from sklearn.model_selection import KFold
from sklearn.metrics import log_loss

def evaluate_kde_bandwidth(dataframe, bandwidth):
    kde_estimators = []
    inverse_cdfs = []
    for column in dataframe.columns:
        kde = gaussian_kde(dataframe[column], bw_method=bandwidth)
        kde_estimators.append(kde)
        
        # Compute the inverse CDF using interpolation
        x_vals = np.linspace(dataframe[column].min(), dataframe[column].max(), 1000)
        cdf_vals = np.array([kde.integrate_box_1d(-np.inf, x) for x in x_vals])
        inverse_cdf = interp1d(cdf_vals, x_vals, bounds_error=False, fill_value=(x_vals[0], x_vals[-1]))
        inverse_cdfs.append(inverse_cdf)
    
    return kde_estimators, inverse_cdfs

def generate_gaussian_copula_kde(dataframe, kde_estimators, inverse_cdfs, n_samples=None):
    if n_samples is None:
        n_samples = len(dataframe)
    
    # Step 2: Transform data to uniform marginals using the CDF of KDEs
    uniform_marginals = np.zeros_like(dataframe.values)
    for i, column in enumerate(dataframe.columns):
        cdf_values = np.array([kde_estimators[i].integrate_box_1d(-np.inf, x) for x in dataframe[column]])
        uniform_marginals[:, i] = cdf_values
    
    # Step 3: Transform uniform marginals to standard normal marginals
    normal_marginals = norm.ppf(uniform_marginals)
    
    # Ensure no infinite values (can occur if uniform marginals are exactly 0 or 1)
    normal_marginals[np.isinf(normal_marginals)] = np.nan
    normal_marginals = np.nan_to_num(normal_marginals)
    
    # Step 4: Generate new samples from a multivariate normal distribution
    mean = np.zeros(dataframe.shape[1])
    cov = np.corrcoef(normal_marginals, rowvar=False)
    new_samples_normal = np.random.multivariate_normal(mean, cov, size=n_samples)
    
    # Step 5: Transform new samples from standard normal to uniform marginals
    new_samples_uniform = norm.cdf(new_samples_normal)
    
    # Step 6: Transform uniform marginals back to original KDE marginals
    new_samples = np.zeros_like(new_samples_uniform)
    for i, column in enumerate(dataframe.columns):
        new_samples[:, i] = inverse_cdfs[i](new_samples_uniform[:, i])
    
    # Create a DataFrame with the new samples
    generated_dataframe = pd.DataFrame(new_samples, columns=dataframe.columns)
    
    return generated_dataframe

def cross_validate_kde(data, bandwidths, n_splits=5):
    kf = KFold(n_splits=n_splits)
    results = []
    
    for bandwidth in bandwidths:
        log_likelihoods = []
        
        for train_index, test_index in kf.split(data):
            train_data, test_data = data.iloc[train_index], data.iloc[test_index]
            kde_estimators, inverse_cdfs = evaluate_kde_bandwidth(train_data, bandwidth)
            generated_data = generate_gaussian_copula_kde(train_data, kde_estimators, inverse_cdfs, n_samples=len(test_data))
            
            # Evaluate the generated data
            for column in data.columns:
                kde = gaussian_kde(generated_data[column])
                log_likelihood = kde.logpdf(test_data[column]).sum()
                log_likelihoods.append(log_likelihood)
        
        avg_log_likelihood = np.mean(log_likelihoods)
        results.append((bandwidth, avg_log_likelihood))
    
    return results


bandwidths = ['scott', 'silverman', 0.1, 0.2, 0.5, 1.0]
results = cross_validate_kde(y_train, bandwidths)

for bandwidth, avg_log_likelihood in results:
    print(f'Bandwidth: {bandwidth}, Average Log Likelihood: {avg_log_likelihood}')


Bandwidth: scott, Average Log Likelihood: -386.3460999213018
Bandwidth: silverman, Average Log Likelihood: -383.07674515533836
Bandwidth: 0.1, Average Log Likelihood: -381.24942209010254
Bandwidth: 0.2, Average Log Likelihood: -383.1022594355098
Bandwidth: 0.5, Average Log Likelihood: -386.6951514332781
Bandwidth: 1.0, Average Log Likelihood: -393.25794008776904


In [17]:
import numpy as np
import pandas as pd
from scipy.stats import norm, gaussian_kde
from scipy.interpolate import interp1d
from sklearn.model_selection import KFold
from sklearn.metrics import log_loss

def evaluate_kde_bandwidth(dataframe, bandwidth):
    kde_estimators = []
    inverse_cdfs = []
    for column in dataframe.columns:
        kde = gaussian_kde(dataframe[column], bw_method=bandwidth)
        kde_estimators.append(kde)
        
        # Compute the inverse CDF using interpolation
        x_vals = np.linspace(dataframe[column].min(), dataframe[column].max(), 1000)
        cdf_vals = np.array([kde.integrate_box_1d(-np.inf, x) for x in x_vals])
        inverse_cdf = interp1d(cdf_vals, x_vals, bounds_error=False, fill_value=(x_vals[0], x_vals[-1]))
        inverse_cdfs.append(inverse_cdf)
    
    return kde_estimators, inverse_cdfs

def generate_gaussian_copula_kde(dataframe, kde_estimators, inverse_cdfs, n_samples=None):
    if n_samples is None:
        n_samples = len(dataframe)
    
    # Step 2: Transform data to uniform marginals using the CDF of KDEs
    uniform_marginals = np.zeros_like(dataframe.values)
    for i, column in enumerate(dataframe.columns):
        cdf_values = np.array([kde_estimators[i].integrate_box_1d(-np.inf, x) for x in dataframe[column]])
        uniform_marginals[:, i] = cdf_values
    
    # Step 3: Transform uniform marginals to standard normal marginals
    normal_marginals = norm.ppf(uniform_marginals)
    
    # Ensure no infinite values (can occur if uniform marginals are exactly 0 or 1)
    normal_marginals[np.isinf(normal_marginals)] = np.nan
    normal_marginals = np.nan_to_num(normal_marginals)
    
    # Step 4: Generate new samples from a multivariate normal distribution
    mean = np.zeros(dataframe.shape[1])
    cov = np.corrcoef(normal_marginals, rowvar=False)
    new_samples_normal = np.random.multivariate_normal(mean, cov, size=n_samples)
    
    # Step 5: Transform new samples from standard normal to uniform marginals
    new_samples_uniform = norm.cdf(new_samples_normal)
    
    # Step 6: Transform uniform marginals back to original KDE marginals
    new_samples = np.zeros_like(new_samples_uniform)
    for i, column in enumerate(dataframe.columns):
        new_samples[:, i] = inverse_cdfs[i](new_samples_uniform[:, i])
    
    # Create a DataFrame with the new samples
    generated_dataframe = pd.DataFrame(new_samples, columns=dataframe.columns)
    
    return generated_dataframe

def cross_validate_kde(data, bandwidths, n_splits=5):
    kf = KFold(n_splits=n_splits)
    results = []
    
    for bandwidth in bandwidths:
        train_log_likelihoods = []
        
        for train_index, test_index in kf.split(data):
            train_data = data.iloc[train_index]
            kde_estimators, inverse_cdfs = evaluate_kde_bandwidth(train_data, bandwidth)
            generated_data = generate_gaussian_copula_kde(train_data, kde_estimators, inverse_cdfs, n_samples=len(train_data))
            
            # Evaluate the generated data
            log_likelihoods = []
            for column in data.columns:
                kde = gaussian_kde(generated_data[column])
                log_likelihood = kde.logpdf(train_data[column]).sum()
                log_likelihoods.append(log_likelihood)
            train_log_likelihoods.append(np.mean(log_likelihoods))
        
        avg_train_log_likelihood = np.mean(train_log_likelihoods)
        results.append((bandwidth, avg_train_log_likelihood))
    
    return results

def evaluate_on_test_set(data, test_data, best_bandwidth):
    kde_estimators, inverse_cdfs = evaluate_kde_bandwidth(data, best_bandwidth)
    generated_data = generate_gaussian_copula_kde(data, kde_estimators, inverse_cdfs, n_samples=len(test_data))
    
    log_likelihoods = []
    for column in test_data.columns:
        kde = gaussian_kde(generated_data[column])
        log_likelihood = kde.logpdf(test_data[column]).sum()
        log_likelihoods.append(log_likelihood)
    
    avg_test_log_likelihood = np.mean(log_likelihoods)
    return avg_test_log_likelihood


bandwidths = ['scott', 'silverman', 0.1, 0.2, 0.5, 1.0]
results = cross_validate_kde(y_train, bandwidths)

# Find the best bandwidth based on the highest average log likelihood
best_bandwidth = max(results, key=lambda x: x[1])[0]
print(f'Best Bandwidth: {best_bandwidth}')

# Evaluate on the test set
test_log_likelihood = evaluate_on_test_set(y_train, y_test, best_bandwidth)
print(f'Test Set Log Likelihood: {test_log_likelihood}')


Best Bandwidth: 0.1
Test Set Log Likelihood: -211.57732259582986


In [15]:
import numpy as np
import pandas as pd
from scipy.stats import norm, gaussian_kde
from scipy.interpolate import interp1d

def generate_gaussian_copula_kde(dataframe, n_samples=None):
    if n_samples is None:
        n_samples = len(dataframe)
    
    # Step 1: Estimate the marginal distributions using KDE
    kde_estimators = []
    inverse_cdfs = []
    for column in dataframe.columns:
        kde = gaussian_kde(dataframe[column])
        kde_estimators.append(kde)
        
        # Compute the inverse CDF using interpolation
        x_vals = np.linspace(dataframe[column].min(), dataframe[column].max(), 1000)
        cdf_vals = np.array([kde.integrate_box_1d(-np.inf, x) for x in x_vals])
        inverse_cdf = interp1d(cdf_vals, x_vals, bounds_error=False, fill_value=(x_vals[0], x_vals[-1]))
        inverse_cdfs.append(inverse_cdf)
    
    # Step 2: Transform data to uniform marginals using the CDF of KDEs
    uniform_marginals = np.zeros_like(dataframe.values)
    for i, column in enumerate(dataframe.columns):
        cdf_values = np.array([kde_estimators[i].integrate_box_1d(-np.inf, x) for x in dataframe[column]])
        uniform_marginals[:, i] = cdf_values
    
    # Step 3: Transform uniform marginals to standard normal marginals
    normal_marginals = norm.ppf(uniform_marginals)
    
    # Ensure no infinite values (can occur if uniform marginals are exactly 0 or 1)
    normal_marginals[np.isinf(normal_marginals)] = np.nan
    normal_marginals = np.nan_to_num(normal_marginals)
    
    # Step 4: Generate new samples from a multivariate normal distribution
    mean = np.zeros(dataframe.shape[1])
    cov = np.corrcoef(normal_marginals, rowvar=False)
    new_samples_normal = np.random.multivariate_normal(mean, cov, size=n_samples)
    
    # Step 5: Transform new samples from standard normal to uniform marginals
    new_samples_uniform = norm.cdf(new_samples_normal)
    
    # Step 6: Transform uniform marginals back to original KDE marginals
    new_samples = np.zeros_like(new_samples_uniform)
    for i, column in enumerate(dataframe.columns):
        new_samples[:, i] = inverse_cdfs[i](new_samples_uniform[:, i])
    
    # Create a DataFrame with the new samples
    generated_dataframe = pd.DataFrame(new_samples, columns=dataframe.columns)
    
    return generated_dataframe

generated_df = generate_gaussian_copula_kde(y_train, 1)

print(y_train.describe())
print(generated_df.describe())
print(np.corrcoef(y_train,rowvar=False))
print(np.corrcoef(generated_df,rowvar=False))

       launch_speed  launch_angle  spray_angle
count    429.000000    429.000000   429.000000
mean      92.833566      7.606061    88.948718
std       14.811679     30.041576    22.255256
min       21.000000    -81.000000    48.000000
25%       84.600000    -11.000000    70.000000
50%       96.600000      8.000000    87.000000
75%      104.100000     27.000000   106.000000
max      115.700000     82.000000   147.000000
       launch_speed  launch_angle  spray_angle
count      1.000000      1.000000     1.000000
mean     102.233208     22.032888    72.469638
std             NaN           NaN          NaN
min      102.233208     22.032888    72.469638
25%      102.233208     22.032888    72.469638
50%      102.233208     22.032888    72.469638
75%      102.233208     22.032888    72.469638
max      102.233208     22.032888    72.469638
[[ 1.          0.32003678 -0.10351609]
 [ 0.32003678  1.          0.42257853]
 [-0.10351609  0.42257853  1.        ]]
1.0


In [9]:
np.corrcoef(copula_df)

array([[ 1.        , -0.85874961,  0.97369666, ..., -0.9748226 ,
        -0.87770223,  0.95220237],
       [-0.85874961,  1.        , -0.95291002, ...,  0.9513834 ,
         0.99926964, -0.66118303],
       [ 0.97369666, -0.95291002,  1.        , ..., -0.99998753,
        -0.96380206,  0.85755595],
       ...,
       [-0.9748226 ,  0.9513834 , -0.99998753, ...,  1.        ,
         0.9624583 , -0.86011459],
       [-0.87770223,  0.99926964, -0.96380206, ...,  0.9624583 ,
         1.        , -0.68936795],
       [ 0.95220237, -0.66118303,  0.85755595, ..., -0.86011459,
        -0.68936795,  1.        ]])

In [8]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from bisect import bisect
from copy import deepcopy
from sdv.evaluation.single_table import run_diagnostic, evaluate_quality
from sdv.evaluation.single_table import get_column_plot
from sdv.metadata import SingleTableMetadata
from scipy import stats
import time

#X_train = X_train[['release_speed', 'distance_factor']]
#X_test = X_test[['release_speed', 'distance_factor']]

def fit_regressors(X, y):
    regressors = {}
    #fit regressors
    for col in y.columns:
        param_grid = {
            'max_depth': [3, 5],
            'min_samples_split': [2],
            'min_samples_leaf': [1]
        }
        tree = DecisionTreeRegressor()
        grid_search = GridSearchCV(estimator=tree, param_grid=param_grid, cv=5, scoring='r2')
        grid_search.fit(X, y[col])
        best_tree = grid_search.best_estimator_
        regressors[col] = deepcopy(best_tree)
    return regressors


def fit_regressors2(X, y):
    regressors = {}
    # Fit regressors
    for col in y.columns:
        param_grid = {
            'alpha': [0.01, 0.1, 1.0, 10.0, 100.0]
        }
        ridge = Ridge()
        grid_search = GridSearchCV(estimator=ridge, param_grid=param_grid, cv=5, scoring='r2')
        grid_search.fit(X, y[col])
        best_ridge = grid_search.best_estimator_
        regressors[col] = deepcopy(best_ridge)
    return regressors

def regressor_diagnostics(regressors, X, y):
    for col in y.columns:
        y_pred = regressors[col].predict(X)
        r2 = r2_score(y[col], y_pred)
        mse = mean_squared_error(y[col], y_pred)
        print(f'{col}: r2 {r2:.2f}, mse {mse}')
    
def get_density(X):
    pdf, x = np.histogram(X, np.linspace(min(X.values)[0], max(X.values)[0], 1_001), density=True)
    cdf = np.cumsum(pdf) * (x[1] - x[0])
    return x, pdf, cdf
    
def generate_sample(quantile_func, n=100_000):
    return quantile_func(np.random.uniform(size=n))
    
def get_quantile_func(X):
    x, pdf, cdf = get_density(X)
    quantile = lambda val: x[bisect(cdf, val)] 
    return np.vectorize(quantile)

def compute_resids(regressors, X_train, y_train):
    resids = {}
    for col in y_train.columns:
        resids[col] = y_train[col] - regressors[col].predict(X_train)
    resids = pd.DataFrame(resids)
    return resids

def generate_joint_samples(y_train, n_samples=10_000):

    #compute resids & get marginal quantile functions
    #resids = compute_resids(regressors, X_train, y_train)

    #get quantile functions
    quantile_funcs = {}
    for col in y_train.columns:
        quantile_funcs[col] = get_quantile_func(y_train[[col]])

    #get correlation matrix
    corr_matrix = np.corrcoef(y_train, rowvar=False)
    # Step 1: Generate correlated normal samples
    normal_samples = np.random.multivariate_normal(np.zeros(3), corr_matrix, size=n_samples)
    # Step 2: Transform normal samples to uniform using the normal CDF
    uniform_samples = stats.norm.cdf(normal_samples)
    # Step 3: map back to desired space
    joint_samples = {}
    for idx, col in enumerate(y_train.columns):
        joint_samples[col] = quantile_funcs[col](uniform_samples[:, idx])
    return pd.DataFrame(joint_samples)

def run_fit_evaluation(real_data, generated_data):
    meta = SingleTableMetadata()
    meta.detect_from_dataframe(generated_data)
    
    # 1. perform basic validity checks
    diagnostic = run_diagnostic(real_data, generated_data, meta)
    
    # 2. measure the statistical similarity
    quality_report = evaluate_quality(real_data, generated_data, meta)
    
    # 3. plot the data
    for col in y_test.columns:
        fig = get_column_plot(
            real_data=real_data,
            synthetic_data=generated_data,
            metadata=meta,
            column_name=col
        )
        fig.show()
        time.sleep(0.5)

def predict_regressors(regressors, X):
    regressed_data = {}
    for col in regressors.keys():
        regressed_data[col] = regressors[col].predict(X)

    return pd.DataFrame(regressed_data)
        

#fit regressors, get residuals
#regressors = fit_regressors(X_train, y_train)

#regressor_diagnostics(regressors, X_train, y_train)
#regressor_diagnostics(regressors, X_test, y_test)

#sample_res = generate_joint_residual_samples(regressors, X_train, y_train, n_samples=len(y_test))

#print(sample_res)

#pred_data = predict_regressors(regressors, X_test) + sample_res

#y_pred = generate_joint_samples(y_train)

run_fit_evaluation(y_train, generated_df)
run_fit_evaluation(y_test, generated_df)


2024-05-20 11:37:26,308 - sdv.metadata.single_table - INFO - Detected metadata:
2024-05-20 11:37:26,309 - sdv.metadata.single_table - INFO - {
    "columns": {
        "launch_speed": {
            "sdtype": "numerical"
        },
        "launch_angle": {
            "sdtype": "numerical"
        },
        "spray_angle": {
            "sdtype": "numerical"
        }
    },
    "METADATA_SPEC_VERSION": "SINGLE_TABLE_V1"
}


Generating report ...

(1/2) Evaluating Data Validity: |█████████████████████████████████████████████████████| 3/3 [00:00<00:00, 962.95it/s]|
Data Validity Score: 100.0%

(2/2) Evaluating Data Structure: |████████████████████████████████████████████████████| 1/1 [00:00<00:00, 687.14it/s]|
Data Structure Score: 100.0%

Overall Score (Average): 100.0%

Generating report ...

(1/2) Evaluating Column Shapes: |█████████████████████████████████████████████████████| 3/3 [00:00<00:00, 782.08it/s]|
Column Shapes Score: 93.55%

(2/2) Evaluating Column Pair Trends: |████████████████████████████████████████████████| 3/3 [00:00<00:00, 182.02it/s]|
Column Pair Trends Score: 97.16%

Overall Score (Average): 95.36%



2024-05-20 11:37:28,057 - sdv.metadata.single_table - INFO - Detected metadata:
2024-05-20 11:37:28,059 - sdv.metadata.single_table - INFO - {
    "columns": {
        "launch_speed": {
            "sdtype": "numerical"
        },
        "launch_angle": {
            "sdtype": "numerical"
        },
        "spray_angle": {
            "sdtype": "numerical"
        }
    },
    "METADATA_SPEC_VERSION": "SINGLE_TABLE_V1"
}


Generating report ...

(1/2) Evaluating Data Validity: |█████████████████████████████████████████████████████| 3/3 [00:00<00:00, 314.62it/s]|
Data Validity Score: 94.33%

(2/2) Evaluating Data Structure: |████████████████████████████████████████████████████| 1/1 [00:00<00:00, 444.97it/s]|
Data Structure Score: 100.0%

Overall Score (Average): 97.16%

Generating report ...

(1/2) Evaluating Column Shapes: |█████████████████████████████████████████████████████| 3/3 [00:00<00:00, 517.84it/s]|
Column Shapes Score: 82.64%

(2/2) Evaluating Column Pair Trends: |████████████████████████████████████████████████| 3/3 [00:00<00:00, 173.82it/s]|
Column Pair Trends Score: 91.02%

Overall Score (Average): 86.83%



In [23]:
print(y_test.describe(), '\n\n',
      y_train.describe(), '\n\n',
      y_pred.describe())

       launch_speed  launch_angle  spray_angle
count     15.000000     15.000000    15.000000
mean      91.440000     21.066667    98.400000
std       10.905949     27.824621    22.516026
min       72.700000    -41.000000    48.000000
25%       86.750000     13.000000    86.000000
50%       93.700000     24.000000   104.000000
75%       99.750000     37.000000   117.000000
max      107.600000     67.000000   128.000000 

        launch_speed  launch_angle  spray_angle
count    135.000000    135.000000   135.000000
mean      89.818519     24.600000    99.437037
std       12.395866     24.043245    27.622349
min       51.600000    -45.000000    27.000000
25%       83.750000      9.000000    79.000000
50%       91.100000     24.000000   106.000000
75%       99.850000     39.000000   118.000000
max      109.300000     85.000000   232.000000 

        launch_speed  launch_angle   spray_angle
count  10000.000000  10000.000000  10000.000000
mean      89.632251     24.498754     98.935668
std 

In [18]:
run_fit_evaluation(y_train, y_pred)


2024-05-19 17:54:09,623 - sdv.metadata.single_table - INFO - Detected metadata:
2024-05-19 17:54:09,626 - sdv.metadata.single_table - INFO - {
    "columns": {
        "launch_speed": {
            "sdtype": "numerical"
        },
        "launch_angle": {
            "sdtype": "numerical"
        },
        "spray_angle": {
            "sdtype": "numerical"
        }
    },
    "METADATA_SPEC_VERSION": "SINGLE_TABLE_V1"
}


Generating report ...

(1/2) Evaluating Data Validity: |█████████████████████████████████████████████████████| 3/3 [00:00<00:00, 496.39it/s]|
Data Validity Score: 100.0%

(2/2) Evaluating Data Structure: |████████████████████████████████████████████████████| 1/1 [00:00<00:00, 469.42it/s]|
Data Structure Score: 100.0%

Overall Score (Average): 100.0%

Generating report ...

(1/2) Evaluating Column Shapes: |█████████████████████████████████████████████████████| 3/3 [00:00<00:00, 249.76it/s]|
Column Shapes Score: 97.65%

(2/2) Evaluating Column Pair Trends: |████████████████████████████████████████████████| 3/3 [00:00<00:00, 155.61it/s]|
Column Pair Trends Score: 99.44%

Overall Score (Average): 98.55%



In [10]:
print(np.corrcoef(y_train, rowvar=False))
np.corrcoef(y_pred, rowvar=False)

[[ 1.          0.32003678 -0.10351609]
 [ 0.32003678  1.          0.42257853]
 [-0.10351609  0.42257853  1.        ]]


array([[ 1.        ,  0.31553617, -0.08704386],
       [ 0.31553617,  1.        ,  0.4141389 ],
       [-0.08704386,  0.4141389 ,  1.        ]])

In [12]:
y_pred.describe()

Unnamed: 0,launch_speed,launch_angle,spray_angle
count,10000.0,10000.0,10000.0
mean,92.994955,8.362973,89.079298
std,14.65406,30.289969,22.303942
min,21.0,-81.0,48.0
25%,84.6384,-10.095,69.978
50%,96.6653,8.976,86.907
75%,104.1466,27.884,105.915
max,115.6053,81.837,146.901


In [14]:
y_train.describe()

Unnamed: 0,launch_speed,launch_angle,spray_angle
count,429.0,429.0,429.0
mean,92.833566,7.606061,88.948718
std,14.811679,30.041576,22.255256
min,21.0,-81.0,48.0
25%,84.6,-11.0,70.0
50%,96.6,8.0,87.0
75%,104.1,27.0,106.0
max,115.7,82.0,147.0
