In [550]:
import pandas as pd
from scipy import stats
import math
import numpy as np

from sklearn.impute import SimpleImputer
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.decomposition import PCA

In [551]:
df = pd.read_csv('sc_stats.csv')
df.head()

Unnamed: 0,mp,fg,fga,fg_pct,fg3,fg3a,fg3_pct,ft,fta,ft_pct,orb,drb,trb,ast,stl,blk,pf,tov,pts,plus_minus
0,35:39,7,12,0.583,0,1,0.0,0,0,,1,1,2,7,4,0,2,2,14,7
1,39:05,5,9,0.556,2,3,0.667,0,0,,0,2,2,4,1,0,5,3,12,-19
2,28:27,3,6,0.5,1,2,0.5,0,0,,0,5,5,9,2,0,4,1,7,-4
3,21:32,1,5,0.2,0,1,0.0,3,4,0.75,0,1,1,3,0,0,6,0,5,-13
4,31:15,4,8,0.5,1,2,0.5,0,2,0.0,1,3,4,6,0,0,4,5,9,-5


In [552]:
def convert_mp(mp):
    minutes, sec = mp.split(':')
    return int(minutes) + (int(sec) / 60)

In [553]:
df['tot_time'] = df['mp'].apply(lambda row: convert_mp(row))

In [554]:
cols = df.columns.tolist()
cols.remove('mp')

In [555]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 699 entries, 0 to 698
Data columns (total 21 columns):
mp            699 non-null object
fg            699 non-null int64
fga           699 non-null int64
fg_pct        697 non-null float64
fg3           699 non-null int64
fg3a          699 non-null int64
fg3_pct       695 non-null float64
ft            699 non-null int64
fta           699 non-null int64
ft_pct        603 non-null float64
orb           699 non-null int64
drb           699 non-null int64
trb           699 non-null int64
ast           699 non-null int64
stl           699 non-null int64
blk           699 non-null int64
pf            699 non-null int64
tov           699 non-null int64
pts           699 non-null int64
plus_minus    699 non-null int64
tot_time      699 non-null float64
dtypes: float64(4), int64(16), object(1)
memory usage: 114.8+ KB


In [556]:
df.isna().sum(axis=0)

mp             0
fg             0
fga            0
fg_pct         2
fg3            0
fg3a           0
fg3_pct        4
ft             0
fta            0
ft_pct        96
orb            0
drb            0
trb            0
ast            0
stl            0
blk            0
pf             0
tov            0
pts            0
plus_minus     0
tot_time       0
dtype: int64

In [557]:
imputer = SimpleImputer(strategy='constant', fill_value=0)

In [558]:
df = pd.DataFrame(data=imputer.fit_transform(df), columns=df.columns)

In [559]:
df.isna().sum(axis=0)

mp            0
fg            0
fga           0
fg_pct        0
fg3           0
fg3a          0
fg3_pct       0
ft            0
fta           0
ft_pct        0
orb           0
drb           0
trb           0
ast           0
stl           0
blk           0
pf            0
tov           0
pts           0
plus_minus    0
tot_time      0
dtype: int64

In [560]:
df_X = df.loc[:, 'fg': 'tot_time']
df_X.drop('plus_minus', axis=1, inplace=True)

df_y = df['plus_minus']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [561]:
df_X_orig = df_X.copy()
# df_X_orig.drop('plus_minus', axis=1, inplace=True)

df_y_orig = df_y.copy()

In [562]:
df_X.head()

Unnamed: 0,fg,fga,fg_pct,fg3,fg3a,fg3_pct,ft,fta,ft_pct,orb,drb,trb,ast,stl,blk,pf,tov,pts,tot_time
0,7,12,0.583,0,1,0.0,0,0,0.0,1,1,2,7,4,0,2,2,14,35.65
1,5,9,0.556,2,3,0.667,0,0,0.0,0,2,2,4,1,0,5,3,12,39.0833
2,3,6,0.5,1,2,0.5,0,0,0.0,0,5,5,9,2,0,4,1,7,28.45
3,1,5,0.2,0,1,0.0,3,4,0.75,0,1,1,3,0,0,6,0,5,21.5333
4,4,8,0.5,1,2,0.5,0,2,0.0,1,3,4,6,0,0,4,5,9,31.25


In [563]:
df_X_orig.head()

Unnamed: 0,fg,fga,fg_pct,fg3,fg3a,fg3_pct,ft,fta,ft_pct,orb,drb,trb,ast,stl,blk,pf,tov,pts,tot_time
0,7,12,0.583,0,1,0.0,0,0,0.0,1,1,2,7,4,0,2,2,14,35.65
1,5,9,0.556,2,3,0.667,0,0,0.0,0,2,2,4,1,0,5,3,12,39.0833
2,3,6,0.5,1,2,0.5,0,0,0.0,0,5,5,9,2,0,4,1,7,28.45
3,1,5,0.2,0,1,0.0,3,4,0.75,0,1,1,3,0,0,6,0,5,21.5333
4,4,8,0.5,1,2,0.5,0,2,0.0,1,3,4,6,0,0,4,5,9,31.25


# Original Analysis

In [564]:
ALPHA = 0.05

In [565]:
cols = df_X.columns.tolist()

high_info = {
    'feature1': [],
    'feature2': [],
    'r': []
}

mid_info = {
    'feature1': [],
    'feature2': [],
    'r': []
}

high_corr_cutoff = 0.9
mid_corr_cutoff = 0.65

for i in range(len(cols) - 1):
    j = i + 1
    while j < len(cols):
        correl_signif = stats.pearsonr(df_X[cols[i]], df_X[cols[j]])
        
        if abs(correl_signif[0]) > high_corr_cutoff:
            high_info['feature1'].append(cols[i])
            high_info['feature2'].append(cols[j])
            high_info['r'].append(correl_signif[0])
        
        elif abs(correl_signif[0]) < high_corr_cutoff and abs(correl_signif[0]) > mid_corr_cutoff:
            mid_info['feature1'].append(cols[i])
            mid_info['feature2'].append(cols[j])
            mid_info['r'].append(correl_signif[0])
        
        j += 1
        
high_info_df = pd.DataFrame(data=high_info)
print(f'High Correlations:\n\n{high_info_df}')

print('\n')

mid_info_df = pd.DataFrame(data=mid_info)
print('')
print(f'Decent Correlations:\n\n{mid_info_df}')

High Correlations:

  feature1 feature2         r
0       fg      pts  0.944255
1       ft      fta  0.979005
2      drb      trb  0.931680



Decent Correlations:

  feature1 feature2         r
0       fg      fga  0.799839
1       fg   fg_pct  0.674855
2       fg      fg3  0.738251
3      fga     fg3a  0.745016
4      fga      pts  0.766746
5      fg3     fg3a  0.788780
6      fg3      pts  0.808183
7     fg3a      pts  0.695964


In [566]:
label_corr_info = {
    'feature1': [],
    'feature2': [],
    'r': []
}

top_corrs = []
corr_cutoff = 0.29

for col in df_X.columns:
    correl_signif = stats.pearsonr(df_X[col], df_y)
    
    label_corr_info['feature1'].append(col)
    label_corr_info['feature2'].append('plus minus')
    label_corr_info['r'].append(correl_signif[0])
    
    if correl_signif[0] > corr_cutoff:
        top_corrs.append(col)
    
label_info_df = pd.DataFrame(label_corr_info)
print(label_info_df)

    feature1    feature2         r
0         fg  plus minus  0.222720
1        fga  plus minus  0.054447
2     fg_pct  plus minus  0.318048
3        fg3  plus minus  0.298575
4       fg3a  plus minus  0.229519
5    fg3_pct  plus minus  0.232160
6         ft  plus minus  0.158737
7        fta  plus minus  0.156827
8     ft_pct  plus minus  0.165189
9        orb  plus minus  0.005484
10       drb  plus minus  0.191611
11       trb  plus minus  0.178597
12       ast  plus minus  0.242444
13       stl  plus minus  0.162664
14       blk  plus minus  0.046458
15        pf  plus minus -0.124485
16       tov  plus minus -0.133273
17       pts  plus minus  0.272230
18  tot_time  plus minus -0.083537


In [567]:
top_corrs

['fg_pct', 'fg3']

# Feature Engineering

### New Columns

In [568]:
df_X['stl_tov'] = df_X['stl'].div(df_X['tov'].replace(0, np.inf))
df_X['stops/tov'] = (df_X['stl'] + df_X['blk']).div(df_X['tov'].replace(0, np.inf))
df_X['3/pts'] = df_X['fg3'].div(df_X['fg'].replace(0, np.inf))
df_X['3a'] = df_X['fg3a'].div(df_X['fga'].replace(0, np.inf))
df_X['fg3avg'] = df_X['fg3'] / df_X['fg3'].mean()
df_X['pts_avg'] = df_X['pts'] / df['pts'].mean()
df_X['fg_avg'] = df_X['fg'] / df['fg'].mean()
df_X['fg_pct_avg'] = df_X['fg_pct'] / df['fg_pct'].mean()
df_X['pts_per_min'] = (df_X['pts'] / df['tot_time']) * 60
df_X['net_pos_responsible_for'] = df_X['fg'] + df_X['ast'] + df_X['stl'] + df_X['blk'] - df['tov'] # possessions responsible for
df_X['ft+fg3'] = df_X['ft'] + df_X['fg3']
df_X['ast_ratio'] = df_X['ast'] / df_X['ast'].mean()

df_X['efficiency'] = df_X['pts'] + df_X['trb'] + df_X['ast'] + df_X['stl'] + df_X['blk'] - (df_X['fga'] - df_X['fg']) - (df_X['fta'] - df_X['ft']) - df_X['tov']
df_X['efg'] = (df_X['fg'] + (0.5 * df_X['fg3'])) / df_X['fga'].replace(0, np.inf)
df_X['tov%'] = 100 * df_X['tov'] / (df_X['fga'] + 0.44 * df_X['fta'] + df_X['tov']).replace(0, np.inf)
df_X['true_shooting%'] = df_X['pts'] / (2 * (df_X['fga'] + 0.44 * df_X['fta'])).replace(0, np.inf)
df_X['ppp'] = df_X['pts'] / (df_X['fga'] + 0.44 * df_X['fta'] + df_X['tov']).replace(0, np.inf)
df_X['gmsc'] = df_X['pts'] + 0.4 * df_X['fg'] - 0.7 * df_X['fga'] - 0.4 * (df_X['fta'] - df_X['ft']) + 0.7 * df_X['orb'] + 0.3 * df_X['drb'] + df_X['stl'] + 0.7 * df_X['ast'] + 0.7 * df_X['blk'] - 0.4 * df_X['pf'] - df_X['tov']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [569]:
new_cols = ['efficiency', 'ast_ratio', 'ft+fg3', 'gmsc', 'net_pos_responsible_for', 'pts_per_min', 'fg_avg', 'fg_pct_avg', 'pts_avg', 'ppp', 'true_shooting%', 'tov%', 'efg', 'fg3avg', '3a', 'stops/tov', '3/pts', 'stl_tov']

In [570]:
len(new_cols)

18

In [571]:
label_corr_info = {
    'feature1': [],
    'feature2': [],
    'r': []
}

corr_cutoff = 0.3

for col in new_cols:
    correl_signif = stats.pearsonr(df_X[col], df_y)
    
    label_corr_info['feature1'].append(col)
    label_corr_info['feature2'].append('Plus Minus')
    label_corr_info['r'].append(correl_signif[0])
    
    if correl_signif[0] > corr_cutoff:
        top_corrs.append(col)
    
label_info_df = pd.DataFrame(label_corr_info)
print(label_info_df)

                   feature1    feature2         r
0                efficiency  Plus Minus  0.437312
1                 ast_ratio  Plus Minus  0.242444
2                    ft+fg3  Plus Minus  0.285535
3                      gmsc  Plus Minus  0.423458
4   net_pos_responsible_for  Plus Minus  0.377271
5               pts_per_min  Plus Minus  0.365820
6                    fg_avg  Plus Minus  0.222720
7                fg_pct_avg  Plus Minus  0.318048
8                   pts_avg  Plus Minus  0.272230
9                       ppp  Plus Minus  0.412332
10           true_shooting%  Plus Minus  0.379701
11                     tov%  Plus Minus -0.185552
12                      efg  Plus Minus  0.367933
13                   fg3avg  Plus Minus  0.298575
14                       3a  Plus Minus  0.320089
15                stops/tov  Plus Minus  0.173569
16                    3/pts  Plus Minus  0.278758
17                  stl_tov  Plus Minus  0.166153


# Dataset Comparison

In [572]:
top_corrs

['fg_pct',
 'fg3',
 'efficiency',
 'gmsc',
 'net_pos_responsible_for',
 'pts_per_min',
 'fg_pct_avg',
 'ppp',
 'true_shooting%',
 'efg',
 '3a']

In [573]:
len(df_X_orig.columns)

19

In [574]:
# df_X_orig.head()

df_X_orig = StandardScaler().fit_transform(df_X_orig)
df_X_orig = pd.DataFrame(data=df_X_orig, columns=cols)
df_X_orig.head()

Unnamed: 0,fg,fga,fg_pct,fg3,fg3a,fg3_pct,ft,fta,ft_pct,orb,drb,trb,ast,stl,blk,pf,tov,pts,tot_time
0,-0.341324,-0.985658,0.872014,-1.52063,-1.920755,-2.148487,-1.282483,-1.329787,-2.187826,0.344512,-1.279758,-1.05433,0.135016,1.714657,-0.450976,-0.326794,-0.60069,-0.983268,0.207135
1,-0.946102,-1.569041,0.664808,-0.66859,-1.387974,1.2606,-1.282483,-1.329787,-2.187826,-0.807708,-0.82544,-1.05433,-0.905899,-0.545328,-0.450976,1.724961,-0.063069,-1.190506,0.747005
2,-1.550881,-2.152424,0.235048,-1.09461,-1.654364,0.407051,-1.282483,-1.329787,-2.187826,-0.807708,0.537511,0.20188,0.82896,0.208,-0.450976,1.041043,-1.138311,-1.708601,-0.92502
3,-2.15566,-2.346885,-2.067237,-1.52063,-1.920755,-2.148487,-0.231949,-0.014582,-0.064429,-0.807708,-1.279758,-1.473067,-1.252871,-1.298656,-0.450976,2.40888,-1.675932,-1.915839,-2.012623
4,-1.248492,-1.763502,0.235048,-1.09461,-1.654364,0.407051,-1.282483,-0.672184,-2.187826,0.344512,-0.371123,-0.216857,-0.211956,-1.298656,-0.450976,1.041043,1.012174,-1.501363,-0.484737


In [575]:
df_X_feature_engineered = df_X[top_corrs].copy()

df_X_feature_engineered = StandardScaler().fit_transform(df_X_feature_engineered)
df_X_feature_engineered = pd.DataFrame(data=df_X_feature_engineered, columns=top_corrs)
df_X_feature_engineered.head()

Unnamed: 0,fg_pct,fg3,efficiency,gmsc,net_pos_responsible_for,pts_per_min,fg_pct_avg,ppp,true_shooting%,efg,3a
0,0.872014,-1.52063,-0.386181,-0.345792,0.477085,-1.085387,0.872014,-0.168749,-0.177265,0.073803,-2.488748
1,0.664808,-0.66859,-1.134976,-1.260652,-1.289449,-1.408018,0.664808,-0.168749,0.335971,0.564942,-0.884595
2,0.235048,-1.09461,-0.47978,-0.814113,-0.11176,-1.637705,0.235048,-0.168749,-0.177265,0.073803,-0.884595
3,-2.067237,-1.52063,-1.88377,-1.87056,-1.878293,-1.689854,-2.067237,-1.041283,-1.492245,-2.185433,-1.740143
4,0.235048,-1.09461,-1.509373,-1.663627,-1.682012,-1.479707,0.235048,-1.347024,-0.648888,-0.048981,-1.419313


In [576]:
df_X_orig_plus_feature_engineered = StandardScaler().fit_transform(df_X)
df_X_orig_plus_feature_engineered = pd.DataFrame(data=df_X_orig_plus_feature_engineered, columns=df_X.columns)
df_X_orig_plus_feature_engineered.head()

Unnamed: 0,fg,fga,fg_pct,fg3,fg3a,fg3_pct,ft,fta,ft_pct,orb,...,pts_per_min,net_pos_responsible_for,ft+fg3,ast_ratio,efficiency,efg,tov%,true_shooting%,ppp,gmsc
0,-0.341324,-0.985658,0.872014,-1.52063,-1.920755,-2.148487,-1.282483,-1.329787,-2.187826,0.344512,...,-1.085387,0.477085,-1.789126,0.135016,-0.386181,0.073803,-0.004366,-0.177265,-0.168749,-0.345792
1,-0.946102,-1.569041,0.664808,-0.66859,-1.387974,1.2606,-1.282483,-1.329787,-2.187826,-0.807708,...,-1.408018,-1.289449,-1.294329,-0.905899,-1.134976,0.564942,1.309688,0.335971,-0.168749,-1.260652
2,-1.550881,-2.152424,0.235048,-1.09461,-1.654364,0.407051,-1.282483,-1.329787,-2.187826,-0.807708,...,-1.637705,-0.11176,-1.541727,0.82896,-0.47978,0.073803,-0.004366,-0.177265,-0.168749,-0.814113
3,-2.15566,-2.346885,-2.067237,-1.52063,-1.920755,-2.148487,-0.231949,-0.014582,-0.064429,-0.807708,...,-1.689854,-1.878293,-1.046931,-1.252871,-1.88377,-2.185433,-1.756438,-1.492245,-1.041283,-1.87056
4,-1.248492,-1.763502,0.235048,-1.09461,-1.654364,0.407051,-1.282483,-0.672184,-2.187826,0.344512,...,-1.479707,-1.682012,-1.541727,-0.211956,-1.509373,-0.048981,2.661611,-0.648888,-1.347024,-1.663627


In [577]:
len(df_X_orig.columns)

19

In [578]:
variance_cutoff = 0.9

pca = PCA(variance_cutoff)
principalComponents = pca.fit_transform(df_X_orig)
print(principalComponents)

[[ 3.46143927 -0.68860078  0.83687003 ... -1.49347648 -0.2772154
   1.81792583]
 [ 2.94721902 -2.20551613  0.72272748 ... -0.13727601 -0.55800333
   0.85363374]
 [ 3.9294657  -0.63654416  1.42395613 ... -0.49636947 -0.94757528
  -0.43883619]
 ...
 [-0.33094619 -0.35309016 -0.70047541 ... -1.12262338 -1.59499099
  -1.68899249]
 [ 3.19703944  1.45174529  0.40431825 ...  0.40664853  2.31884771
  -1.08761884]
 [-0.0911762   1.66293287 -1.53695052 ...  0.71109459 -0.42887575
  -0.62285592]]


In [579]:
pca.n_components_

11

In [580]:
scores = cross_val_score(SVR(), principalComponents, df_y_orig, cv=5, scoring='neg_mean_squared_error')
print(scores, scores.mean(), scores.std())

[-245.09708282 -160.82593095 -149.54130639 -158.60722274 -168.81387459] -176.5770834970736 34.805762932134556




In [581]:
len(df_X.columns)

37

In [582]:
variance_cutoff = 0.9

pca = PCA(variance_cutoff)
principalComponents = pca.fit_transform(df_X_orig_plus_feature_engineered)
print(principalComponents)

[[ 3.70383143 -0.93322647 -4.98181784 ... -1.27181534 -1.53822549
  -0.04570036]
 [ 3.6696671   3.62231109 -1.69169863 ... -0.53647806  1.62211145
  -0.93814117]
 [ 4.33241806  0.09551334 -4.14608858 ...  1.77575155  0.54885104
  -1.02732547]
 ...
 [-2.29449965 -2.53253616 -3.80644059 ... -0.66644057  0.5350653
  -0.21040415]
 [ 5.62641679 -0.2313842   1.44464281 ...  1.2261228   1.05056978
   2.93103522]
 [-0.15200335 -0.45589988  2.32423412 ...  2.32730604 -0.48736477
  -0.71470053]]


In [583]:
pca.n_components_

10

In [584]:
scores = cross_val_score(SVR(), principalComponents, df_y_orig, cv=5, scoring='neg_mean_squared_error')
print(scores, scores.mean(), scores.std())

[-252.73554215 -170.02972233 -155.5136045  -171.66868388 -183.0279053 ] -186.5950916315959 34.20687639799968




In [585]:
len(top_corrs)

11

In [586]:
variance_cutoff = 0.9

pca = PCA(variance_cutoff)
principalComponents = pca.fit_transform(df_X_feature_engineered)
print(principalComponents)

[[ 0.76923567  2.84318428  1.64982194]
 [ 1.16977223  2.39958672 -1.15655911]
 [ 1.32939648  1.62980001  0.25315131]
 ...
 [-2.22447447 -0.65833564  0.773597  ]
 [ 4.05326844 -1.18731444 -0.88435348]
 [ 0.26982985 -1.66155824 -0.83284524]]


In [587]:
pca.n_components_

3

In [588]:
scores = cross_val_score(SVR(), principalComponents, df_y_orig, cv=5, scoring='neg_mean_squared_error')
print(scores, scores.mean(), scores.std())

[-217.68432529 -156.85131904 -166.80325399 -165.04145232 -180.05733389] -177.28753690813102 21.52718311074687




### Model Comparison

In [589]:
svr1 = SVR()
svr2 = SVR()
svr3 = SVR()

In [590]:
param_grid = [
    {
        'C': [0.1, 0.5, 1.0, 2.5, 5.0],
        'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
        'degree': [3, 6, 9],
        'epsilon': [0.1, 0.5]
    }
]

In [591]:
orig_svr_gscv = GridSearchCV(svr1, param_grid, cv=3, scoring='neg_mean_squared_error', refit='neg_mean_squared_error')
fe_svr_gscv = GridSearchCV(svr2, param_grid, cv=3, scoring='neg_mean_squared_error', refit='neg_mean_squared_error')
df_x_svr_gscv = GridSearchCV(svr3, param_grid, cv=3, scoring='neg_mean_squared_error', refit='neg_mean_squared_error')

In [592]:
orig_svr_gscv.fit(df_X_orig, df_y_orig)



















GridSearchCV(cv=3, error_score='raise-deprecating',
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='auto_deprecated', kernel='rbf',
                           max_iter=-1, shrinking=True, tol=0.001,
                           verbose=False),
             iid='warn', n_jobs=None,
             param_grid=[{'C': [0.1, 0.5, 1.0, 2.5, 5.0], 'degree': [3, 6, 9],
                          'epsilon': [0.1, 0.5],
                          'kernel': ['linear', 'rbf', 'poly', 'sigmoid']}],
             pre_dispatch='2*n_jobs', refit='neg_mean_squared_error',
             return_train_score=False, scoring='neg_mean_squared_error',
             verbose=0)

In [593]:
orig_svr_gscv.best_estimator_, orig_svr_gscv.best_score_ 

(SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
     gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
     tol=0.001, verbose=False), -153.7650291500507)

In [594]:
fe_svr_gscv.fit(df_X_feature_engineered, df_y_orig)



















GridSearchCV(cv=3, error_score='raise-deprecating',
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='auto_deprecated', kernel='rbf',
                           max_iter=-1, shrinking=True, tol=0.001,
                           verbose=False),
             iid='warn', n_jobs=None,
             param_grid=[{'C': [0.1, 0.5, 1.0, 2.5, 5.0], 'degree': [3, 6, 9],
                          'epsilon': [0.1, 0.5],
                          'kernel': ['linear', 'rbf', 'poly', 'sigmoid']}],
             pre_dispatch='2*n_jobs', refit='neg_mean_squared_error',
             return_train_score=False, scoring='neg_mean_squared_error',
             verbose=0)

In [595]:
fe_svr_gscv.best_estimator_, fe_svr_gscv.best_score_

(SVR(C=5.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.5,
     gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
     tol=0.001, verbose=False), -153.88457590091193)

In [596]:
df_x_svr_gscv.fit(df_X_orig_plus_feature_engineered, df_y_orig)

















GridSearchCV(cv=3, error_score='raise-deprecating',
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='auto_deprecated', kernel='rbf',
                           max_iter=-1, shrinking=True, tol=0.001,
                           verbose=False),
             iid='warn', n_jobs=None,
             param_grid=[{'C': [0.1, 0.5, 1.0, 2.5, 5.0], 'degree': [3, 6, 9],
                          'epsilon': [0.1, 0.5],
                          'kernel': ['linear', 'rbf', 'poly', 'sigmoid']}],
             pre_dispatch='2*n_jobs', refit='neg_mean_squared_error',
             return_train_score=False, scoring='neg_mean_squared_error',
             verbose=0)

In [597]:
df_x_svr_gscv.best_estimator_, df_x_svr_gscv.best_score_

(SVR(C=0.5, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
     gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
     tol=0.001, verbose=False), -148.45160163358767)