In [4]:
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV
import numpy as np
import pandas as pd

In [5]:
from sklearn.discriminant_analysis import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR


concrete = pd.read_csv("Concrete_Data.csv")
X = concrete.drop('Strength',axis=1)
y = concrete[['Strength']]
kfold =KFold(n_splits=5, shuffle=True, random_state=23)

scaler_y = MinMaxScaler()
y_scl = scaler_y.fit_transform(y)
svr = SVR()

In [3]:

kernel = ['linear']
params = {'C':np.linspace(0.001,5,5),'kernel':kernel,'epsilon':np.linspace(0.001,5,5)}
gcv = GridSearchCV(svr, param_grid=params, cv=kfold, verbose=3)
gcv.fit(X,y_scl[:,0])
print(gcv.best_params_)
print(gcv.best_score_)

Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV 1/5] END C=0.001, epsilon=0.001, kernel=linear;, score=0.669 total time=   1.1s
[CV 2/5] END C=0.001, epsilon=0.001, kernel=linear;, score=0.501 total time=   1.1s
[CV 3/5] END C=0.001, epsilon=0.001, kernel=linear;, score=0.665 total time=   1.0s
[CV 4/5] END C=0.001, epsilon=0.001, kernel=linear;, score=0.501 total time=   0.9s
[CV 5/5] END C=0.001, epsilon=0.001, kernel=linear;, score=0.436 total time=   0.9s
[CV 1/5] END C=0.001, epsilon=0.5564444444444444, kernel=linear;, score=-0.099 total time=   0.0s
[CV 2/5] END C=0.001, epsilon=0.5564444444444444, kernel=linear;, score=-0.207 total time=   0.0s
[CV 3/5] END C=0.001, epsilon=0.5564444444444444, kernel=linear;, score=-0.135 total time=   0.0s
[CV 4/5] END C=0.001, epsilon=0.5564444444444444, kernel=linear;, score=-0.103 total time=   0.0s
[CV 5/5] END C=0.001, epsilon=0.5564444444444444, kernel=linear;, score=-0.310 total time=   0.0s
[CV 1/5] END C=0.001, epsil

In [8]:
kernel = ['rbf']
epsilon = np.linspace(0.001,5,5)
params = {'C':np.linspace(0.001,5,5),'kernel':kernel,'epsilon':epsilon}
gcv = GridSearchCV(svr, param_grid=params, cv=kfold, verbose=3)
gcv.fit(X,y_scl[:,0])
print(gcv.best_params_)
print(gcv.best_score_)

Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV 1/5] END C=0.001, epsilon=0.001, kernel=rbf;, score=0.012 total time=   0.1s
[CV 2/5] END C=0.001, epsilon=0.001, kernel=rbf;, score=0.035 total time=   0.1s
[CV 3/5] END C=0.001, epsilon=0.001, kernel=rbf;, score=0.032 total time=   0.1s
[CV 4/5] END C=0.001, epsilon=0.001, kernel=rbf;, score=0.019 total time=   0.1s
[CV 5/5] END C=0.001, epsilon=0.001, kernel=rbf;, score=0.040 total time=   0.1s
[CV 1/5] END C=0.001, epsilon=0.5564444444444444, kernel=rbf;, score=-0.099 total time=   0.0s
[CV 2/5] END C=0.001, epsilon=0.5564444444444444, kernel=rbf;, score=-0.207 total time=   0.0s
[CV 3/5] END C=0.001, epsilon=0.5564444444444444, kernel=rbf;, score=-0.135 total time=   0.0s
[CV 4/5] END C=0.001, epsilon=0.5564444444444444, kernel=rbf;, score=-0.103 total time=   0.0s
[CV 5/5] END C=0.001, epsilon=0.5564444444444444, kernel=rbf;, score=-0.310 total time=   0.0s
[CV 1/5] END C=0.001, epsilon=1.1118888888888887, kernel=

In [9]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

pipe = Pipeline([('SCL',StandardScaler()),('SVR',svr)])
kernel = ['linear']
epsilon = np.linspace(0.001,5,5)
params = {'SVR__C':np.linspace(0.001,5,5),'SVR__kernel':kernel,'SVR__epsilon':epsilon,'SCL':[StandardScaler(),MinMaxScaler()]}
gcv = GridSearchCV(pipe, param_grid=params, cv=kfold, verbose=3)
gcv.fit(X,y_scl[:,0])
print(gcv.best_params_)
print(gcv.best_score_)

Fitting 5 folds for each of 200 candidates, totalling 1000 fits
[CV 1/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.001, SVR__kernel=linear;, score=0.632 total time=   0.1s
[CV 2/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.001, SVR__kernel=linear;, score=0.536 total time=   0.1s
[CV 3/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.001, SVR__kernel=linear;, score=0.613 total time=   0.1s
[CV 4/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.001, SVR__kernel=linear;, score=0.555 total time=   0.1s
[CV 5/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.001, SVR__kernel=linear;, score=0.522 total time=   0.1s
[CV 1/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.5564444444444444, SVR__kernel=linear;, score=-0.099 total time=   0.0s
[CV 2/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.5564444444444444, SVR__kernel=linear;, score=-0.207 total time=   0.0s
[CV 3/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon

In [10]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

pipe = Pipeline([('SCL',StandardScaler()),('SVR',svr)])
kernel = ['rbf']
epsilon = np.linspace(0.001,5,5)
params = {'SVR__C':np.linspace(0.001,5,5),'SVR__kernel':kernel,'SVR__epsilon':epsilon,'SCL':[StandardScaler(),MinMaxScaler()]}
gcv = GridSearchCV(pipe, param_grid=params, cv=kfold, verbose=3)
gcv.fit(X,y_scl[:,0])
print(gcv.best_params_)
print(gcv.best_score_)

Fitting 5 folds for each of 200 candidates, totalling 1000 fits
[CV 1/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.001, SVR__kernel=rbf;, score=0.156 total time=   0.1s
[CV 2/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.001, SVR__kernel=rbf;, score=0.174 total time=   0.1s
[CV 3/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.001, SVR__kernel=rbf;, score=0.169 total time=   0.1s
[CV 4/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.001, SVR__kernel=rbf;, score=0.160 total time=   0.1s
[CV 5/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.001, SVR__kernel=rbf;, score=0.166 total time=   0.1s
[CV 1/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.5564444444444444, SVR__kernel=rbf;, score=-0.099 total time=   0.0s
[CV 2/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.5564444444444444, SVR__kernel=rbf;, score=-0.207 total time=   0.0s
[CV 3/5] END SCL=StandardScaler(), SVR__C=0.001, SVR__epsilon=0.5564444444444444, 

Inferencing

In [17]:
bm_mm = gcv.best_estimator_
tst_conc = pd.read_csv('testConcrete.csv')
y_predit_scl = bm_mm.predict(tst_conc)
y_predit = scaler_y.inverse_transform(y_predit_scl.reshape(-1,1))
#or
min_y = y.min().values[0]
max_y = y.max().values[0]
y_pred = y_predit_scl*(max_y-min_y) + min_y


Randomized Grid Search CV

In [7]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.discriminant_analysis import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR



pipe = Pipeline([('SCL',StandardScaler()),('SVR',svr)])
kernel = ['linear']
epsilon = np.linspace(0.001,5,5)
params = {'SVR__C':np.linspace(0.001,5,5),'SVR__kernel':kernel,'SVR__epsilon':epsilon,'SCL':[StandardScaler(),MinMaxScaler()]}
gcv = RandomizedSearchCV(pipe, param_distributions=params, cv=kfold, verbose=3, n_iter=30, random_state=23)
gcv.fit(X,y_scl[:,0])
print(gcv.best_params_)
print(gcv.best_score_)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV 1/5] END SCL=StandardScaler(), SVR__C=2.5004999999999997, SVR__epsilon=5.0, SVR__kernel=linear;, score=-0.099 total time=   0.0s
[CV 2/5] END SCL=StandardScaler(), SVR__C=2.5004999999999997, SVR__epsilon=5.0, SVR__kernel=linear;, score=-0.207 total time=   0.0s
[CV 3/5] END SCL=StandardScaler(), SVR__C=2.5004999999999997, SVR__epsilon=5.0, SVR__kernel=linear;, score=-0.135 total time=   0.0s
[CV 4/5] END SCL=StandardScaler(), SVR__C=2.5004999999999997, SVR__epsilon=5.0, SVR__kernel=linear;, score=-0.103 total time=   0.0s
[CV 5/5] END SCL=StandardScaler(), SVR__C=2.5004999999999997, SVR__epsilon=5.0, SVR__kernel=linear;, score=-0.310 total time=   0.0s
[CV 1/5] END SCL=StandardScaler(), SVR__C=3.75025, SVR__epsilon=3.75025, SVR__kernel=linear;, score=-0.099 total time=   0.0s
[CV 2/5] END SCL=StandardScaler(), SVR__C=3.75025, SVR__epsilon=3.75025, SVR__kernel=linear;, score=-0.207 total time=   0.0s
[CV 3/5] END SCL=Stan

In [9]:
kernel = ['linear']
params = {'C':np.linspace(0.001,5,5),'kernel':kernel,'epsilon':np.linspace(0.001,5,5)}
gcv = RandomizedSearchCV(svr, param_distributions=params, cv=kfold, verbose=3, n_iter=30, random_state=23)
gcv.fit(X,y_scl[:,0])
print(gcv.best_params_)
print(gcv.best_score_)



Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END C=0.001, epsilon=0.001, kernel=linear;, score=0.669 total time=   1.1s
[CV 2/5] END C=0.001, epsilon=0.001, kernel=linear;, score=0.501 total time=   1.1s
[CV 3/5] END C=0.001, epsilon=0.001, kernel=linear;, score=0.665 total time=   1.0s
[CV 4/5] END C=0.001, epsilon=0.001, kernel=linear;, score=0.501 total time=   1.0s
[CV 5/5] END C=0.001, epsilon=0.001, kernel=linear;, score=0.436 total time=   0.9s
[CV 1/5] END C=0.001, epsilon=1.2507499999999998, kernel=linear;, score=-0.099 total time=   0.0s
[CV 2/5] END C=0.001, epsilon=1.2507499999999998, kernel=linear;, score=-0.207 total time=   0.0s
[CV 3/5] END C=0.001, epsilon=1.2507499999999998, kernel=linear;, score=-0.135 total time=   0.0s
[CV 4/5] END C=0.001, epsilon=1.2507499999999998, kernel=linear;, score=-0.103 total time=   0.0s
[CV 5/5] END C=0.001, epsilon=1.2507499999999998, kernel=linear;, score=-0.310 total time=   0.0s
[CV 1/5] END C=0.001, epsilo