In [1]:
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [2]:
# Load the Boston housing regression dataset that ships with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs on older releases (the estimator reprs recorded
# in this notebook come from such a release).
boston = datasets.load_boston()

In [3]:
# Feature matrix and regression target, taken straight from the loaded dataset.
X, y = boston.data, boston.target

In [4]:
# Hold out 33% of the samples for evaluation; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [5]:
# First attempt: support-vector regressor with an RBF kernel, everything else
# left at the library defaults.
clf = svm.SVR(kernel='rbf')

In [6]:
# Train the RBF-kernel SVR on the training split only; the cell echoes the
# fitted estimator's repr.
clf.fit(X_train, y_train)



SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [7]:
# score() on a regressor returns the coefficient of determination (R^2), not a
# classification accuracy. The ~0.02 recorded below means the default RBF model
# is barely better than always predicting the mean target.
clf.score(X_test, y_test)

0.0212554013860895

In [8]:
# Second attempt: swap the RBF kernel for a linear one and refit on the
# training split (fit() returns the estimator itself, so chaining is safe).
clf = svm.SVR(kernel='linear').fit(X_train, y_train)
clf  # echo the fitted estimator, as the cell did before

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [9]:
# R^2 of the linear-kernel SVR on the held-out test split.
clf.score(X_test, y_test)

0.670344007078583

# Using GridSearchCV to find optimal parameters

In [10]:
# Unfitted SVR with default settings (RBF kernel) to be tuned by the search.
clf = svm.SVR()

# Candidate values to try for the penalty parameter C and the kernel
# coefficient gamma; every C/gamma pair is evaluated.
grid = dict(
    C=[1e2, 1e3, 5e3, 1e4, 5e4, 1e5],
    gamma=[1e-3, 5e-4, 1e-4, 5e-3],
)

In [11]:
# Exhaustive search over `grid` for the SVR defined above.
# cv is pinned explicitly: the library default changed from 3 to 5 folds
# between scikit-learn releases (the recorded repr shows cv='warn'), so relying
# on it makes the selected parameters depend on the installed version.
# 3 folds matches the default of the release these outputs were produced with.
abc = GridSearchCV(clf, grid, cv=3)

In [12]:
# Cross-validate all 6 x 4 = 24 C/gamma combinations using the training split
# only, so the test split stays untouched for the final evaluation.
abc.fit(X_train, y_train)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [100.0, 1000.0, 5000.0, 10000.0, 50000.0, 100000.0], 'gamma': [0.001, 0.0005, 0.0001, 0.005]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [13]:
# Show the estimator configured with the best-scoring C/gamma pair found by
# the search.
abc.best_estimator_

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.0001,
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [14]:
# Dump the raw per-candidate search results: a dict of arrays with one entry
# per parameter combination (fit/score timings among others). The output is
# long and is truncated in this export.
abc.cv_results_



{'mean_fit_time': array([0.02232059, 0.03864241, 0.01365821, 0.01832231, 0.06229544,
        0.1262668 , 0.06429696, 0.0263106 , 0.13059664, 0.41942088,
        0.45373297, 0.03464556, 0.17822901, 0.62097605, 0.60331313,
        0.03864328, 0.23887054, 0.83785454, 1.97652594, 0.02898224,
        0.28017465, 1.0963707 , 2.74443523, 0.03197503]),
 'std_fit_time': array([0.00188644, 0.01013609, 0.00169524, 0.00123696, 0.00939427,
        0.01033665, 0.00249279, 0.00286673, 0.00704423, 0.02048457,
        0.06909644, 0.00188599, 0.01460698, 0.14181462, 0.0995481 ,
        0.00478149, 0.05836856, 0.05150673, 0.14095291, 0.00431808,
        0.11568035, 0.149335  , 0.18820131, 0.0029396 ]),
 'mean_score_time': array([0.00233269, 0.00266679, 0.00199914, 0.00266568, 0.00199906,
        0.00200009, 0.00166623, 0.00233173, 0.00199938, 0.00167155,
        0.0023396 , 0.00333246, 0.002666  , 0.00233444, 0.00233261,
        0.0033315 , 0.00199938, 0.00199874, 0.0019989 , 0.00299907,
        0.002662

In [15]:
# Fit the final model with the parameters selected by the grid search.
# They are read from `abc.best_params_` rather than copied by hand from the
# printed output, so this cell stays correct if the grid, the data split or
# the CV settings change. For the recorded run this is C=1000, gamma=0.0001.
clf = svm.SVR(**abc.best_params_)
clf.fit(X_train, y_train)

SVR(C=1000, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.0001,
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [16]:
# R^2 (coefficient of determination, not accuracy) on the held-out test split
# for the SVR fitted with the grid-searched C and gamma.
clf.score(X_test, y_test)

0.7902510501268216