# Train an SVM regressor on the California housing dataset

## Download the dataset and split into train/test

In [1]:
from sklearn.datasets import fetch_california_housing

# Fetch the dataset (downloaded and cached locally on first use), then bind
# the feature matrix and the regression target to the conventional X / y names.
housing = fetch_california_housing()
X = housing.data
y = housing.target

Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /home/sean/scikit_learn_data


In [2]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Peek at the first training sample to see the raw (not yet scaled) feature values.
X_train[0]

array([ 3.25960000e+00,  3.30000000e+01,  5.01765650e+00,  1.00642055e+00,
        2.30000000e+03,  3.69181380e+00,  3.27100000e+01, -1.17030000e+02])

## Train a support vector regressor (SVR)

In [17]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# Standardize every feature before the SVR: the RBF kernel is distance-based,
# so features with large raw ranges would otherwise dominate the kernel value.
svm_svr = Pipeline([
    ('Feature_scaling', StandardScaler()),
    # `degree` is intentionally not passed: it only applies to the 'poly'
    # kernel and is ignored by 'rbf', so setting it here would be misleading.
    ('SVR', SVR(C=4.75,
                kernel='rbf',
                gamma=0.08,
                verbose=True)),  # verbose=True enables libsvm's training output
])

# Fit the scaler and the regressor on the training split only, so no
# information from the test split leaks into the scaling statistics.
svm_svr.fit(X_train, y_train)

[LibSVM]

Pipeline(memory=None,
         steps=[('Feature_scaling',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('SVR',
                 SVR(C=4.75, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
                     gamma=0.08, kernel='rbf', max_iter=-1, shrinking=True,
                     tol=0.001, verbose=True))],
         verbose=False)

## Evaluate the model on the training and test sets

In [18]:
# Score on the training set (R^2 for scikit-learn regressors):
# the higher the score, the better the fit (perfect = 1).
svm_svr.score(X_train, y_train)

0.7547342909705327

In [19]:
# Same score on the held-out test set; a value close to the training score
# suggests the model is not badly overfitting.
svm_svr.score(X_test, y_test)

0.7318654735218838

In [23]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Root-mean-squared error on the training set, in the same units as the target.
y_pred_train = svm_svr.predict(X_train)
mse_train = mean_squared_error(y_true=y_train, y_pred=y_pred_train)
np.sqrt(mse_train)

0.5725957177011344

In [25]:
# Root-mean-squared error on the held-out test set, for comparison with the
# training RMSE.
y_pred_test = svm_svr.predict(X_test)
mse_test = mean_squared_error(y_true=y_test, y_pred=y_pred_test)
np.sqrt(mse_test)

0.5927612427219713