In [19]:
# importing packages
import numpy as np
import pickle
from sklearn import datasets
import sklearn.metrics as sm
from sklearn.svm import SVR
from sklearn.utils import shuffle

In [4]:
# Load the Boston housing dataset through scikit-learn's dataset helpers.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs on an older scikit-learn release — confirm the
# version this notebook is pinned to.
data = datasets.load_boston()

# Shuffle features and targets together so the positional train/test split
# further down is not biased by the original row order. The fixed
# random_state keeps the shuffled order reproducible between runs.
X, Y = shuffle(
    data.data,
    data.target,
    random_state=7,
)

In [5]:
# display the shuffled feature matrix to eyeball its contents
X

array([[3.8497e+00, 0.0000e+00, 1.8100e+01, ..., 2.0200e+01, 3.9134e+02,
        1.3270e+01],
       [3.0410e-02, 0.0000e+00, 5.1900e+00, ..., 2.0200e+01, 3.9481e+02,
        1.0560e+01],
       [2.4103e-01, 0.0000e+00, 7.3800e+00, ..., 1.9600e+01, 3.9690e+02,
        1.2790e+01],
       ...,
       [8.4054e-01, 0.0000e+00, 8.1400e+00, ..., 2.1000e+01, 3.0342e+02,
        1.6510e+01],
       [4.0110e-02, 8.0000e+01, 1.5200e+00, ..., 1.2600e+01, 3.9690e+02,
        4.0800e+00],
       [6.6640e-02, 0.0000e+00, 4.0500e+00, ..., 1.6600e+01, 3.9096e+02,
        5.3300e+00]])

In [6]:
# display the shuffled target values to eyeball their contents
Y

array([21.7, 18.5, 22.2, 20.4,  8.8,  8.3, 21.4, 23.6, 23.5, 19. , 10.2,
       31.6, 18.4, 19.4, 44. , 23.2, 22. , 33.1, 13.2, 21.7, 16.7, 23.9,
       42.3, 18.5, 14.6,  7.5, 33.4, 22. , 28.1, 24.8, 19.9, 24.8, 28.7,
       14.1, 43.5, 20.4, 18.3, 13.4, 21. , 22. , 22.6, 13.9, 13.3, 26.5,
       20. ,  9.5, 21.6, 22.6, 15.6, 23.1, 27.1, 25.2, 15. , 21.9,  9.6,
       22.7, 18.3, 21.4, 20.5, 21.1, 24.5, 31. , 18.7, 18.7, 22.7, 50. ,
       16.7, 23. , 20.4, 22.8, 24.8, 10.5, 18.2, 31.5, 23.8, 19.6, 12.7,
       15.2, 31.2, 14.5, 17.8, 25. , 23. , 50. , 23.7, 25.1, 24.7, 16.8,
       34.7, 23.3, 50. , 46.7, 43.1, 24.4, 24.1,  6.3, 26.4, 16.3, 17.2,
       28. , 15.2, 16.6, 20.6, 16.4, 22. , 17.2, 13.5, 23.3, 26.2, 19.7,
       13.1, 23.9, 18.2, 19.3,  7. , 18.5, 13.8, 33.4, 12.6, 19.6, 20.8,
       25. , 21.7, 43.8, 13.4, 34.9, 24.5, 26.6, 36.5, 31.1, 22.6, 20.3,
       18.4, 28.7, 23.1, 17.6, 35.4, 31.6,  8.5, 22.2, 10.4, 22.8, 23.9,
       22.2, 32.9, 15.6, 29.8, 14.3, 28.4, 21.4, 23

In [7]:
# 80 : 20 train/test split by position: the first 80% of the rows are used
# for training and the remaining rows are held out for testing.
training_no = int(len(X) * 0.8)

# feature rows: everything before the cut-off trains the model, the rest tests it
X_train = X[:training_no]
X_test = X[training_no:]

# target values, cut at the same index so they stay aligned with the feature rows
Y_train = Y[:training_no]
Y_test = Y[training_no:]

In [9]:
# Creating a support vector regressor using a linear kernel.
# The C parameter represents the penalty applied to training error.
# If you increase the value of C, the model is tuned to fit the training data more closely,
# but this might lead to overfitting and cause the model to lose its generality.
# The epsilon parameter specifies a threshold:
# there is no penalty for training error if the predicted value is within this distance of the actual value.

In [10]:
# Build the support vector regression model: linear kernel, penalty C of 1.0
# and an epsilon threshold of 0.1.
sv_regressor = SVR(
    kernel='linear',
    C=1.0,
    epsilon=0.1,
)

# Train the regressor on the training split. The call is left as the cell's
# last expression so the notebook also displays the fitted estimator.
sv_regressor.fit(X_train, Y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [15]:
# Predict on the held-out test split so the model can be scored on data it
# was not trained on.
Y_test_predict = sv_regressor.predict(X_test)

# Score the predictions against the true test targets.
mse = sm.mean_squared_error(Y_test, Y_test_predict)
evs = sm.explained_variance_score(Y_test, Y_test_predict)

# Report each metric rounded to two decimals, one per line.
print('support vector regressor performance')
for label, value in (
    ('Mean squared error =', mse),
    ('Explained variance score =', evs),
    ("R2 score =", sm.r2_score(Y_test, Y_test_predict)),
):
    print(label, round(value, 2))

support vector regressor performance
Mean squared error = 15.38
Explained variance score = 0.82
R2 score = 0.81


In [18]:
# A single hand-written sample used to try out the fitted regressor.
# presumably one value per feature column, in the same order as `data.data` — verify
datapoint = [
    3.7, 0, 18.4, 1, 0.87, 5.95, 91,
    2.5052, 26, 666, 20.2, 351.34, 15.27,
]

# The sample is wrapped in a list so the model receives a batch holding one
# row; the result therefore holds exactly one prediction, unpacked here.
[output1] = sv_regressor.predict([datapoint])

# Show the predicted house price for the attributes given above.
print('Predicted price:', output1)

Predicted price: 18.521780107258536


In [21]:
# Persist the fitted regressor to disk so it can be reloaded for predictions
# later on without retraining.
regressor_model_file = 'boston_housing_prices_prediction_svr.pkl'

# Pickle writes bytes, hence the binary write mode. The path is relative, so
# the file lands in the notebook's current working directory.
# NOTE: only unpickle this file from a trusted location; loading a pickle can
# execute arbitrary code.
with open(regressor_model_file, mode='wb') as model_fh:
    pickle.dump(sv_regressor, model_fh)