# SVM with Regression - SVR

- Epsilon parameter: training penalty (distance from real value)

Kernel types:

- Linear Kernel

- Polynomial Kernel

- RBF Kernel

It is important to STANDARDIZE DATA!

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [3]:
from sklearn.svm import SVR

# Health care data

In [2]:
data_health = pd.read_csv("health_care2.csv")
x_health = data_health.iloc[:,0:1].values
y_health = data_health.iloc[:,1].values
x_health,y_health

(array([[18],
        [23],
        [28],
        [33],
        [38],
        [43],
        [48],
        [53],
        [58],
        [63]], dtype=int64),
 array([  470,   520,   630,   830,  1150,  1530,  2040,  3080,  5100,
        10100], dtype=int64))

In [17]:
from sklearn.preprocessing import StandardScaler

In [18]:
scaler_x = StandardScaler()
x_health_stand = scaler_x.fit_transform(x_health)
scaler_y = StandardScaler()
y_health_stand = scaler_y.fit_transform(y_health.reshape(-1,1))

## Linear Kernel

In [6]:
reg_svr_health_linear = SVR(kernel='linear')
reg_svr_health_linear.fit(x_health,y_health)

In [7]:
plot = px.scatter(x = x_health.ravel(),
                  y = y_health)
plot.add_scatter(x = x_health.ravel(),
                  y = reg_svr_health_linear.predict(x_health.reshape(-1,1)),
                  name = "Regression")
plot.show()

## Polynomial Kernel

In [14]:
reg_svr_health_poly = SVR(kernel='poly', degree=4)
reg_svr_health_poly.fit(x_health,y_health)

In [15]:
plot = px.scatter(x = x_health.ravel(),
                  y = y_health)
plot.add_scatter(x = x_health.ravel(),
                  y = reg_svr_health_poly.predict(x_health.reshape(-1,1)),
                  name = "Regression")
plot.show()

## RBF Kernel

In [12]:
reg_svr_health_rbf = SVR(kernel='rbf')
reg_svr_health_rbf.fit(x_health,y_health)

In [16]:
plot = px.scatter(x = x_health.ravel(),
                  y = y_health)
plot.add_scatter(x = x_health.ravel(),
                  y = reg_svr_health_rbf.predict(x_health.reshape(-1,1)),
                  name = "Regression")
plot.show()

## After standardize data

In [28]:
reg_svr_health_rbf = SVR(kernel='rbf')
reg_svr_health_rbf.fit(x_health_stand,y_health_stand.ravel())

In [29]:
plot = px.scatter(x = x_health_stand.ravel(),
                  y = y_health_stand.ravel())
plot.add_scatter(x = x_health_stand.ravel(),
                  y = reg_svr_health_rbf.predict(x_health_stand),
                  name = "Regression")
plot.show()

In [33]:
new = scaler_x.transform([[40]])
new

array([[-0.03481553]])

In [48]:
reg_svr_health_rbf.predict(new)

array([-0.42259356])

In [49]:
scaler_y.inverse_transform([reg_svr_health_rbf.predict(new)])

array([[1333.85650748]])

# House prices data

In [50]:
data_house = pd.read_csv("house_prices.csv")
x_house = data_house.iloc[:,3:19].values
y_house = data_house.iloc[:,2].values #price

In [51]:
from sklearn.model_selection import train_test_split
x_house_train, x_house_test, y_house_train, y_house_test = train_test_split(x_house,y_house,
                                                                            test_size=0.3,
                                                                            random_state=0)

In [52]:
x_house_train.shape, x_house_test.shape, y_house_train.shape, y_house_test.shape

((15129, 16), (6484, 16), (15129,), (6484,))

In [62]:
scaler_x = StandardScaler()
x_house_train_stand = scaler_x.fit_transform(x_house_train)
x_house_test_stand = scaler_x.transform(x_house_test)
scaler_y = StandardScaler()
y_house_train_stand = scaler_y.fit_transform(y_house_train.reshape(-1,1))
y_house_test_stand = scaler_y.transform(y_house_test.reshape(-1,1))

In [63]:
x_house_train_stand, y_house_train_stand

(array([[ 0.67170039, -0.79711466, -0.74522967, ...,  1.03727271,
          1.17348387, -0.83797629],
        [-0.38696772, -0.79711466, -0.68028398, ...,  1.03727271,
          1.53476983, -0.95879989],
        [ 1.7303685 ,  0.82292131,  0.84593978, ..., -0.48008241,
          1.07108346,  0.78248142],
        ...,
        [-0.38696772,  0.17490692,  0.30472568, ..., -0.66741021,
         -1.25528083,  0.3986888 ],
        [ 0.67170039, -0.14910028,  0.31554996, ..., -1.43545415,
         -1.99443873, -0.46129094],
        [ 0.67170039,  0.17490692,  0.32637425, ..., -1.3043247 ,
          0.38168351,  0.66876509]]),
 array([[-0.38454326],
        [-0.3024967 ],
        [ 0.49062008],
        ...,
        [-0.29976181],
        [-0.35445952],
        [ 0.43564888]]))

## RBF Kernel

In [65]:
reg_svr_house = SVR(kernel='rbf')
reg_svr_house.fit(x_house_train_stand,y_house_train_stand)


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



In [66]:
reg_svr_house.score(x_house_train_stand,y_house_train_stand)

0.8123337393146999

In [67]:
reg_svr_house.score(x_house_test_stand,y_house_test_stand)

0.7374454724624955

In [68]:
prediction = reg_svr_house.predict(x_house_test_stand)
prediction

array([-0.54571552,  3.46454238,  0.02286424, ..., -0.46405796,
       -0.91113329, -1.03334346])

In [70]:
y_house_test_stand

array([[-0.66623646],
       [ 2.83715179],
       [ 0.05878167],
       ...,
       [-0.43924097],
       [-0.74554814],
       [-0.91511104]])

In [76]:
y_house_test_stand_inverse = scaler_y.inverse_transform(y_house_test_stand)
prediction_inverse = scaler_y.inverse_transform(prediction.reshape(-1,1))

In [73]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,root_mean_squared_error
mean_absolute_error(y_house_test_stand,prediction)


0.22549956951552147

In [77]:
mean_absolute_error(y_house_test_stand_inverse,prediction_inverse)

82453.02198930961