# SVM - Support Vector Regression
---
SVR is quite similar to SVM with some differences:
1. SVR has an additional tunable parameter ε (epsilon), which defines the width of the tube around the estimated function.
2. The support vectors are the points that fall outside the tube, rather than just the ones at the margin.
3. The slack measures the distance to points outside the tube.

In [1]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler

import plotly.graph_objects as go
import plotly.express as px


numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject



In [3]:
# Load the real-estate data set from the project's data directory.
df = pd.read_csv('../data/Real_estate.csv', encoding='utf-8')

# Add a min-max scaled copy of each listed feature as '<name> (scaled)'.
# The same scaler object is re-fitted for every column, so once this cell
# has run it only holds the fit for the last column in the tuple.
scaler = MinMaxScaler()
for column in ('X2 house age', 'X3 distance to the nearest MRT station'):
    df[f'{column} (scaled)'] = scaler.fit_transform(df[[column]])

In [4]:
# Scatter plot of the raw data: unit-area price against distance to the
# nearest MRT station.
fig = px.scatter(
    df,
    x=df['X3 distance to the nearest MRT station'],
    y=df['Y house price of unit area'],
    opacity=0.8,
    color_discrete_sequence=['black'],
)

# Both axes share the same light-grey grid and black axis-line styling.
axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='lightgrey',
    zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey',
    showline=True, linewidth=1, linecolor='black',
)
fig.update_xaxes(**axis_style)
fig.update_yaxes(**axis_style)

# White background plus the figure title, set in a single layout update.
fig.update_layout(
    plot_bgcolor='white',
    title=dict(
        text="House Price Based on Distance from the Nearest MRT",
        font=dict(color='black'),
    ),
)

# Small markers keep the individual points distinguishable.
fig.update_traces(marker=dict(size=3))

fig.show()

In [None]:
# Feature matrix: the MRT-distance column as an (n, 1) array.
# Target: house price of unit area.
X = df[['X3 distance to the nearest MRT station']].to_numpy()
y = df['Y house price of unit area'].to_numpy()

# Baseline: ordinary least-squares linear regression.
# fit() returns the estimator itself, so `lr` is the same object as `model1`.
model1 = LinearRegression().fit(X, y)
lr = model1

# SVR with an RBF kernel; C and epsilon are the hyperparameters being set.
# `svr` is likewise the same object as `model2`.
model2 = SVR(kernel='rbf', C=1, epsilon=10).fit(X, y)
svr = model2

# 100 evenly spaced feature values spanning the observed range, used to draw
# each model's prediction curve.
x_range = np.linspace(X.min(), X.max(), 100)
x_grid = x_range.reshape(-1, 1)

y_lr = model1.predict(x_grid)    # Linear regression
y_svr = model2.predict(x_grid)   # SVR

In [14]:
# Inputs: MRT distance as a single-column feature matrix, unit-area price as target.
distance = df['X3 distance to the nearest MRT station'].values
X = distance.reshape(-1, 1)
y = df['Y house price of unit area'].values

# Linear-regression baseline. fit() hands back the fitted estimator, so
# `lr` and `model1` refer to the same object.
model1 = LinearRegression()
lr = model1.fit(X, y)

# Support vector regression with an RBF kernel, C=1 and an epsilon of 10.
# `svr` and `model2` refer to the same object.
svr_params = dict(kernel='rbf', C=1, epsilon=10)
model2 = SVR(**svr_params)
svr = model2.fit(X, y)

# Evaluate both models on 100 evenly spaced points across the feature range.
x_range = np.linspace(X.min(), X.max(), 100)
y_lr, y_svr = (model.predict(x_range.reshape(-1, 1)) for model in (model1, model2))

In [15]:
# Overlay the fitted linear-regression and SVR curves on the raw scatter,
# together with the SVR epsilon tube (prediction +/- epsilon).
fig = px.scatter(df, x=df['X3 distance to the nearest MRT station'], y=df['Y house price of unit area'],
                 opacity=0.8, color_discrete_sequence=['black'])

# Read the hyperparameters back from the SVR estimator instead of repeating
# the literals used when it was constructed, so the tube and the title stay
# in sync if the model is re-tuned.
epsilon = model2.epsilon
penalty_c = model2.C

fig.add_traces(go.Scatter(x=x_range, y=y_lr, name='Linear Regression', line=dict(color='limegreen')))
fig.add_traces(go.Scatter(x=x_range, y=y_svr, name='Support Vector Regression', line=dict(color='red')))
fig.add_traces(go.Scatter(x=x_range, y=y_svr + epsilon, name='+epsilon', line=dict(color='red', dash='dot')))
fig.add_traces(go.Scatter(x=x_range, y=y_svr - epsilon, name='-epsilon', line=dict(color='red', dash='dot')))

fig.update_layout(dict(plot_bgcolor = 'white'))

# Both axes share the same light-grey grid and black axis-line styling.
axis_style = dict(showgrid=True, gridwidth=1, gridcolor='lightgrey',
                  zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey',
                  showline=True, linewidth=1, linecolor='black')
fig.update_xaxes(**axis_style)
fig.update_yaxes(**axis_style)

fig.update_layout(title=dict(text=f"House Price Based on Distance from the Nearest MRT with Model Predictions (epsilon={epsilon}, C={penalty_c})",
                             font=dict(color='black')))
fig.update_traces(marker=dict(size=3))

fig.show()