# Support Vector Regression (SVR)

## Importing the libraries

In [23]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [24]:
df = pd.read_csv('/Users/ozerozdal/Documents/Regression/Data.csv')
df.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [25]:
X = df.iloc[:, 1:-1].values
y = df.iloc[:, -1].values

In [26]:
print(X)

[[  41.76 1024.07   73.17]
 [  62.96 1020.04   59.08]
 [  39.4  1012.16   92.14]
 ...
 [  74.33 1012.92   36.48]
 [  69.45 1013.86   62.39]
 [  62.52 1017.23   67.87]]


In [27]:
print(y)

[463.26 444.37 488.56 ... 429.57 435.74 453.28]


In [28]:
y = y.reshape(len(y),1)  # fit_transform takes 2D array as input. Let's reshape it to make it 2D array.
print(y)

[[463.26]
 [444.37]
 [488.56]
 ...
 [429.57]
 [435.74]
 [453.28]]


## Splitting the dataset into the Training set and Test set

In [29]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

## Feature Scaling

In [30]:
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X_train = sc_X.fit_transform(X_train)
y_train = sc_y.fit_transform(y_train)

In [31]:
print(X_train)

[[-0.89  0.67  0.52]
 [-0.01  0.45  0.15]
 [ 1.85  0.24 -1.88]
 ...
 [-1.25  0.85  0.13]
 [-1.04  1.55  0.88]
 [ 1.06 -1.2  -2.42]]


In [32]:
print(y_train)

[[ 1.15]
 [ 0.8 ]
 [-1.31]
 ...
 [ 0.28]
 [ 0.49]
 [-1.54]]


## Training the SVR model on the whole dataset

In [33]:
from sklearn.svm import SVR
regressor = SVR(kernel = 'rbf')
y_train = np.ravel(y_train) # y is a 2D array. The function np.ravel() flattens the 2D array!
regressor.fit(X_train, y_train)  

SVR()

## Predicting Test Set Results

In [34]:
y_pred = sc_y.inverse_transform(regressor.predict(sc_X.transform(X_test)))
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[432.98 431.23]
 [460.91 460.01]
 [468.67 461.14]
 ...
 [468.91 473.26]
 [442.3  438.  ]
 [464.64 463.28]]


## Evaluating the Model Performance

In [45]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, median_absolute_error, explained_variance_score, r2_score

print("Mean absolute error =", round(mean_absolute_error(y_test, y_pred), 4)) 
print("Mean squared error =", round(mean_squared_error(y_test, y_pred), 4)) 
print("Median absolute error =", round(median_absolute_error(y_test, y_pred), 4)) 
print("Explain variance score =", round(explained_variance_score(y_test, y_pred), 4)) 
print("R2 score =", round(r2_score(y_test, y_pred), 4))

Mean absolute error = 4.9894
Mean squared error = 44.0415
Median absolute error = 3.9262
Explain variance score = 0.8495
R2 score = 0.8494
