# Support Vector Regression (SVR)

## Dataset

### Layout

* Columns: 5
	* Engine temperature
	* Exhaust vacuum
	* Ambient pressure
	* Relative humidity
	* Energy output
* Rows: 1000s of observations
	* Each row represents observation of captured metrics at power plant
		* Features:
			* Engine temperature
			* Exhaust vacuum
			* Ambient pressure
			* Relative humidity
		* Dependent variable:
			* Energy output

### Background

* Real world dataset from UCI ML repository
	* Web site that contains many real world datasets in which to practice ML
* Combined cycle power plant dataset

### Goals

* Build regression models to predict energy output

## Import Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Import Dataset

In [2]:
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [3]:
y = y.reshape(len(y), 1)

In [4]:
print(*X[:25], sep='\n')

[  14.96   41.76 1024.07   73.17]
[  25.18   62.96 1020.04   59.08]
[   5.11   39.4  1012.16   92.14]
[  20.86   57.32 1010.24   76.64]
[  10.82   37.5  1009.23   96.62]
[  26.27   59.44 1012.23   58.77]
[  15.89   43.96 1014.02   75.24]
[   9.48   44.71 1019.12   66.43]
[  14.64   45.   1021.78   41.25]
[  11.74   43.56 1015.14   70.72]
[  17.99   43.72 1008.64   75.04]
[  20.14   46.93 1014.66   64.22]
[  24.34   73.5  1011.31   84.15]
[  25.71   58.59 1012.77   61.83]
[  26.19   69.34 1009.48   87.59]
[  21.42   43.79 1015.76   43.08]
[  18.21   45.   1022.86   48.84]
[  11.04   41.74 1022.6    77.51]
[  14.45   52.75 1023.97   63.59]
[  13.97   38.47 1015.15   55.28]
[  17.76   42.42 1009.09   66.26]
[   5.41   40.07 1019.16   64.77]
[   7.76   42.28 1008.52   83.31]
[  27.23   63.9  1014.3    47.19]
[  27.36   48.6  1003.18   54.93]


In [5]:
print(*y[:25], sep='\n')

[463.26]
[444.37]
[488.56]
[446.48]
[473.9]
[443.67]
[467.35]
[478.42]
[475.98]
[477.5]
[453.02]
[453.99]
[440.29]
[451.28]
[433.99]
[462.19]
[467.54]
[477.2]
[459.85]
[464.3]
[468.27]
[495.24]
[483.8]
[443.61]
[436.06]


## Split Dataset into Training Set and Test Set

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

## Feature Scaling

In [7]:
from sklearn.preprocessing import StandardScaler

sc_X = StandardScaler()
sc_y = StandardScaler()
X_train = sc_X.fit_transform(X_train)
y_train = sc_y.fit_transform(y_train)

In [8]:
print(*X_train[:25], sep='\n')

[-1.13572795 -0.88685592  0.67357894  0.52070558]
[-0.80630243 -0.00971567  0.45145467  0.14531044]
[ 1.77128416  1.84743445  0.24279248 -1.88374143]
[ 1.64623692  1.05274696 -0.75340119 -1.37545092]
[-1.25001843 -1.32895973 -0.00793869  0.40767602]
[-1.00933612 -1.06903724  2.11233839 -0.15952686]
[-0.47553232 -1.13342892 -1.04283584  0.49261945]
[-0.93000507 -0.9488918   0.83007558  1.46398864]
[-1.08194419 -1.01171295  1.12792403 -0.47052941]
[ 0.87444    -0.43218788 -0.56661488 -2.66261785]
[ 0.93763592  1.15483132 -0.44377343  0.40630597]
[ 0.22096734 -0.91276964  0.14519243  0.1692864 ]
[-0.7915119  -1.13264365  0.86541353 -0.31708322]
[ 0.29357542  0.30674585 -0.03486285 -0.47326952]
[ 0.77628464  1.63384256 -0.42526307 -0.46984438]
[ 0.67140631  0.94752154 -0.40506996 -0.22186438]
[-1.13841713 -1.46559572  1.01013086 -0.5965745 ]
[ 0.32181189 -0.86329799 -0.46733206 -1.04732268]
[ 0.34735917  0.42217971  0.59785476 -0.29310725]
[ 0.50602126 -0.23508653 -0.3242975   0.01721027]


In [9]:
print(*X_test[:25], sep='\n')

[  28.66   77.95 1009.56   69.07]
[  17.48   49.39 1021.51   84.53]
[  14.86   43.14 1019.21   99.14]
[  22.46   58.33 1013.21   68.68]
[  18.38   55.28 1020.22   68.33]
[  30.89   74.87 1008.99   56.58]
[  11.74   43.56 1015.14   70.72]
[  18.97   50.59 1016.01   74.9 ]
[  11.42   41.54 1020.21   71.57]
[  23.89   50.16 1004.88   73.94]
[  24.66   63.73 1011.4    74.52]
[  25.7    69.45 1013.86   56.91]
[  25.11   58.79 1015.99   60.55]
[  24.14   63.21 1012.28   89.08]
[  24.73   74.22 1010.15   79.83]
[  20.6    45.38 1014.93   75.78]
[  25.21   64.63 1020.56   51.31]
[  28.61   68.12 1011.71   47.57]
[  19.21   58.49 1011.7    91.29]
[  31.98   73.42 1010.66   60.06]
[  32.38   77.24 1007.7    61.15]
[  28.94   68.67 1005.39   75.24]
[  10.15   41.14 1025.5    91.54]
[  23.67   71.77 1004.76   86.13]
[  21.23   61.5  1008.98   82.14]


In [10]:
print(*y_train[:25], sep='\n')

[1.15069786]
[0.79540777]
[-1.30936356]
[-1.70158975]
[1.10203602]
[1.30899543]
[0.64180051]
[0.65293996]
[1.37465961]
[-0.83329829]
[-1.40082438]
[0.32461909]
[0.26012749]
[-0.40999888]
[-1.03380854]
[-0.95524605]
[1.11552063]
[-0.31560662]
[-0.15613648]
[-0.59878338]
[0.04906406]
[-0.69376192]
[0.60134668]
[1.04164843]
[-0.22121437]


In [11]:
print(*y_test[:25], sep='\n')

[431.23]
[460.01]
[461.14]
[445.9]
[451.29]
[432.68]
[477.5]
[459.68]
[477.5]
[444.99]
[444.37]
[437.04]
[442.34]
[440.74]
[436.55]
[460.24]
[448.66]
[432.94]
[452.82]
[432.2]
[430.96]
[434.65]
[473.56]
[434.47]
[450.92]


## Train SVR Model on Training Set

In [12]:
from sklearn.svm import SVR

regressor = SVR(kernel='rbf')
regressor.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


## Predict Test Set Results

In [13]:
y_pred = sc_y.inverse_transform(regressor.predict(sc_X.transform(X_test)).reshape(-1, 1))

In [14]:
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

[[434.05 431.23]
 [457.93 460.01]
 [461.02 461.14]
 ...
 [470.6  473.26]
 [439.42 438.  ]
 [460.92 463.28]]


## Evaluate Model Performance

In [15]:
from sklearn.metrics import r2_score

r2_score(y_test, y_pred)

0.9480795111869856