## Support vector machine trong dự báo năng lượng

### Read dataset

In [None]:
import numpy as np
import pandas as pd
data = pd.read_csv("./energy.csv")

In [13]:
data.head()

Unnamed: 0,Time,Power consumption
0,2019-03-01 0:00,10011.84
1,2019-03-01 0:01,10119.32
2,2019-03-01 0:02,9433.59
3,2019-03-01 0:03,10213.93
4,2019-03-01 0:04,10026.23


### Convert time series to supervised

In [10]:
"""
	Frame a time series as a supervised learning dataset.
	Arguments:
		data: Sequence of observations as a list or NumPy array.
		n_in: Number of lag observations as input (X).
		n_out: Number of observations as output (y).
		dropnan: Boolean whether or not to drop rows with NaN values.
	Returns:
		Pandas DataFrame of series framed for supervised learning.
	"""
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    dff = pd.DataFrame(data)
    cols, names = list(), list()
    for i in range(n_in, 0, -1):
        cols.append(dff.shift(-i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    for i in range(0, n_out):
        cols.append(dff.shift(-i))
        if i==0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1)) for j in range(n_vars)]        
        agg = pd.concat(cols, axis=1)
        agg.columns = names
        if dropnan:
            agg.dropna(inplace=True)
        return agg

### Data scaling

In [23]:
from sklearn.preprocessing import MinMaxScaler

# Chuẩn hóa dữ liệu về đoạn [0,1]
df = pd.DataFrame(data, columns=["Power consumption"])
values = df.values
scaler = MinMaxScaler(feature_range=(0,1))
scaled = scaler.fit_transform(values)

reframed = series_to_supervised(scaled, 5,1 )
r = list(range(df.shape[1]+1, 2*df.shape[1]))
reframed.drop(reframed.columns[r], axis=1, inplace=True)
reframed

Unnamed: 0,var1(t-5),var1(t-4),var1(t-3),var1(t-2),var1(t-1),var1(t)
0,0.707974,0.669923,0.682464,0.630324,0.676143,0.668961
1,0.720851,0.707974,0.669923,0.682464,0.630324,0.676143
2,0.712798,0.720851,0.707974,0.669923,0.682464,0.630324
3,0.669816,0.712798,0.720851,0.707974,0.669923,0.682464
4,0.663816,0.669816,0.712798,0.720851,0.707974,0.669923
...,...,...,...,...,...,...
308150,0.598492,0.601722,0.625437,0.642619,0.643162,0.650112
308151,0.587078,0.598492,0.601722,0.625437,0.642619,0.643162
308152,0.583641,0.587078,0.598492,0.601722,0.625437,0.642619
308153,0.589995,0.583641,0.587078,0.598492,0.601722,0.625437


### Split dataset to train and test

In [24]:
values = reframed.values
n_train_time = 50000
train = values[:n_train_time, :]
test = values[n_train_time:, :]
train_x, train_y = train[:, :-1], train[:, -1]   # train_x: n-1 cột đầu tiên, train_y: cột cuối cùng (operation)
test_x, test_y = test[:, :-1], test[:, -1]

### Build and fit model

In [25]:
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=200, random_state=0)
regressor.fit(train_x,train_y)

### Predict

In [26]:
predict_y = regressor.predict(test_x)

### Evaluate

In [28]:
from keras.models import load_model
from sklearn.metrics import mean_squared_error, r2_score

size = df.shape[1]

yhat = predict_y.reshape(predict_y.shape[0],1)
test_x = test_x.reshape(test_x.shape[0], size * 5)

# invert scalling
inv_yhat = np.concatenate((yhat, test_x[:,1-size * 5:]), axis=1)
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:,0]

test_y = test_y.reshape(len(test_y), 1)
inv_y = np.concatenate((test_y, test_x[:, 1-size * 5:]), axis=1)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:,0]

# caculate RSME
rmse = np.sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RSME: %.3f' % rmse)

Test RSME: 724.756
