In [3]:
import pandas as pd
import numpy as np

import matplotlib
import matplotlib.pyplot as plt

from sklearn.svm import SVR
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
import math

# riverstation_id = 1
# river = all_levels[all_levels['riverstation_id'] == riverstation_id]['level'].values

# Load the measurements table; the first CSV column becomes the row index.
fname = "./datos.csv"
data = pd.read_csv(fname, index_col=0)

# Raw values of the first five columns as a 2-D NumPy array.
all_levels = data.iloc[:, :5].values

# Make every figure in this notebook 12 wide by 9 high by updating
# matplotlib's default figure size.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 12, 9
plt.rcParams["figure.figsize"] = fig_size

In [4]:
def train_test_split(dataset, train_frac):
    """Split a 2-D array into leading (train) and trailing (test) rows.

    No shuffling is done: the first ``int(len(dataset) * train_frac)`` rows
    form the training part and all remaining rows form the test part.

    Parameters
    ----------
    dataset : 2-D array supporting ``dataset[a:b, :]`` slicing.
    train_frac : fraction of the rows that go to the training part.

    Returns
    -------
    (train, test) : tuple of row slices of ``dataset``.
    """
    cut = int(len(dataset) * train_frac)
    head = dataset[:cut, :]
    tail = dataset[cut:, :]
    return head, tail

def create_datasets(dataset, look_back=1, look_ahead=1):
    """Build sliding-window inputs and targets from column 0 of ``dataset``.

    For every valid start row ``s`` the input is the ``look_back`` consecutive
    values ``dataset[s : s + look_back, 0]`` and the target is the single value
    ``look_ahead`` rows after the end of that window, i.e.
    ``dataset[s + look_back + look_ahead - 1, 0]``.

    Parameters
    ----------
    dataset : 2-D array; only its first column is read.
    look_back : number of past values in each input window.
    look_ahead : how many steps past the window the target lies.

    Returns
    -------
    (data_x, data_y) : NumPy arrays of windows and matching targets. When the
        dataset is too short for a single sample both arrays are empty 1-D.
    """
    n_samples = len(dataset) - look_back - look_ahead + 1
    target_offset = look_back + look_ahead - 1

    windows = [dataset[start:start + look_back, 0] for start in range(n_samples)]
    targets = [dataset[start + target_offset, 0] for start in range(n_samples)]
    return np.array(windows), np.array(targets)

def reverse_scale(data, mean, std):
    """Undo standardisation: map each value ``x`` to ``x * std + mean``.

    Parameters
    ----------
    data : array-like of standardised values.
    mean : offset added back after multiplying by ``std``.
    std : scale factor applied to every value.

    Returns
    -------
    The rescaled values. A writeable floating-point (or complex) ndarray is
    updated in place and the very same object is returned, so callers that
    rely on in-place modification keep working. Any other input (integer or
    boolean arrays, read-only arrays, lists, scalars) is left untouched and a
    new float array is returned, because such inputs cannot hold the
    fractional result without silent truncation.
    """
    can_update_in_place = (
        isinstance(data, np.ndarray)
        and np.issubdtype(data.dtype, np.inexact)
        and data.flags.writeable
    )
    if can_update_in_place:
        # One vectorised pass instead of a Python-level loop per element;
        # writing through ``[...]`` keeps the caller's array object.
        data[...] = data * std + mean
        return data
    return np.asarray(data, dtype=float) * std + mean

def mean_absolute_percentage(y, y_pred):
    """Return the mean absolute percentage error of ``y_pred`` against ``y``.

    Computed as ``mean(|y - y_pred| / |y|) * 100``. Every error is divided by
    the corresponding entry of ``y``, so zeros in ``y`` are not handled
    specially and propagate through the division.

    Parameters
    ----------
    y : observed values.
    y_pred : predicted values, element-wise compatible with ``y``.
    """
    relative_error = (y - y_pred) / y
    absolute_relative_error = np.abs(relative_error)
    return np.mean(absolute_relative_error) * 100

In [None]:
# Experiment: for each river column, fit one SVR per forecast horizon and
# record the test-set MAPE. ``results[r][h - 1]`` is the MAPE (in percent) of
# river column ``r`` at horizon ``h``.
split = 0.7      # fraction of each series (from its start) used for training
look_back = 32   # number of past observations fed to the model as features
results = []

# Horizons that get a panel in the 2x2 diagnostic figure, mapped to the
# matplotlib subplot position each one is drawn in.
plot_panels = {1: 221, 4: 222, 8: 223, 16: 224}
last_panel_horizon = max(plot_panels)  # the figure is shown after this panel

# NOTE(review): six columns are processed here while ``all_levels`` keeps only
# the first five columns of ``data`` -- confirm that ``data`` really has at
# least six columns.
for river_i in range(0, 6):
    river_results = []

    # Everything that depends only on the column is done once per river
    # instead of once per horizon.
    # ``Series.reshape`` is not available in current pandas, so take the
    # underlying ndarray before reshaping into a single-column 2-D array.
    river = data.iloc[:, river_i].values.reshape(-1, 1)

    # Remember the original location/scale so predictions can be mapped back.
    # NOTE(review): the statistics and the scaling use the whole series,
    # including the part that later becomes the test set.
    river_mean, river_std = river.mean(), river.std()
    river = preprocessing.scale(river).reshape(len(river), 1)

    train, test = train_test_split(river, split)

    # Horizons 1 .. 71.
    # NOTE(review): if three full days (72 steps) were intended, the upper
    # bound would need to be ``3 * 24 + 1`` -- confirm.
    for look_ahead in range(1, 3 * 24):
        train_x, train_y = create_datasets(train, look_back, look_ahead)
        test_x, test_y = create_datasets(test, look_back, look_ahead)

        clf = SVR(C=1.0, epsilon=0.01)
        clf.fit(train_x, train_y)
        pred_train = clf.predict(train_x)
        pred_test = clf.predict(test_x)

        # Map predictions and targets back to the original units so that the
        # percentage error is computed on the original scale.
        pred_train = reverse_scale(pred_train, river_mean, river_std)
        pred_test = reverse_scale(pred_test, river_mean, river_std)
        test_y = reverse_scale(test_y, river_mean, river_std)
        train_y = reverse_scale(train_y, river_mean, river_std)

        score = mean_absolute_percentage(test_y, pred_test)
        river_results.append(score)

        if look_ahead in plot_panels:
            plt.subplot(plot_panels[look_ahead])
            plt.plot(test_y, label="Observed")
            plt.plot(pred_test, color="red", label="Predicted, MAPE: " + str(round(score, 5)) + "%")
            plt.title(str(look_ahead) + " step ahead prediction")
            plt.ylabel("River Level")
            plt.legend(loc=1, fontsize=8, framealpha=0.8)
            if look_ahead == last_panel_horizon:
                # All four panels are drawn: render this river's figure.
                plt.show()

    results.append(river_results)

print(results)