In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from livelossplot import PlotLosses
from tqdm import tnrange, tqdm_notebook
from keras import backend as K
from keras.models import Model, Sequential
from keras.layers import Input, LSTM, Dense, BatchNormalization, Dropout, LeakyReLU, Lambda
from keras.layers import Conv2D, MaxPool2D, Flatten, Reshape
from keras.layers.wrappers import TimeDistributed
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt
from IPython import display as dp
import os
import pickle

# Global plot styling: white-grid seaborn theme for every figure drawn below.
sns.set_style('whitegrid')
# NOTE(review): despine() operates on the current figure, and no figure has been
# drawn yet at this point — presumably this has no effect on later plots; confirm
# and drop if so.
sns.despine()

# Ask the inline backend for high-resolution ("retina") figure output.
dp.set_matplotlib_formats('retina')

%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# --- Training schedule ---------------------------------------------------
# epochs: the training loop further down performs epochs + 1 = 10001 batch updates.
# batch_size: number of sequences drawn per update.
epochs, batch_size = 10000, 50

# --- Dimensions ------------------------------------------------------------
# num_vars: number of input columns and, equally, of target columns per time step
#           (the loaded data carries num_vars * 2 columns).
# num_hidden / horizon: not referenced by any cell visible in this notebook.
num_hidden, num_vars, horizon = 100, 2, 30

# --- Regularisation --------------------------------------------------------
# dropout: same rate (0.1) as the Dropout layers of the model below.
dropout = 0.1

In [3]:
# Load the pre-fitted scaler object; later cells call sc.transform(...) on raw CSV
# values and sc.inverse_transform(...) on model output.
# Presumably a fitted sklearn StandardScaler (imported above) — TODO confirm.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# 'sc.pickle' must come from a trusted source.
with open('sc.pickle', 'rb') as f:
    sc = pickle.load(f)

In [4]:
# Read every CSV in data_sparse/ into one array of shape
# (n_files, flen, num_vars * 2), scaled with the pre-fitted scaler `sc`.
flen = 151  # rows each file is expected to contain (each file must fit a (flen, num_vars * 2) slot)

# macOS Finder metadata is not a data file, so leave it out of the listing.
fnames = [entry for entry in os.listdir('data_sparse') if entry != '.DS_Store']

data = np.zeros((len(fnames), flen, num_vars * 2))
for i, fname in enumerate(fnames):
    csv_path = os.path.join('data_sparse', fname)
    # The 't' column is dropped; every remaining column goes through the scaler.
    df = pd.read_csv(csv_path).drop(['t'], axis=1)
    data[i] = sc.transform(df.values)

In [5]:
# Geometry of the single convolution block used by the model below.
# conv_1_dim:     number of Conv2D filters
# conv_1_size:    kernel length along the first (time) axis
# maxpool_1_size: pooling window along the first (time) axis
conv_1_dim, conv_1_size = 100, 10
maxpool_1_size = 2

In [6]:
# One-convolution CNN that maps a (flen, num_vars) input sequence to a
# (flen, num_vars) output sequence.
model = Sequential()

# Append a channel axis so the 2-D sequence can be consumed by Conv2D.
model.add(Reshape((flen, num_vars, 1), input_shape=(flen, num_vars)))
# Use the shared `dropout` constant from the config cell instead of repeating 0.1.
model.add(Dropout(dropout))

# The kernel spans all num_vars columns, so (with Keras' default 'valid' padding)
# the convolution slides along the time axis only.
model.add(Conv2D(conv_1_dim, (conv_1_size, num_vars), activation='relu',
                 kernel_regularizer='l2', bias_regularizer='l2'))
model.add(MaxPool2D((maxpool_1_size, 1)))
model.add(Dropout(dropout))

model.add(Flatten())
# NOTE(review): activity_regularizer='l2' on this *output* layer penalises the
# predictions themselves and pulls them toward zero. The training log below sits
# at NRMSE ~= 0.996, which is what an all-zero prediction would score — consider
# dropping the activity regulariser (and/or weakening the default l2 strength).
model.add(Dense(flen * num_vars, kernel_regularizer='l2',
                activity_regularizer='l2', bias_regularizer='l2'))
# Back to one num_vars-wide row per time step.
model.add(Reshape((flen, num_vars)))

In [7]:
def nrmse(y_true, y_pred):
    """Batch-averaged normalised RMSE between two rank-3 tensors.

    For every sample (axis 0) the squared values are summed over axis 2 and
    averaged over axis 1; the square root of that quantity for the residual
    ``y_true - y_pred`` is divided by the same quantity for ``y_true``. The
    per-sample ratios are then averaged over the batch.

    Note: a sample whose ``y_true`` is entirely zero yields a zero denominator.
    """
    residual = y_true - y_pred
    error_norm = K.sqrt(K.mean(K.sum(residual ** 2, axis=2), axis=1))
    reference_norm = K.sqrt(K.mean(K.sum(y_true ** 2, axis=2), axis=1))
    return K.mean(error_norm / reference_norm)

In [8]:
# Train on plain MSE with Adam; nrmse is reported alongside the loss as a metric.
model.compile(
    loss='mse',
    optimizer='adam',
    metrics=[nrmse],
)

In [10]:
# Mini-batch training. Each "epoch" here is a single gradient update on one
# randomly drawn batch, not a full pass over `data`.
rolling = []  # nrmse of every update since the last progress report
# Use the `epochs` constant from the config cell (10000) rather than a second
# hard-coded int(1e4); the + 1 keeps the final report at step `epochs`.
for epoch in tnrange(epochs + 1):
    # Draw batch_size sample indices uniformly, with replacement.
    idx = np.random.randint(len(data), size=batch_size)
    batch = data[idx]
    # First num_vars columns are the inputs, the remaining ones the targets.
    hist_mse, hist_nrmse = model.train_on_batch(batch[:, :, :num_vars],
                                                batch[:, :, num_vars:])
    rolling.append(hist_nrmse)
    if epoch > 0 and epoch % 1000 == 0:
        # The values printed under the 'RMSE' label are the nrmse metric.
        print('Epoch', epoch, 'rolling mean RMSE', np.mean(rolling).round(3),
              'max', np.max(rolling).round(3), 'min', np.min(rolling).round(3))
        rolling = []

Epoch 1000 rolling mean RMSE 0.996 max 0.996 min 0.995
Epoch 2000 rolling mean RMSE 0.996 max 0.996 min 0.995
Epoch 3000 rolling mean RMSE 0.996 max 0.996 min 0.995
Epoch 4000 rolling mean RMSE 0.996 max 1.014 min 0.996
Epoch 5000 rolling mean RMSE 0.996 max 0.997 min 0.996
Epoch 6000 rolling mean RMSE 0.996 max 0.996 min 0.996
Epoch 7000 rolling mean RMSE 0.996 max 0.996 min 0.996
Epoch 8000 rolling mean RMSE 0.996 max 0.996 min 0.996
Epoch 9000 rolling mean RMSE 0.996 max 0.996 min 0.996
Epoch 10000 rolling mean RMSE 0.996 max 0.996 min 0.996



In [None]:
# Persist the trained weights of `model` to an HDF5 file.
model.save_weights('model_cnn_oneconv_reg.h5')

In [None]:
# Load one held-out test file as a plain array, without its 't' column.
vals = pd.read_csv('data_sparse_test/data_1811.csv').drop(['t'], axis=1).values

In [None]:
# Scale the test sample with the pre-fitted scaler.
# NOTE(review): this rebinds `vals`, so re-running the cell scales the data a
# second time — re-run the loading cell first.
vals = sc.transform(vals)

In [None]:
# Column split of the scaled sample: the first two columns feed the model,
# the remaining columns are the reference curves it should reproduce.
inputs, outputs = vals[:, :2], vals[:, 2:]

In [None]:
# The model expects a leading batch axis: wrap the sample as a batch of one,
# predict, then unwrap the single result.
single_batch = np.expand_dims(inputs, axis=0)
predicted = model.predict(single_batch)[0]

In [None]:
# Subplot titles (LaTeX), one per predicted output column.
# Disabled alternative with four labels — presumably one per data column
# (inputs + outputs); TODO confirm before re-enabling:
# var_names = ['$V$', '$\\varphi$', '$P$', '$Q$']
var_names = ['$P$', '$Q$']

In [None]:
# Predicted (red) vs. reference (blue) curves for the test sample, one subplot
# per entry of var_names. Values are in the scaler's transformed space.
fig, axes = plt.subplots(1, 2, figsize=(20,10))
for i, (ax, name) in enumerate(zip(axes.flatten(), var_names)):
    ax.plot(predicted[:, i], c='r', label='pred')
    ax.plot(outputs[:, i], c='b', label='true')
    ax.set_title(name, fontsize='xx-large')
    ax.legend(loc='best', fontsize='xx-large')
# tight_layout only accounts for artists that already exist, so it has to run
# after the titles and legends have been added, not before.
fig.tight_layout(h_pad=4.0)

In [None]:
# NOTE(review): this cell draws the same figure as the preceding plotting cell
# (same `predicted` / `outputs`); consider deleting one of the two.
# Predicted (red) vs. reference (blue) curves, one subplot per entry of var_names.
fig, axes = plt.subplots(1, 2, figsize=(20,10))
for i, (ax, name) in enumerate(zip(axes.flatten(), var_names)):
    ax.plot(predicted[:, i], c='r', label='pred')
    ax.plot(outputs[:, i], c='b', label='true')
    ax.set_title(name, fontsize='xx-large')
    ax.legend(loc='best', fontsize='xx-large')
# tight_layout only accounts for artists that already exist, so it has to run
# after the titles and legends have been added, not before.
fig.tight_layout(h_pad=4.0)

In [None]:
# Write the weights of `model` to 'model.h5'.
# NOTE(review): an earlier cell already saves the same model as
# 'model_cnn_oneconv_reg.h5' — confirm both copies are wanted.
model.save_weights('model.h5')

In [None]:
# `encoder_model` is never defined in this notebook (only `model` is built), so
# the bare call raised NameError on Restart & Run All. Guard it so the notebook
# keeps running, and say so explicitly instead of skipping silently.
if 'encoder_model' in globals():
    encoder_model.save_weights('model_encoder.h5')
else:
    print("Skipping 'model_encoder.h5': encoder_model is not defined in this notebook.")

In [None]:
# `decoder_model` is never defined in this notebook (only `model` is built), so
# the bare call raised NameError on Restart & Run All. Guard it so the notebook
# keeps running, and say so explicitly instead of skipping silently.
if 'decoder_model' in globals():
    decoder_model.save_weights('model_decoder.h5')
else:
    print("Skipping 'model_decoder.h5': decoder_model is not defined in this notebook.")

In [None]:
# Validation: predict every file in data_sparse_val/ and compute one NRMSE per
# file in the original (unscaled) units.
# NOTE(review): os.listdir returns entries in arbitrary order, so positions in
# `errs` / `predicted` / `true` are not stable across machines.

# Filter the Finder metadata file up front so n_files counts real data files only.
files_valid = [fname for fname in os.listdir('data_sparse_val') if fname != '.DS_Store']
n_files = len(files_valid)
fnames = [os.path.join('data_sparse_val', fname) for fname in files_valid]

vals = [pd.read_csv(fname).drop(['t'], axis=1).values for fname in fnames]
true = [val[:, 2:] for val in vals]          # unscaled reference curves
vals = [sc.transform(val) for val in vals]
inputs = [val[:, :2] for val in vals]        # scaled model inputs

scaled_inputs = np.array(inputs)             # (n_files, n_steps, 2)
predicted = model.predict(scaled_inputs)

# The scaler works on all columns at once, so re-attach the inputs before undoing
# the scaling. inverse_transform is a 2-D (n_samples, n_features) API — recent
# scikit-learn rejects 3-D input — hence flatten the file/time axes, invert, and
# restore the shape. Per-column scaling makes this equivalent to the 3-D broadcast.
scaled_full = np.concatenate([scaled_inputs, predicted], axis=2)
n_samples, n_steps, n_cols = scaled_full.shape
unscaled_full = sc.inverse_transform(scaled_full.reshape(-1, n_cols))
predicted = unscaled_full.reshape(n_samples, n_steps, n_cols)[:, :, 2:]

# Same normalised RMSE as the training metric, evaluated per file with numpy.
errs = [np.sqrt(np.mean(np.sum((t - p) ** 2, axis=1))) / np.sqrt(np.mean(np.sum(t ** 2, axis=1)))
        for t, p in zip(true, predicted)]

In [None]:
# Distribution of the per-file validation errors in `errs`.
# NOTE(review): sns.distplot is deprecated since seaborn 0.11 (histplot/displot
# replace it); switch once the environment's seaborn version is confirmed.
plt.figure(figsize=(20,10))
sns.distplot(errs)

In [None]:
# Summary of the validation errors, printed in this order:
# share of files with error <= 0.05, 95th percentile, mean, median.
n_within_tolerance = len(np.where(np.array(errs) <= 0.05)[0])
share_within_tolerance = n_within_tolerance / len(errs)
print(share_within_tolerance,
      np.percentile(errs, 95), np.mean(errs), np.median(errs))

In [None]:
# Position (in `errs`, i.e. in validation-file listing order) of the largest error.
np.argmax(errs)

In [None]:
# Position (in `errs`, i.e. in validation-file listing order) of the smallest error.
np.argmin(errs)

In [None]:
# Predicted (red) vs. true (blue) curves for one validation file, in original units.
# presumably the np.argmax(errs) result of an earlier run — TODO confirm; the
# index depends on the os.listdir order of the validation folder.
sample_idx = 197
# Index the list once instead of rebuilding np.array(true) for every subplot.
true_sample = true[sample_idx]

fig, axes = plt.subplots(1, 2, figsize=(20,10))
for i, (ax, name) in enumerate(zip(axes.flatten(), var_names)):
    ax.plot(predicted[sample_idx, :, i], c='r', label='pred')
    ax.plot(true_sample[:, i], c='b', label='true')
    ax.set_title(name, fontsize='xx-large')
    ax.legend(loc='best', fontsize='xx-large')
# tight_layout only accounts for artists that already exist, so it has to run
# after the titles and legends have been added, not before.
fig.tight_layout(h_pad=4.0)

In [None]:
# Predicted (red) vs. true (blue) curves for one validation file, in original units.
# presumably the np.argmin(errs) result of an earlier run — TODO confirm; the
# index depends on the os.listdir order of the validation folder.
sample_idx = 182
# Index the list once instead of rebuilding np.array(true) for every subplot.
true_sample = true[sample_idx]

fig, axes = plt.subplots(1, 2, figsize=(20,10))
for i, (ax, name) in enumerate(zip(axes.flatten(), var_names)):
    ax.plot(predicted[sample_idx, :, i], c='r', label='pred')
    ax.plot(true_sample[:, i], c='b', label='true')
    ax.set_title(name, fontsize='xx-large')
    ax.legend(loc='best', fontsize='xx-large')
# tight_layout only accounts for artists that already exist, so it has to run
# after the titles and legends have been added, not before.
fig.tight_layout(h_pad=4.0)

In [None]:
# Shell escape: rename a weights file on disk.
# NOTE(review): no cell in this notebook writes 'cnn.h5' (the save cells use
# 'model_cnn_oneconv_reg.h5' and 'model.h5'), so on a fresh run this fails unless
# the file already exists — confirm where it comes from.
!mv cnn.h5 model_cnn_oneconv.h5