<a href="https://colab.research.google.com/github/microprediction/timeseries-notebooks/blob/main/tcn_surrogate_experiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip uninstall numpy
!pip install --upgrade numpy
!pip install keras
!pip install timemachines
!pip install --upgrade microprediction
!pip install --upgrade realdata

# Training data
We run a univariate model from the timemachines package and record its one-step-ahead (k=1) predictions as training examples.

In [3]:
from realdata import get_values
# Fetch one series from the realdata package and show how many values it holds.
values = get_values()
len(values)

1000

In [2]:
from microprediction import MicroReader

# Connect a reader and list the available stream names.
mr = MicroReader()
streams = mr.get_stream_names()

# Pull the lagged values for one stream and reverse their order.
_lagged = mr.get_lagged_values(name='electricity-fueltype-nyiso-hydro.json')
YS = list(reversed(_lagged))

# The rest of the notebook needs a reasonably long history.
assert len(YS)>900
YS[50:450]

ContextualVersionConflict: ignored

In [5]:
from timemachines.skaters.elo.eloensembles import elo_fastest_residual_precision_ensemble

In [10]:
import numpy as np

# Experiment dimensions.
repetitions = 50            # number of series drawn with get_values()
samples = 200               # sliding windows taken from each series
size = repetitions*samples  # total number of training rows
time_steps = 20             # trailing observations handed to the network

window_len = 400            # observations in each window fed to the model
jiggle_len = samples-20     # leading observations perturbed with N(0,1) noise

# x_train[j] holds the last `time_steps` inputs the model saw for example j,
# y_train[j] the model's k=1 prediction, y_true[j] the value that followed.
x_train = np.zeros(shape=(size, time_steps, 1))
y_train = np.zeros(shape=(size, 1))
x_stds = list()
y_true = list()

print(np.shape(x_train))
for j1 in range(repetitions):
  # Redraw until the series is long enough and its maximum is not too large.
  big_y = 10000
  len_y = 0
  while (big_y>1000) or (len_y<900):
    YS = get_values()
    # Treat a missing/empty series as "too short" instead of crashing on len().
    big_y = np.max(YS) if YS else 0
    len_y = len(YS) if YS else 0
  print(YS[:5])
  print((j1,repetitions))
  # Nothing has been collected on the first pass; avoid the empty-mean warning.
  print(np.mean(x_stds) if x_stds else float('nan'))
  for j2 in range(samples):
    j = j1*samples+j2
    s = {}  # fresh model state for every example
    random_scale = np.random.exponential()/60.0
    y_training = list(YS[j2:j2+window_len])
    # Jiggle the start of the window (never past the end of the window).
    for t in range(min(jiggle_len, len(y_training))):
      y_training[t] = y_training[t]+1.*np.random.randn()
    # Scale the whole window randomly.
    y_training = [yt*random_scale for yt in y_training]
    # Run through all but the last point and keep the final prediction.
    for y in y_training[:-1]:
      x, x_std, s = elo_fastest_residual_precision_ensemble(y,k=1,s=s)
    y_true.append(y_training[-1])
    y_train[j,0] = x[0]
    x_stds.append(x_std[0])
    # Give the network the last `time_steps` points the model was shown.
    x_train[j,:,0] = y_training[-time_steps-1:-1]
      
  



(10000, 20, 1)
[30.0, 30.0, 30.0, 30.0, 30.0]
(0, 50)
nan


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


[12.0, 12.0, 12.0, 12.0, 12.0]
(1, 50)
0.008926466457892681
[97.0, 65.0, 65.0, 53.0, 53.0]
(2, 50)
0.01757616085324847
[-0.55202, -0.55404, -0.55607, -0.55445, -0.55242]
(3, 50)
0.053144718768046266
[100.0, 100.0, 119.0, 119.0, 60.0]
(4, 50)
0.04153460419781223
[-0.55404, -0.55607, -0.55445, -0.55242, -0.55647]
(5, 50)
0.21119489805652114
[70.0, 70.0, 53.0, 53.0, 28.0]
(6, 50)
0.17692195087289056
[37.66, 35.98, 35.83, 34.53, 31.58]
(7, 50)
0.1990842328790362
[-0.5, 0.5, 0.5, 0.5, 0.5]
(8, 50)
0.21171574150711564
[10.0, 10.0, 10.0, 11.0, 12.0]
(9, 50)
0.1895554294856056
[12.0, 12.0, 12.0, 12.0, 13.0]
(10, 50)
0.17153134487706279
[368.2856, 365.8852, 365.8852, 364.682, 367.9281]
(11, 50)
0.1585936161995259
[70.0, 70.0, 64.0, 94.0, 81.0]
(12, 50)
0.15640516775846347
[-0.5, 0.5, 0.5, 0.5, 0.5]
(13, 50)
0.16104591888425457
[146.0, 174.0, 166.0, 170.0, 160.0]
(14, 50)
0.1501965961625463
[13.0, 13.0, 13.0, 12.0, 13.0]
(15, 50)
0.15261105269089995
[109.0, 75.0, 98.0, 124.0, 100.0]
(16, 50)
0.1

In [87]:
# Display the last element of YS.
YS[-1]

3906.0

# Dense NN fitting

In [11]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# TCN fitting

In [12]:
from tcn import TCN, tcn_full_summary


In [13]:
# Temporal convolutional layer consuming windows of `time_steps` scalar observations.
tcn_layer = TCN(input_shape=(time_steps, 1))
# The receptive field tells you how far the model can see in terms of timesteps.
print('Receptive field size =', tcn_layer.receptive_field)

# Stack the TCN with a single linear output unit, built incrementally.
m = Sequential()
m.add(tcn_layer)
m.add(Dense(1, activation='linear'))

m.compile(loss='mse', optimizer='adam')

Receptive field size = 253


In [None]:
from sklearn.metrics import mean_squared_error

# The model-vs-truth error does not depend on the network, so compute it once
# rather than on every pass through the loop.
err3 = mean_squared_error(y_train,y_true)

for _ in range(500):
  # Train for another 10 epochs, then score the surrogate on the training set.
  m.fit(x_train, y_train, epochs=10, verbose=0)
  y_hat = m.predict(x_train)
  err1 = mean_squared_error(y_hat,y_train)  # surrogate vs. the model it imitates
  err2 = mean_squared_error(y_hat,y_true)   # surrogate vs. realised values

  print({'surrogate-model':err1,'surrogate-true':err2,'model-true':err3})


{'surrogate-model': 0.01634198233106872, 'surrogate-true': 0.14783343400578627, 'model-true': 0.13941624136154138}
{'surrogate-model': 0.028343202119245216, 'surrogate-true': 0.16839307091056843, 'model-true': 0.13941624136154138}
{'surrogate-model': 0.0048969383678299895, 'surrogate-true': 0.14210722931451436, 'model-true': 0.13941624136154138}
{'surrogate-model': 0.061161652970525754, 'surrogate-true': 0.21466117533703447, 'model-true': 0.13941624136154138}
{'surrogate-model': 0.010153933766188589, 'surrogate-true': 0.14708055010857451, 'model-true': 0.13941624136154138}
{'surrogate-model': 0.004536522937049713, 'surrogate-true': 0.14181089631468602, 'model-true': 0.13941624136154138}
{'surrogate-model': 0.009885819345595832, 'surrogate-true': 0.15336419143769714, 'model-true': 0.13941624136154138}
{'surrogate-model': 0.003543840295044219, 'surrogate-true': 0.14371757055011808, 'model-true': 0.13941624136154138}
{'surrogate-model': 0.0060919422162985055, 'surrogate-true': 0.150426601