In [1]:
import numpy as np

In [40]:
def generate_data(n_samples=100, n_features=4, round=2, seed=None):
    """Draw a random feature matrix and target vector, rounded for display.

    Parameters
    ----------
    n_samples : int
        Number of rows in ``X`` and entries in ``y``.
    n_features : int
        Number of columns in ``X``.
    round : int
        Number of decimals both arrays are rounded to. Note that this
        parameter shadows the built-in ``round`` inside the function.
    seed : int or None
        When not ``None``, NumPy's *global* random state is reseeded with
        this value before sampling (a side effect visible to the caller).

    Returns
    -------
    tuple
        ``(X, y)`` with ``X`` of shape ``(n_samples, n_features)`` and ``y``
        of shape ``(n_samples,)``, both sampled with ``np.random.randn``.
    """
    if seed is not None:
        np.random.seed(seed)
    # Order matters for reproducibility: features are drawn before targets.
    features = np.random.randn(n_samples, n_features)
    targets = np.random.randn(n_samples)
    return tuple(values.round(round) for values in (features, targets))


In [41]:
def data_lstm(X, y, sequence_length):
    """Build sliding windows over ``X`` paired with next-step targets from ``y``.

    Window ``i`` holds rows ``X[i : i + sequence_length]`` and is paired with
    ``y[i + sequence_length]``, i.e. the target one row *after* the window.
    The final ``sequence_length`` rows therefore never start a window.

    Parameters
    ----------
    X : array-like
        Data whose first axis is the one being windowed.
    y : sliceable sequence
        Targets; presumably the same length as ``X`` (not checked here).
    sequence_length : int
        Number of consecutive rows per window.

    Returns
    -------
    X_lstm : np.ndarray
        Shape ``(n_windows, sequence_length, *X.shape[1:])`` with
        ``n_windows = max(len(X) - sequence_length, 0)``.
    y_lstm
        ``y[sequence_length:]``.
    """
    X = np.asarray(X)
    n_windows = max(len(X) - sequence_length, 0)
    windows = [X[start:start + sequence_length] for start in range(n_windows)]
    if windows:
        X_lstm = np.stack(windows)
    else:
        # An empty list would collapse to shape (0,); keep the window and
        # feature axes so the result stays consistent with the vectorized
        # implementation and with downstream code expecting a 3-D array.
        X_lstm = np.empty((0, sequence_length) + X.shape[1:], dtype=X.dtype)
    y_lstm = y[sequence_length:]
    return X_lstm, y_lstm

# vectorized version
def data_lstm_v2(X, y, sequence_length):
    """Vectorized sliding-window builder using a single fancy-index gather.

    Produces the same pairing as the loop-based ``data_lstm``: window ``i``
    is ``X[i : i + sequence_length]`` and its target is
    ``y[i + sequence_length]``.

    Parameters
    ----------
    X : np.ndarray
        Array indexed along its first axis with an integer index matrix.
    y : sliceable sequence
        Targets; sliced from ``sequence_length`` onward.
    sequence_length : int
        Number of consecutive rows per window.

    Returns
    -------
    tuple
        ``(X_lstm, y_lstm)`` where ``X_lstm`` has shape
        ``(len(X) - sequence_length, sequence_length, *X.shape[1:])``.
    """
    window_count = len(X) - sequence_length
    # gather_index[i, j] == i + j: row i lists the source rows of window i.
    gather_index = np.add.outer(np.arange(window_count), np.arange(sequence_length))
    windows = X[gather_index]
    targets = y[sequence_length:]
    return windows, targets

In [56]:
# Build the windowed dataset with both implementations and verify they agree.
sequence_length = 3
# Fixed seed so the values displayed in the following cells are reproducible.
X, y = generate_data(seed=0)
X_lstm, y_lstm = data_lstm(X, y, sequence_length)
X_lstm_2, y_lstm_2 = data_lstm_v2(X, y, sequence_length)
# The loop-based and vectorized versions must yield the same windows/targets.
assert np.allclose(X_lstm, X_lstm_2)
assert np.allclose(y_lstm, y_lstm_2)
# Report how the raw shapes map onto the windowed shapes.
print(f'[X] {X.shape} -> {X_lstm.shape} | [y] {y.shape} -> {y_lstm_2.shape}')

[X] (100, 4) -> (97, 3, 4) | [y] (100,) -> (97,)


In [68]:
# Inspect the last window and the target paired with it, to check the
# alignment against the raw tails of X and y displayed in the next cells.
print(y_lstm[-1])
X_lstm[-1]

-0.08


array([[ 0.04, -1.66, -0.99, -1.47],
       [ 1.65,  0.16,  0.57, -0.22],
       [-0.35, -1.62, -0.29, -0.76]])

In [65]:
# Last ten raw feature rows, for comparison with the last window X_lstm[-1].
X[-10:]

array([[ 0.7 ,  0.  ,  0.93,  0.34],
       [-0.02,  0.16, -0.19, -0.39],
       [-0.27, -1.13,  0.28, -0.99],
       [ 0.84, -0.25,  0.05,  0.49],
       [ 0.64, -1.57, -0.21,  0.88],
       [-1.7 ,  0.39, -2.26, -1.02],
       [ 0.04, -1.66, -0.99, -1.47],
       [ 1.65,  0.16,  0.57, -0.22],
       [-0.35, -1.62, -0.29, -0.76],
       [ 0.86,  1.14,  1.47,  0.85]])

In [67]:
# Last ten raw targets, for comparison with the last windowed target y_lstm[-1].
y[-10:]

array([-1.03, -0.35,  1.1 ,  1.3 ,  2.7 , -0.07, -0.66, -0.51, -1.02,
       -0.08])

In [69]:
def data_lstm(X, y, sequence_length, y_looks_ahead=True):
    """Build sliding windows over ``X`` and align targets from ``y``.

    Window ``i`` always holds rows ``X[i : i + sequence_length]``. The target
    paired with it depends on ``y_looks_ahead``:

    * ``True``  -> ``y[i + sequence_length - 1]`` (the target at the window's
      last row); one more window is produced than in the other mode.
    * ``False`` -> ``y[i + sequence_length]`` (the target one row after the
      window).

    NOTE(review): the flag name presumably means "``y`` is already shifted to
    look ahead, so no extra shift is applied" -- confirm with the author.

    Parameters
    ----------
    X : array-like
        Data whose first axis is the one being windowed.
    y : sliceable sequence
        Targets; presumably the same length as ``X`` (not checked here).
    sequence_length : int
        Number of consecutive rows per window. Values below 1 are not
        meaningful (e.g. ``0`` with ``y_looks_ahead=True`` slices ``y[-1:]``).
    y_looks_ahead : bool
        Selects the target alignment described above.

    Returns
    -------
    X_lstm : np.ndarray
        Shape ``(n_windows, sequence_length, *X.shape[1:])``; ``n_windows`` is
        clamped at zero when ``X`` is too short for a single window.
    y_lstm
        ``y[sequence_length - 1:]`` or ``y[sequence_length:]``.
    """
    X = np.asarray(X)
    # Index in y of the target belonging to the first window.
    first_target = sequence_length - 1 if y_looks_ahead else sequence_length
    n_windows = max(len(X) - first_target, 0)
    windows = [X[start:start + sequence_length] for start in range(n_windows)]
    if windows:
        X_lstm = np.stack(windows)
    else:
        # An empty list would collapse to shape (0,); keep the window and
        # feature axes so the result stays consistent with the vectorized
        # implementation and with downstream code expecting a 3-D array.
        X_lstm = np.empty((0, sequence_length) + X.shape[1:], dtype=X.dtype)
    y_lstm = y[first_target:]
    return X_lstm, y_lstm

# vectorized version
def data_lstm_v2(X, y, sequence_length, y_looks_ahead=True):
    """Vectorized sliding-window builder with selectable target alignment.

    Window ``i`` is ``X[i : i + sequence_length]``. With
    ``y_looks_ahead=True`` it is paired with ``y[i + sequence_length - 1]``;
    with ``False`` it is paired with ``y[i + sequence_length]`` and one fewer
    window is produced.

    Parameters
    ----------
    X : np.ndarray
        Array indexed along its first axis with an integer index matrix.
    y : sliceable sequence
        Targets; sliced from the first target position onward.
    sequence_length : int
        Number of consecutive rows per window.
    y_looks_ahead : bool
        Selects the target alignment described above.

    Returns
    -------
    tuple
        ``(X_lstm, y_lstm)``.
    """
    # Position in y of the target that belongs to the first window.
    if y_looks_ahead:
        first_target = sequence_length - 1
    else:
        first_target = sequence_length
    window_starts = np.arange(len(X) - first_target)
    within_window = np.arange(sequence_length)
    # Broadcasting a column of starts against a row of offsets gives
    # gather_index[i, j] == i + j, the source row for slot j of window i.
    gather_index = window_starts[:, np.newaxis] + within_window
    return X[gather_index], y[first_target:]

In [70]:
# Rebuild the windowed dataset with the redefined functions (called without
# y_looks_ahead, so its default applies) and verify both versions agree.
sequence_length = 3
# Fixed seed so the values displayed in the following cells are reproducible.
X, y = generate_data(seed=0)
X_lstm, y_lstm = data_lstm(X, y, sequence_length)
X_lstm_2, y_lstm_2 = data_lstm_v2(X, y, sequence_length)
# The loop-based and vectorized versions must yield the same windows/targets.
assert np.allclose(X_lstm, X_lstm_2)
assert np.allclose(y_lstm, y_lstm_2)
# Report how the raw shapes map onto the windowed shapes.
print(f'[X] {X.shape} -> {X_lstm.shape} | [y] {y.shape} -> {y_lstm_2.shape}')

[X] (100, 4) -> (98, 3, 4) | [y] (100,) -> (98,)


In [74]:
# Inspect the first window and the target paired with it, to check the
# alignment against the raw heads of X and y displayed in the next cells.
print(y_lstm[0])
X_lstm[0]

0.77


array([[ 1.76,  0.4 ,  0.98,  2.24],
       [ 1.87, -0.98,  0.95, -0.15],
       [-0.1 ,  0.41,  0.14,  1.45]])

In [75]:
# First ten raw feature rows, for comparison with the first window X_lstm[0].
X[:10]

array([[ 1.76,  0.4 ,  0.98,  2.24],
       [ 1.87, -0.98,  0.95, -0.15],
       [-0.1 ,  0.41,  0.14,  1.45],
       [ 0.76,  0.12,  0.44,  0.33],
       [ 1.49, -0.21,  0.31, -0.85],
       [-2.55,  0.65,  0.86, -0.74],
       [ 2.27, -1.45,  0.05, -0.19],
       [ 1.53,  1.47,  0.15,  0.38],
       [-0.89, -1.98, -0.35,  0.16],
       [ 1.23,  1.2 , -0.39, -0.3 ]])

In [76]:
# First ten raw targets, for comparison with the first windowed target y_lstm[0].
y[:10]

array([-0.6 , -1.12,  0.77,  0.36, -1.77,  0.36,  0.81,  0.06, -0.19,
       -0.81])