### Transforming a univariate timeseries into supervised learning problem

In [1]:
import numpy as np


In [8]:
def split_sequence(sequence, n_steps): 
    X, y = list(), list()
    for i in range(len(sequence)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix > len(sequence)-1: 
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix] 
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)



In [11]:
# define univariate time series
series = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) 
print(series.shape)
# transform to a supervised learning problem
X, y = split_sequence(series, 3)
print(X.shape, y.shape) # show each sample
print("univariate series to supervised learning dataset")
for i in range(len(X)):
    print(X[i], y[i])

(10,)
(7, 3) (7,)
univariate series to supervised learning dataset
[1 2 3] 4
[2 3 4] 5
[3 4 5] 6
[4 5 6] 7
[5 6 7] 8
[6 7 8] 9
[7 8 9] 10


## Preparing data for CNN and LSTM 

The input to every CNN and LSTM layer must be threee dimensional. the 3 dimensions of this input are:
<ul>
    <li> <b>Samples :</b> One sequence is one sample. A batch is comprised of one or more samples
    </li>
    <li><b>Time Steps :</b> One time step is one point of observation in the sample. One sample is
comprised of multiple time steps
    </li>
    <li><b>Features :</b> One feature is one observation at a time step. One time step is comprised of one or more features.</li>
    </ul>

In [1]:
# transform univariate 2d to 3d
from numpy import array
# split a univariate sequence into samples
def split_sequence(sequence, n_steps): 
    X, y = list(), list()
    for i in range(len(sequence)):
    # find the end of this pattern
        end_ix = i + n_steps
    # check if we are beyond the sequence
        if end_ix > len(sequence)-1: 
            break
    # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix] 
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)
# define univariate time series
series = array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) 
print(series.shape)
# transform to a supervised learning problem
X, y = split_sequence(series, 3)
print(X.shape, y.shape)
# transform input from [samples, features] to [samples, timesteps, features] X = X.reshape((X.shape[0], X.shape[1], 1))
print(X.shape)

(10,)
(7, 3) (7,)
(7, 3)


In [3]:
X, y

(array([[1, 2, 3],
        [2, 3, 4],
        [3, 4, 5],
        [4, 5, 6],
        [5, 6, 7],
        [6, 7, 8],
        [7, 8, 9]]),
 array([ 4,  5,  6,  7,  8,  9, 10]))

<i>Suppose you are loading a dataset with 2 columns and 5000 rows. first column is time and second column is sales. now lets setup the data for the an LSTM ?</i>
<br>
There are few problems-->
<ul>
    <li><b>Data Shape :</b> LSTMs expect 3D input, and it can be challenging to get your head around this the first time.</li>
    <li><b>Sequence Length :</b> LSTMs don’t like sequences of more than 200-400 time steps, so the data will need to be split into subsamples.
    </li>
    </ul>

To solve this problem, we have to broken down this into 4 steps.
1. Load the Data
2. Drop the Time Column
3. Split into sequences
4. Reshape Subsequences



### 1. Load the Data
        -- here i am creating a dummy dataset


In [5]:
from numpy import array
# define the dataset
data = list()
n = 5000
for i in range(n):
    data.append([i+1, (i+1)*10]) 
data = array(data) 
print(data[:5, :]) 
print(data.shape)


[[ 1 10]
 [ 2 20]
 [ 3 30]
 [ 4 40]
 [ 5 50]]
(5000, 2)


## 2. Drop the Time Column

In [6]:
from numpy import array
# define the dataset
data = list()
n = 5000
for i in range(n):
    data.append([i+1, (i+1)*10]) 
data = array(data)
# drop time
data = data[:, 1] 
print(data.shape)

(5000,)


## 3. Split into Samples


In [8]:

from numpy import array
# define the dataset
data = list()
n = 5000
for i in range(n):
    data.append([i+1, (i+1)*10])
data = array(data)
# drop time
data = data[:, 1]
# split into samples (e.g. 5000/200 = 25) 
samples = list()
length = 200
# step over the 5,000 in jumps of 200
for i in range(0,n,length):
  # grab from i to i + 200
    sample = data[i:i+length]
    samples.append(sample) 
print(len(samples))

25


## 4. Reshape Subsequences

In [10]:

from numpy import array
# define the dataset
data = list()
n = 5000
for i in range(n):
    data.append([i+1, (i+1)*10]) 
data = array(data)
data = data[:, 1]
# split into samples (e.g. 5000/200 = 25)
samples = list()
length = 200
# step over the 5,000 in jumps of 200
for i in range(0,n,length):
  # grab from i to i + 200
    sample = data[i:i+length]
    samples.append(sample)
# convert list of arrays into 2d array
data = array(samples) 
print(data.shape)

(25, 200)


#### Now, let's use the another Reshape function to bring the data into 3-D form

In [11]:
# example of creating a 3d array of subsequences
from numpy import array
# define the dataset
data = list()
n = 5000
for i in range(n):
    data.append([i+1, (i+1)*10])
data = array(data)
# drop time
data = data[:, 1]
# split into samples (e.g. 5000/200 = 25) 
samples = list()
length = 200
# step over the 5,000 in jumps of 200
for i in range(0,n,length):
  # grab from i to i + 200
    sample = data[i:i+length]
    samples.append(sample)
# convert list of arrays into 2d array
data = array(samples)
# reshape into [samples, timesteps, features]
data = data.reshape((len(samples), length, 1)) 
print(data.shape)

(25, 200, 1)


In [13]:
data[0]

array([[  10],
       [  20],
       [  30],
       [  40],
       [  50],
       [  60],
       [  70],
       [  80],
       [  90],
       [ 100],
       [ 110],
       [ 120],
       [ 130],
       [ 140],
       [ 150],
       [ 160],
       [ 170],
       [ 180],
       [ 190],
       [ 200],
       [ 210],
       [ 220],
       [ 230],
       [ 240],
       [ 250],
       [ 260],
       [ 270],
       [ 280],
       [ 290],
       [ 300],
       [ 310],
       [ 320],
       [ 330],
       [ 340],
       [ 350],
       [ 360],
       [ 370],
       [ 380],
       [ 390],
       [ 400],
       [ 410],
       [ 420],
       [ 430],
       [ 440],
       [ 450],
       [ 460],
       [ 470],
       [ 480],
       [ 490],
       [ 500],
       [ 510],
       [ 520],
       [ 530],
       [ 540],
       [ 550],
       [ 560],
       [ 570],
       [ 580],
       [ 590],
       [ 600],
       [ 610],
       [ 620],
       [ 630],
       [ 640],
       [ 650],
       [ 660],
       [ 6