# Sequence Generator for LSTM

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np

In [2]:
unit = np.array([1]*10 + [2]*10)
x = np.arange(1, 21)
y = np.arange(101, 121)
data = np.stack((unit, x, y)).T
print(unit)
print(x)
print(y)
print(data)
print(data.shape)

[1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
[101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
 119 120]
[[  1   1 101]
 [  1   2 102]
 [  1   3 103]
 [  1   4 104]
 [  1   5 105]
 [  1   6 106]
 [  1   7 107]
 [  1   8 108]
 [  1   9 109]
 [  1  10 110]
 [  2  11 111]
 [  2  12 112]
 [  2  13 113]
 [  2  14 114]
 [  2  15 115]
 [  2  16 116]
 [  2  17 117]
 [  2  18 118]
 [  2  19 119]
 [  2  20 120]]
(20, 3)


In [3]:
data = pd.DataFrame(data, columns = ['unit', 'x', 'y'])
data.head()

Unnamed: 0,unit,x,y
0,1,1,101
1,1,2,102
2,1,3,103
3,1,4,104
4,1,5,105


In [4]:
#Generator to create sequences for the training data
#OBS! Not applicable for multiple units/groups
def generator_training_data(df, sequence_length, columns):
    data = df[columns].values
    num_elements = data.shape[0]
    for start, stop in zip(range(0, num_elements-(sequence_length-1)), range(sequence_length, num_elements+1)):
        yield data[start:stop, :]
        

In [5]:
x = zip(range(0,4), range(15, 21))
print(tuple(x))
for y, z in x:
    print(y, z)

((0, 15), (1, 16), (2, 17), (3, 18))


In [6]:
gen = generator_training_data(data, 19, ['x'])
list(gen)

[array([[ 1],
        [ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 7],
        [ 8],
        [ 9],
        [10],
        [11],
        [12],
        [13],
        [14],
        [15],
        [16],
        [17],
        [18],
        [19]]),
 array([[ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 7],
        [ 8],
        [ 9],
        [10],
        [11],
        [12],
        [13],
        [14],
        [15],
        [16],
        [17],
        [18],
        [19],
        [20]])]

In [7]:
#Wrapper methods such that the generator_training_data function can be applied to data with multiple units
def generator_data_wrapper(df, sequence_length, columns, unit_nos=np.array([])):
    if unit_nos.size <= 0:
        unit_nos = df['unit'].unique()
    #Runs the generator_training_data function for all units
    seperate_unit_gen = (list(generator_training_data(df[df['unit']==unit_no], sequence_length, columns))
               for unit_no in unit_nos)
    #Combine the subsets into a new set of sequences    
    combined_units_gen = np.concatenate(list(seperate_unit_gen)).astype(np.float32)
    return combined_units_gen

In [8]:
gen2 = generator_data_wrapper(data, 10, ['x'])
list(gen2)

[array([[ 1.],
        [ 2.],
        [ 3.],
        [ 4.],
        [ 5.],
        [ 6.],
        [ 7.],
        [ 8.],
        [ 9.],
        [10.]], dtype=float32),
 array([[11.],
        [12.],
        [13.],
        [14.],
        [15.],
        [16.],
        [17.],
        [18.],
        [19.],
        [20.]], dtype=float32)]

In [9]:
def generator_labels(df, sequence_length, label_column):
    data = df[label_column].values
    num_elements = data.shape[0]
    #-1 makes sure that the label returned is the last row of the sequence, not the beginning of the next sequence
    return data[sequence_length-1:num_elements, :]

In [10]:
label_gen = generator_labels(data, 4, ['y'])
list(label_gen)

[array([104]),
 array([105]),
 array([106]),
 array([107]),
 array([108]),
 array([109]),
 array([110]),
 array([111]),
 array([112]),
 array([113]),
 array([114]),
 array([115]),
 array([116]),
 array([117]),
 array([118]),
 array([119]),
 array([120])]

In [11]:
def generator_label_wrapper(df, sequence_length, label, unit_nos=np.array([])):
    if unit_nos.size <= 0:
        unit_nos = df['unit'].unique()
        
    seperate_unit_gen = (generator_labels(df[df['unit']==unit_no], sequence_length, label) 
                for unit_no in unit_nos)
    comined_units_gen = np.concatenate(list(seperate_unit_gen)).astype(np.float32)
    return comined_units_gen
    

In [12]:
label_gen2 = generator_label_wrapper(data, 10, ['y'])
list(label_gen2)

[array([110.], dtype=float32), array([120.], dtype=float32)]