## Idea original modificada:
http://philipperemy.github.io/keras-stateful-lstm/

Interestante:

https://machinelearningmastery.com/use-different-batch-sizes-training-predicting-python-keras/

## Generamos una secuencia artificial para ejemplificar el problema
- Cada muestra es una secuencia binaria de 20 bits. 
- Para X: El primer bit es es 1 con una probabilidad de 0.5, el resto son todos ceros
- Para y: La etiqueta es 1 si el primer bit es 1

In [1]:
import numpy as np
N_samples = 1200
N_bits = 20
from numpy.random import choice
one_indexes = choice(a=N_samples, size=int(N_samples / 2), replace=False)
X = np.zeros((N_samples, N_bits))
X[one_indexes, 0] = 1  # very long term memory.
y = X[:,0]

In [2]:
N_train = 1000
X_train = X[:N_train]
y_train = y[:N_train]
X_validation = X[N_train:]
y_validation = y[N_train:]

In [3]:
idx = 1
print('Entrada: ',X_train[idx])
print('Salida:', y_train[idx])

Entrada:  [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.]
Salida: 0.0


In [4]:
print('Training')
print(X_train.shape)
print(y_train.shape)
print('Validation')
print(X_validation.shape)
print(y_validation.shape)

Training
(1000, 20)
(1000,)
Validation
(200, 20)
(200,)


In [5]:
def prepare_sequences(x_train, y_train, window_length):
    windows = []
    windows_y = []
    for i, sequence in enumerate(x_train):
        len_seq = len(sequence)
        for window_start in range(0, len_seq - window_length + 1):
            window_end = window_start + window_length
            window = sequence[window_start:window_end]
            windows.append(window)
            windows_y.append(y_train[i])
    return np.array(windows), np.array(windows_y)

## Que pasa si partimos la secuencia para entrenar?

### Probar:
- window_length = 10, 15, 20
- batch_size = 1, 50, 1000

In [10]:
window_length = 20
X_train_split_1, y_train_split_1 = prepare_sequences(X_train, y_train, window_length)
X_validation_split_1, y_validation_split_1 = prepare_sequences(X_validation, y_validation, window_length)

In [11]:
# Empieza el nuevo vector en (N_bits - window_length + 1)
next_vect_delta = N_bits - window_length + 1
print(X_train_split_1[next_vect_delta])
# Los y's son todos unos si el primer valor del vector fue 1
print(y_train_split_1[next_vect_delta])
print(y_train_split_1[next_vect_delta+1])
print(X_train_split_1.shape)

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.]
0.0
0.0
(1000, 20)


In [12]:
from keras.layers import SimpleRNN, Dense, LSTM
from keras.models import Sequential
batch_size = 50

model = Sequential()
model.add(LSTM(10, input_shape=(window_length, 1), return_sequences=False, stateful=False))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train_split_1.reshape(-1,window_length,1), y_train_split_1, batch_size=batch_size, epochs=15,
          validation_data=(X_validation_split_1.reshape(-1,window_length,1), y_validation_split_1), shuffle=False)
score, acc = model.evaluate(X_validation_split_1.reshape(-1,window_length,1), y_validation_split_1, batch_size=1, verbose=1)
print(score, acc)

Train on 1000 samples, validate on 200 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15

KeyboardInterrupt: 

## Usando Batch_size de 1, stateful y window_lenght = 1

In [13]:
window_length = 1
X_train_split_2, y_train_split_2 = prepare_sequences(X_train, y_train, window_length)
X_validation_split_2, y_validation_split_2 = prepare_sequences(X_validation, y_validation, window_length)

In [14]:
batch_size = 1

model = Sequential()
model.add(LSTM(10, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


model.fit(X_train_split_2.reshape(-1,window_length,1), y_train_split_2, batch_size=batch_size, epochs=1,
          validation_data=(X_validation_split_2.reshape(-1,window_length,1), y_validation_split_2), shuffle=False)
score, acc = model.evaluate(X_validation_split_2.reshape(-1,window_length,1), y_validation_split_2, batch_size=1, verbose=1)
print(score, acc)

Train on 20000 samples, validate on 4000 samples
Epoch 1/1


Que pasa en este caso? Por que no llega a mejorar la accuracy?

## Reseteo despues de cada secuencia

In [15]:
i = 1
X_validation_split_2[i*N_bits:(i+1)*N_bits]

array([[ 1.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.]])

In [189]:
batch_size = 1

model = Sequential()
model.add(LSTM(10, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

for i in range(X_train_split_2.shape[0]):
    print(i)
    model.fit(X_train_split_2[i*N_bits:(i+1)*N_bits].reshape(-1,window_length,1), 
              y_train_split_2[i*N_bits:(i+1)*N_bits], batch_size=batch_size, epochs=1, shuffle=False, verbose=1)
    model.reset_states()
score, acc = model.evaluate(X_validation_split_2.reshape(-1,window_length,1), y_validation_split_2, batch_size=1, verbose=1)
print(score, acc)

0
Epoch 1/1
1
Epoch 1/1
2
Epoch 1/1
3
Epoch 1/1
4
Epoch 1/1
5
Epoch 1/1
6
Epoch 1/1
7
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
10
Epoch 1/1
11
Epoch 1/1
12
Epoch 1/1
13
Epoch 1/1
14
Epoch 1/1
15
Epoch 1/1
16
Epoch 1/1
17
Epoch 1/1
18
Epoch 1/1
19
Epoch 1/1
20
Epoch 1/1
21
Epoch 1/1
22
Epoch 1/1
23
Epoch 1/1
24
Epoch 1/1
25
Epoch 1/1
26
Epoch 1/1
27
Epoch 1/1
28
Epoch 1/1
29
Epoch 1/1
30
Epoch 1/1
31
Epoch 1/1
32
Epoch 1/1
33
Epoch 1/1
34
Epoch 1/1
35
Epoch 1/1
36
Epoch 1/1
37
Epoch 1/1
38
Epoch 1/1
39
Epoch 1/1
40
Epoch 1/1
41
Epoch 1/1
42
Epoch 1/1
43
Epoch 1/1
44
Epoch 1/1
45
Epoch 1/1
46
Epoch 1/1
47
Epoch 1/1
48
Epoch 1/1
49
Epoch 1/1
50
Epoch 1/1
51
Epoch 1/1
52
Epoch 1/1
53
Epoch 1/1
54
Epoch 1/1
55
Epoch 1/1
56
Epoch 1/1
57
Epoch 1/1
58
Epoch 1/1
59
Epoch 1/1
60
Epoch 1/1
61
Epoch 1/1
62
Epoch 1/1
63
Epoch 1/1
64
Epoch 1/1
65
Epoch 1/1
66
Epoch 1/1
67
Epoch 1/1
68
Epoch 1/1
69
Epoch 1/1
70
Epoch 1/1
71
Epoch 1/1
72
Epoch 1/1
73
Epoch 1/1
74
Epoch 1/1
75
Epoch 1/1
76
Epoch 1/1
77
Epoch 

178
Epoch 1/1
179
Epoch 1/1
180
Epoch 1/1
181
Epoch 1/1
182
Epoch 1/1
183
Epoch 1/1
184
Epoch 1/1
185
Epoch 1/1
186
Epoch 1/1
187
Epoch 1/1
188
Epoch 1/1
189
Epoch 1/1
190
Epoch 1/1
191
Epoch 1/1
192
Epoch 1/1
193
Epoch 1/1
194
Epoch 1/1
195
Epoch 1/1
196
Epoch 1/1
197
Epoch 1/1
198
Epoch 1/1
199
Epoch 1/1
200
Epoch 1/1
201
Epoch 1/1
202
Epoch 1/1
203
Epoch 1/1
204
Epoch 1/1
205
Epoch 1/1
206
Epoch 1/1
207
Epoch 1/1
208
Epoch 1/1
209
Epoch 1/1
210
Epoch 1/1
211
Epoch 1/1
212
Epoch 1/1
213
Epoch 1/1
214
Epoch 1/1
215
Epoch 1/1
216
Epoch 1/1
217
Epoch 1/1
218
Epoch 1/1
219
Epoch 1/1
220
Epoch 1/1
221
Epoch 1/1
222
Epoch 1/1
223
Epoch 1/1
224
Epoch 1/1
225
Epoch 1/1
226
Epoch 1/1
227
Epoch 1/1
228
Epoch 1/1
229
Epoch 1/1
230
Epoch 1/1
231
Epoch 1/1
232
Epoch 1/1
233
Epoch 1/1
234
Epoch 1/1
235
Epoch 1/1
236
Epoch 1/1
237
Epoch 1/1
238
Epoch 1/1
239
Epoch 1/1
240
Epoch 1/1
241
Epoch 1/1
242
Epoch 1/1
243
Epoch 1/1
244
Epoch 1/1
245
Epoch 1/1
246
Epoch 1/1
247
Epoch 1/1
248
Epoch 1/1
249
Ep

354
Epoch 1/1
355
Epoch 1/1
356
Epoch 1/1
357
Epoch 1/1
358
Epoch 1/1
359
Epoch 1/1
360
Epoch 1/1
361
Epoch 1/1
362
Epoch 1/1
363
Epoch 1/1
364
Epoch 1/1
365
Epoch 1/1
366
Epoch 1/1
367
Epoch 1/1
368
Epoch 1/1
369
Epoch 1/1
370
Epoch 1/1
371
Epoch 1/1
372
Epoch 1/1
373
Epoch 1/1
374
Epoch 1/1
375
Epoch 1/1
376
Epoch 1/1
377
Epoch 1/1
378
Epoch 1/1
379
Epoch 1/1
380
Epoch 1/1
381
Epoch 1/1
382
Epoch 1/1
383
Epoch 1/1
384
Epoch 1/1
385
Epoch 1/1
386
Epoch 1/1
387
Epoch 1/1
388
Epoch 1/1
389
Epoch 1/1
390
Epoch 1/1
391
Epoch 1/1
392
Epoch 1/1
393
Epoch 1/1
394
Epoch 1/1
395
Epoch 1/1
396
Epoch 1/1
397
Epoch 1/1
398
Epoch 1/1
399
Epoch 1/1
400
Epoch 1/1
401
Epoch 1/1
402
Epoch 1/1
403
Epoch 1/1
404
Epoch 1/1
405
Epoch 1/1
406
Epoch 1/1
407
Epoch 1/1
408
Epoch 1/1
409
Epoch 1/1
410
Epoch 1/1
411
Epoch 1/1
412
Epoch 1/1
413
Epoch 1/1
414
Epoch 1/1
415
Epoch 1/1
416
Epoch 1/1
417
Epoch 1/1
418
Epoch 1/1
419
Epoch 1/1
420
Epoch 1/1
421
Epoch 1/1
422
Epoch 1/1
423
Epoch 1/1
424
Epoch 1/1
425
Ep

525
Epoch 1/1
526
Epoch 1/1
527
Epoch 1/1
528
Epoch 1/1
529
Epoch 1/1
530
Epoch 1/1
531
Epoch 1/1
532
Epoch 1/1
533
Epoch 1/1
534
Epoch 1/1
535
Epoch 1/1
536
Epoch 1/1
537
Epoch 1/1
538
Epoch 1/1
539
Epoch 1/1
540
Epoch 1/1
541
Epoch 1/1
542
Epoch 1/1
543
Epoch 1/1
544
Epoch 1/1
545
Epoch 1/1
546
Epoch 1/1
547
Epoch 1/1
548
Epoch 1/1
549
Epoch 1/1
550
Epoch 1/1
551
Epoch 1/1
552
Epoch 1/1
553
Epoch 1/1
554
Epoch 1/1
555
Epoch 1/1
556
Epoch 1/1
557
Epoch 1/1
558
Epoch 1/1
559
Epoch 1/1
560
Epoch 1/1
561
Epoch 1/1
562
Epoch 1/1
563
Epoch 1/1
564
Epoch 1/1
565
Epoch 1/1
566
Epoch 1/1
567
Epoch 1/1
568
Epoch 1/1
569
Epoch 1/1
570
Epoch 1/1
571
Epoch 1/1
572
Epoch 1/1
573
Epoch 1/1
574
Epoch 1/1
575
Epoch 1/1
576
Epoch 1/1
577
Epoch 1/1
578
Epoch 1/1
579
Epoch 1/1
580
Epoch 1/1
581
Epoch 1/1
582
Epoch 1/1
583
Epoch 1/1
584
Epoch 1/1
585
Epoch 1/1
586
Epoch 1/1
587
Epoch 1/1
588
Epoch 1/1
589
Epoch 1/1
590
Epoch 1/1
591
Epoch 1/1
592
Epoch 1/1
593
Epoch 1/1
594
Epoch 1/1
595
Epoch 1/1
596
Ep

610
Epoch 1/1
611
Epoch 1/1
612
Epoch 1/1
613
Epoch 1/1
614
Epoch 1/1
615
Epoch 1/1
616
Epoch 1/1
617
Epoch 1/1
618
Epoch 1/1
619
Epoch 1/1
620
Epoch 1/1
621
Epoch 1/1
622
Epoch 1/1
623
Epoch 1/1
624
Epoch 1/1
625
Epoch 1/1
626
Epoch 1/1
627
Epoch 1/1
628
Epoch 1/1
629
Epoch 1/1
630
Epoch 1/1
631
Epoch 1/1
632
Epoch 1/1
633
Epoch 1/1
634
Epoch 1/1
635
Epoch 1/1
636
Epoch 1/1
637
Epoch 1/1
638
Epoch 1/1
639
Epoch 1/1
640
Epoch 1/1
641
Epoch 1/1
642
Epoch 1/1
643
Epoch 1/1
644
Epoch 1/1
645
Epoch 1/1
646
Epoch 1/1
647
Epoch 1/1
648
Epoch 1/1
649
Epoch 1/1
650
Epoch 1/1
651
Epoch 1/1
652
Epoch 1/1
653
Epoch 1/1
654
Epoch 1/1
655
Epoch 1/1
656
Epoch 1/1
657
Epoch 1/1
658
Epoch 1/1
659
Epoch 1/1
660
Epoch 1/1
661
Epoch 1/1
662
Epoch 1/1
663
Epoch 1/1
664
Epoch 1/1
665
Epoch 1/1
666
Epoch 1/1
667
Epoch 1/1
668
Epoch 1/1
669
Epoch 1/1
670
Epoch 1/1
671
Epoch 1/1
672
Epoch 1/1
673
Epoch 1/1
674
Epoch 1/1
675
Epoch 1/1
676
Epoch 1/1
677
Epoch 1/1
678
Epoch 1/1
679
Epoch 1/1
680
Epoch 1/1
681
Ep

779
Epoch 1/1
780
Epoch 1/1
781
Epoch 1/1
782
Epoch 1/1
783
Epoch 1/1
784
Epoch 1/1
785
Epoch 1/1
786
Epoch 1/1
787
Epoch 1/1
788
Epoch 1/1
789
Epoch 1/1
790
Epoch 1/1
791
Epoch 1/1
792
Epoch 1/1
793
Epoch 1/1
794
Epoch 1/1
795
Epoch 1/1
796
Epoch 1/1
797
Epoch 1/1
798
Epoch 1/1
799
Epoch 1/1
800
Epoch 1/1
801
Epoch 1/1
802
Epoch 1/1
803
Epoch 1/1
804
Epoch 1/1
805
Epoch 1/1
806
Epoch 1/1
807
Epoch 1/1
808
Epoch 1/1
809
Epoch 1/1
810
Epoch 1/1
811
Epoch 1/1
812
Epoch 1/1
813
Epoch 1/1
814
Epoch 1/1
815
Epoch 1/1
816
Epoch 1/1
817
Epoch 1/1
818
Epoch 1/1
819
Epoch 1/1
820
Epoch 1/1
821
Epoch 1/1
822
Epoch 1/1
823
Epoch 1/1
824
Epoch 1/1
825
Epoch 1/1
826
Epoch 1/1
827
Epoch 1/1
828
Epoch 1/1
829
Epoch 1/1
830
Epoch 1/1
831
Epoch 1/1
832
Epoch 1/1
833
Epoch 1/1
834
Epoch 1/1
835
Epoch 1/1
836
Epoch 1/1
837
Epoch 1/1
838
Epoch 1/1
839
Epoch 1/1
840
Epoch 1/1
841
Epoch 1/1
842
Epoch 1/1
843
Epoch 1/1
844
Epoch 1/1
845
Epoch 1/1
846
Epoch 1/1
847
Epoch 1/1
848
Epoch 1/1
849
Epoch 1/1
850
Ep

949
Epoch 1/1
950
Epoch 1/1
951
Epoch 1/1
952
Epoch 1/1
953
Epoch 1/1
954
Epoch 1/1
955
Epoch 1/1
956
Epoch 1/1
957
Epoch 1/1
958
Epoch 1/1
959
Epoch 1/1
960
Epoch 1/1
961
Epoch 1/1
962
Epoch 1/1
963
Epoch 1/1
964
Epoch 1/1
965
Epoch 1/1
966
Epoch 1/1
967
Epoch 1/1
968
Epoch 1/1
969
Epoch 1/1
970
Epoch 1/1
971
Epoch 1/1
972
Epoch 1/1
973
Epoch 1/1
974
Epoch 1/1
975
Epoch 1/1
976
Epoch 1/1
977
Epoch 1/1
978
Epoch 1/1
979
Epoch 1/1
980
Epoch 1/1
981
Epoch 1/1
982
Epoch 1/1
983
Epoch 1/1
984
Epoch 1/1
985
Epoch 1/1
986
Epoch 1/1
987
Epoch 1/1
 1/20 [>.............................] - ETA: 0s - loss: 2.7093e-05 - acc: 1.0000

KeyboardInterrupt: 

## Batch size = 1000 y reshape del dataset

In [235]:
batch_size = 1000
X_train_split, y_train_split = prepare_sequences(X_train, y_train, 1)
N = len(X_train_split)
frac = N/batch_size
print(N, frac)
X_train_split_reshaped = X_train_split.reshape(batch_size,int(frac)).T.reshape(N,1,1)
y_train_split_reshaped = y_train_split.reshape(batch_size,int(frac)).T.reshape(N)
print(X_train_split.shape)
print(X_train_split_reshaped.shape)

20000 20.0
(20000, 1)
(20000, 1, 1)


In [236]:
from keras.layers import SimpleRNN, Dense, LSTM
from keras.models import Sequential
model = Sequential()
model.add(LSTM(10, batch_input_shape=(batch_size, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [237]:
for i in range(N_bits):
    model.fit(X_train_split_reshaped, y_train_split_reshaped, epochs=1, batch_size=batch_size, verbose=1, shuffle=False)
    model.reset_states()

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


## Para poder evaluar tengo que recargar el modelo con batch_size=200 para que sea eficiente

In [198]:
X_val_split, y_val_split = prepare_sequences(X_validation, y_validation, 1)
X_val_split_reshaped = X_val_split.reshape(200,20).T.reshape(4000,1,1)
y_val_split_reshaped = y_val_split.reshape(200,20).T.reshape(4000)

In [199]:
new_model = Sequential()
new_model.add(LSTM(10, batch_input_shape=(200, 1, 1), return_sequences=False, stateful=True))
new_model.add(Dense(1, activation='sigmoid'))
new_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [200]:
old_weights = model.get_weights()
new_model.set_weights(old_weights)

In [46]:
new_model.evaluate(X_val_split_reshaped, y_val_split_reshaped, batch_size=200, verbose=1)

 200/4000 [>.............................] - ETA: 2s

[0.20064812153577805, 1.0]

# Batch size = 100

Hay que tomar de a 100 del total de 1000, hacer un reshape a (100,20) y luego trasponer

In [238]:
X_train_split, y_train_split = prepare_sequences(X_train, y_train, 1)

In [240]:
X_train_split.reshape(1000,20)[:100].T #.flatten()

array([[ 0.,  1.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [206]:
X_train_split.shape

(20000, 1)

In [210]:
X_train_split_reshaped = np.zeros((X_train_split.shape[0]))

In [246]:
batch_size = 100
model = Sequential()
model.add(LSTM(10, batch_input_shape=(batch_size, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [247]:
n_batches = int(len(X_train_split_reshaped)/batch_size)
print(n_batches)
for i in range(10):
    X_to_train = (X_train_split.reshape(1000,20)[i*batch_size:(i+1)*batch_size].T.flatten()).reshape(2000,1,1)
    y_to_train = y_train_split.reshape(1000,20)[i*batch_size:(i+1)*batch_size].T.flatten()
    model.fit(X_to_train, y_to_train, epochs=1, batch_size=batch_size, verbose=1, shuffle=False)
    model.reset_states()

200
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


In [248]:
X_val_split, y_val_split = prepare_sequences(X_validation, y_validation, 1)
X_val_split_reshaped = X_val_split.reshape(200,20).T.reshape(4000,1,1)
y_val_split_reshaped = y_val_split.reshape(200,20).T.reshape(4000)

new_model = Sequential()
new_model.add(LSTM(10, batch_input_shape=(200, 1, 1), return_sequences=False, stateful=True))
new_model.add(Dense(1, activation='sigmoid'))
new_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

old_weights = model.get_weights()
new_model.set_weights(old_weights)

new_model.evaluate(X_val_split_reshaped, y_val_split_reshaped, batch_size=200, verbose=1)

 200/4000 [>.............................] - ETA: 21s

[0.43642631322145464, 1.0]