# CIML Sequence Learning exercise

**Goal:**  Use SimpleRNN and/or GRU to learn the Keras API and its options.  

**Problem:** Given a sequence with a real-valued variable and a binary indicator at each time step,
              add up the real values at the time steps where indicator=1

**Exercise Tasks:**
<br>
1
run notebook and look at graph (near end) with **model mse vs baseline (mean)** to see performance
<br>
2 change options that make up data:
<br>
$\;\;\;\;\;\;$ a. set the `t_fixed` boolean to False so that the time steps where indicator=1 are random
<br>
$\;\;\;\;\;\;$ b. also, set num1=4 (number of ones=4, for example) 
<br>
$\;\;\;\;\;\;$ c. RERUN 
<br>
3 
a. find the code that defines the SimpleRNN model, copy it to the next cell, and change it to use GRU
<br>
b. find the code cell that compiles and fits the model (i.e. look for comments with "<<<<----" in the text), change the model to use 'GRU', and rerun the notebook
<br>
Is there a difference between SRN or GRU vs baseline?
<br>
What if you increase number of hidden units or layers on one or both?
<br>
<br>
**Other things to try (with more time):**

*change the logic that makes the target:* 
<br>
 try summing the values that occur 1, 2 or 3 steps before each time step where indicator=1 (look for the nback variable below) 

*change input to use the random sine waves: (see seqlearning_timeseries notebook )*
<br>
 set the target to be the input one step ahead; note that you have to change the model definition to use the TimeDistributed option
 <br>



In [None]:
#IMPORT STATEMENTS
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

np.random.seed(1)  # for reproducibility
 
from tensorflow import keras
import tensorflow as tf


**Next cell has the sequence options to choose**

In [None]:
# ======================= options to change =================
# Data dimensions
samplesize_2use = 500   # number of sequences to generate; pick a large enough size
nsteps_2use = 20        # T, the number of time steps in each sequence

# <<<<<<<<<<<------------------ data options used by the exercise
num1 = 2                # number of 1s in the binary variable
t_fixed = True          # True: the 1s sit at fixed time steps; False: at random time steps
t_fix_inds = [5, 15]    # the time steps set to 1 when t_fixed is True

# Parameters for running the model
numunits = 64           # passed as the unit count of each recurrent layer
# NOTE(review): act2use does not appear to be referenced by the model cells
# in this notebook - confirm before relying on it.
act2use = 'relu'        # one of 'relu', 'sigmoid' or 'tanh'

print('options done')

**Next cell generates binary indicator variable that is 1 according to options**

In [None]:
# Build the binary indicator channel: zeros everywhere, with 1s marking the
# time steps whose random value goes into the target.
#   t_fixed true  -> the steps in t_fix_inds are set to 1 in every sample
#                    (num1 is not consulted in this branch)
#   t_fixed false -> num1 distinct steps are drawn at random for each sample

nback = 0  # steps to look backward when adding
binary_num1 = np.zeros((samplesize_2use, nsteps_2use, 1))

if not t_fixed:
    # Draw from the first (nsteps_2use - nback) steps and shift by nback, so
    # each marked step has at least nback steps in front of it.
    candidate_steps = np.arange(nsteps_2use - nback)
    for sample_idx in range(samplesize_2use):
        picked_steps = np.random.choice(candidate_steps, size=num1, replace=False, p=None)
        for step in picked_steps:
            binary_num1[sample_idx, step + nback] = 1
else:
    for fixed_step in t_fix_inds:
        binary_num1[:, fixed_step] = 1

#plt.plot(binary_num1[1,:],'.')  #if you want to see the sample
print('binary variable generated')

In [None]:
#Sanity check: every row (sample) should contain the expected number of 1s
# Sum over the time axis once; the array's own max/min reduce over all
# samples instead of comparing row arrays with the Python builtins.
ones_per_sample = np.sum(binary_num1, axis=1)
print('max number of 1s in a sample:', ones_per_sample.max())
print('min number of 1s in a sample:', ones_per_sample.min())

**Next cells generate random sequence of numbers**

In [None]:
#----- Generate sequence of random numbers  - 
def generate_random_series(batch_size, n_steps, noise_amp):
    """Return random sequences drawn with ``np.random.rand`` and scaled.

    Args:
        batch_size: number of sequences to generate.
        n_steps: number of time steps in each sequence.
        noise_amp: multiplier applied to every drawn value.

    Returns:
        float32 array of shape (batch_size, n_steps, 1).
    """
    uniform_draws = np.random.rand(batch_size, n_steps)
    scaled = noise_amp * uniform_draws
    # Append a trailing axis of length 1, then cast to float32.
    return scaled[..., np.newaxis].astype(np.float32)

noiseAmp_2use = 1  # multiplier applied to the random values
my_randnum_seq = generate_random_series(
    samplesize_2use,
    nsteps_2use,
    noiseAmp_2use,
)
print(my_randnum_seq.shape)
#plt.plot(my_randnum_seq[1,:,0])
#plt.title('a sample time series of random values unif dist')

**Next cell makes the Y target values**

In [None]:
# Combine the random sequence and the indicator into the model input, then
# build one scalar target per sample.
X_train = np.concatenate((my_randnum_seq, binary_num1), axis=2)  # channel 0: random value, channel 1: indicator
N, T, P = X_train.shape

Y_train = np.zeros((N,))  # one target per sample (not one per time step)

for i in range(N):
    # Time steps of sample i where the indicator channel equals 1.
    locs = np.where(X_train[i, :, 1] == 1)
    # Target: sum of the random values found nback steps before each marked step.
    Y_train[i] = np.sum(X_train[i, locs[0] - nback, 0])

# Baseline: the MSE obtained by always predicting the mean target.
ymean = np.mean(Y_train)
ymean_mse = np.mean(np.square(np.subtract(Y_train, ymean)))
plt.plot(Y_train, '.')
plt.ylabel('true value')
plt.xlabel('data sample')
# Separators keep the labels from running into the numbers.
plt.title('Y train targets, mean: ' + str(round(ymean, 5)) + '; mean mse: ' + str(round(ymean_mse, 5)))


In [None]:
# Show both input channels of the first sample, with its target in the title
first_sample = X_train[0]
plt.plot(first_sample[:, 0], label='X random')
plt.plot(first_sample[:, 1], label='X indicator')
plt.legend()
plt.title('sample data, Y target:' + str(np.round(Y_train[0], 2)))

## The next cells set up the model, do training, plot results
## Review the results plot and note the performance relative to baseline mse

In [None]:
# Split data into a training part and a held-out part (used as validation data when fitting)
from sklearn.model_selection import train_test_split

# test_size=0.33 holds out 33% of the samples as test data
X_train1, X_test1, Y_train1, Y_test1 = train_test_split(X_train, Y_train, test_size=0.33)

# Print every shape explicitly: a bare expression in the middle of a cell
# displays nothing.
print(X_train1.shape)
print(Y_train1.shape)
print(Y_test1.shape)

In [None]:
# Simple RNN: two stacked SimpleRNN layers followed by one linear output unit.
# Set return_sequences=True on every recurrent layer except the last one
# when only the last output matters.
nvar = P  # number of input variables, taken from the last dimension of X_train

mysrn_model = keras.models.Sequential(
    [
        keras.layers.SimpleRNN(
            units=numunits,
            return_sequences=True,
            input_shape=[None, nvar],
        ),
        keras.layers.SimpleRNN(units=numunits),
        keras.layers.Dense(units=1, activation='linear'),
    ]
)

mysrn_model.summary()

## Add in the GRU definition here:

In [None]:
# Set up a 2 layer GRU RNN; copy the code that defines the SRN model and change
#  keras.layers.SimpleRNN   to   keras.layers.GRU

#mygru_model = keras.models.Sequential([  .........
 
        #<<<<<<<<<<<<<<---------------------- 

#mygru_model.summary()


In [None]:
# Early-stopping callback object; it is handed to fit() through `callbacks`
my_early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_mse',
    mode='min',
    patience=10,   # num epochs with no improvement after which training will be stopped
    restore_best_weights=True,
    verbose=1,
)


In [None]:
numepochs = 100

my_model = mysrn_model   #<<<<<<<<<----- you can change the model here and run these cells again
#my_model=mygru_model

# metrics is given as a list: Keras documents it as a list of metrics, and
# newer Keras versions reject a bare string. The 'mse' entry yields the
# 'mse' / 'val_mse' history keys that the early-stopping monitor and the
# performance plot read.
my_model.compile(optimizer='adam',  # just use 'adam' to get defaults
                 loss='mse',
                 metrics=['mse'])

# The held-out split serves as validation data; the callback may end
# training before numepochs is reached.
fit_result = my_model.fit(X_train1, y=Y_train1,
                          validation_data=(X_test1, Y_test1),
                          epochs=numepochs, batch_size=32, verbose=1,
                          callbacks=[my_early_stopping])


In [None]:
# Plot performance: training and validation metric per epoch
print('scores on validation data:')
mytest_eval = my_model.evaluate(X_test1, Y_test1)

metric2plot = 'mse'
history = fit_result.history

# The title records the first layer's name, the data options and the
# baseline MSE, so the curves can be compared against the baseline.
title_parts = [
    my_model.layers[0].name, '; ',
    str(num1), ' #1s; fixed:', str(t_fixed),
    ' nback:', str(nback), '; ',
    metric2plot, ' over training;', ' basemse: ',
    str(np.around(ymean_mse, 5)),
]

plt.figure()
for curve_key in (metric2plot, 'val_' + metric2plot):
    plt.plot(history[curve_key])
plt.title(''.join(title_parts))
plt.ylabel(metric2plot)
plt.xlabel('epoch')
#plt.ylim([0,1])
plt.legend(['train', 'valid'], loc='lower left')
plt.show()

In [None]:
# ----------------------

In [None]:
#Optional: plot training-set targets against the model's predictions
#(just for exploring performance)   change False to True if you want to try it
if False:
    mytrain_pred = my_model.predict(X_train1)
    plt.plot(Y_train1, '.')
    plt.plot(mytrain_pred, '.')
    plt.ylabel('value')
    plt.xlabel('nth data sample')
    plt.legend(['train,targets', 'predictions'], loc='lower left')


In [None]:
# Optional: plot test-set targets against the model's predictions;
# change False to True if you want to try it
if False:
    mytest_pred = my_model.predict(X_test1)
    plt.plot(Y_test1, '.')
    plt.plot(mytest_pred, '.')
    plt.ylabel('value')
    plt.xlabel('nth data sample')
    plt.legend(['test,targets', 'predictions'], loc='lower left')
