<a href="https://colab.research.google.com/github/mhuckvale/pals0039/blob/master/Demo_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Demonstration of Recurrent Networks

In [0]:
import numpy as np

%tensorflow_version 2.x
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, GRU, TimeDistributed, Embedding, Flatten

import matplotlib.pyplot as plt
%matplotlib inline

## Example 1 - Clean a noisy waveform
(a) generate a noisy sinusoid

In [0]:
N = 1000

# Seed NumPy's global generator so the noise drawn here (and by later
# np.random calls in a top-to-bottom run) is identical on every
# Restart & Run All. Keras weight initialisation is not seeded by this.
np.random.seed(0)

# Clean target: a sinusoid of amplitude 5 sampled at integer time steps.
t=np.arange(0,N)
y=5*np.sin(0.1*t)
# Network input: the clean target plus unit-variance Gaussian noise.
x=y+np.random.normal(size=N)

plt.figure(figsize=(15,4))
plt.plot(t,x,'r-',alpha=0.5,label='noisy input x')
plt.plot(t,y,'b-',label='clean target y')
plt.xlabel('time step')
plt.ylabel('amplitude')
plt.title('Noisy sinusoid and clean target')
plt.legend(loc='upper right')
plt.grid()
plt.show()

# Reshape to (batch, time steps, features): one sequence of N steps with
# a single feature per step.
xtrain=x.reshape(1,N,1)
ytrain=y.reshape(1,N,1)


(b) build a simple recurrent network to clean the signal

In [0]:
# Sequence-to-sequence denoiser: a 10-unit tanh SimpleRNN emits its state at
# every time step (return_sequences=True), and a linear Dense(1), applied to
# each step through TimeDistributed, maps that state to one output sample.
# The time dimension is declared as None instead of N so the trained model
# accepts sequences of any length, not only the N-step training signal.
model=Sequential()
model.add(SimpleRNN(10,activation='tanh',return_sequences=True,input_shape=(None,1)))
model.add(TimeDistributed(Dense(1,activation='linear')))
model.compile(optimizer='rmsprop',loss='mse')
model.summary()

In [0]:
# Train on the single noisy/clean sequence pair for 100 epochs,
# logging one line per epoch (verbose=2).
model.fit(
    xtrain,
    ytrain,
    batch_size=1,
    epochs=100,
    verbose=2,
)


(c) generate some new data for testing and evaluate

In [0]:
# Fresh noise on the same clean sinusoid gives a test input the network
# has not seen during training.
y=5*np.sin(0.1*t)
x=y+np.random.normal(size=N)
xtest=x.reshape(1,N,1)

ypred=model.predict(xtest)

# Quantify the denoising: mean squared error against the clean signal for
# the raw noisy input and for the network output.
mse_noisy=np.mean((x-y)**2)
mse_pred=np.mean((ypred.flatten()-y)**2)
print("MSE noisy input=%.3f MSE prediction=%.3f" % (mse_noisy,mse_pred))

plt.figure(figsize=(15,4))
plt.plot(t,x,'r-',alpha=0.5,label='noisy input')
plt.plot(t,ypred.flatten(),'b-',label='network output')
plt.plot(t,y,'k--',alpha=0.7,label='clean target')
plt.xlabel('time step')
plt.ylabel('amplitude')
plt.title('Denoising unseen test data')
plt.legend(loc='upper right')
plt.grid()
plt.show()


## Example 2 - Parse a sequence
(a) Build some sequences

In [0]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.parse.generate import generate
from nltk import CFG

# use a simple grammar to create two types of sequence A (=unmatched 1/2) or B (=matched 1/2)
# Every sentence starts with its class token ('A' or 'B'), followed by a run
# of padding digits, a pair of 1/2 tokens separated by more padding, and a
# final run of padding:
#   middle0 (class A): the two tokens differ (1 ... 2 or 2 ... 1)
#   middle1 (class B): the two tokens are equal (1 ... 1 or 2 ... 2)
#   padding          : one or more digits, each either 3 or 4
seq_grammar="""
S -> 'A' padding middle0 padding | 'B' padding middle1 padding
middle0 -> tokena padding tokenb | tokenb padding tokena
middle1 -> tokena padding tokena | tokenb padding tokenb
padding -> digit | digit padding
tokena -> '1' 
tokenb -> '2'
digit -> '3' | '4' 
"""
# Turn the grammar text into an nltk CFG object and print it.
grammar = CFG.fromstring(seq_grammar)
print(grammar)

# Enumerate sentences from the grammar. The leading token is the class
# label ('A' -> 0, anything else -> 1); the remaining tokens are the digits.
data, labels = [], []
for sentence in generate(grammar, depth=7, n=100000):
    class_token, digit_tokens = sentence[0], sentence[1:]
    data.append([int(tok) for tok in digit_tokens])
    labels.append(int(class_token != 'A'))

# Pad at the end with 0 so every row has the same length, giving a
# rectangular int32 array; labels become an integer vector.
data = pad_sequences(data, value=0, padding='post', truncating='post', dtype='int32')
labels = np.array(labels, dtype='int')

# Rows are sequences, columns are positions within a sequence.
ndata, seqlen = data.shape
print("generated", ndata, "sequences of length", seqlen)

# Random 90/10 train/test partition driven by a shuffled index vector.
idx = np.random.permutation(ndata)
ntrain = int(0.9 * ndata)
train_idx, test_idx = idx[:ntrain], idx[ntrain:]

xtrain, ytrain = data[train_idx], labels[train_idx]
xtest, ytest = data[test_idx], labels[test_idx]

# Show the first ten entries of each array as a quick sanity check.
for name, values in (('xtrain', xtrain), ('ytrain', ytrain), ('xtest', xtest), ('ytest', ytest)):
    print(name, values[:10])


(b) see what a conventional dense network can do

In [0]:
# Non-recurrent baseline. Each of the 5 token ids (0 = padding, 1-4 = symbols)
# gets a 5-dimensional embedding. Dense(10) sits before Flatten, so it
# transforms each position's embedding separately; the flattened result
# feeds a single sigmoid unit that outputs the class probability.
model = Sequential([
    Embedding(5, 5, input_length=seqlen),
    Dense(10, activation='tanh'),
    Flatten(),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [0]:
# Train for 10 epochs, holding out 5% of the training set for validation,
# then report loss and accuracy on the separate test partition.
model.fit(
    xtrain,
    ytrain,
    batch_size=50,
    epochs=10,
    validation_split=0.05,
)
loss, accuracy = model.evaluate(xtest, ytest)
print("Loss=%.3f Accuracy=%.3f%%" % (loss, 100 * accuracy))

(c) now try a recurrent network

In [0]:
# Recurrent classifier: the same 5-id, 5-dimensional embedding feeds a
# 10-unit LSTM. return_sequences=False keeps only the output of the last
# time step, which a single sigmoid unit turns into the class probability.
model = Sequential([
    Embedding(5, 5, input_length=seqlen),
    LSTM(10, activation='tanh', return_sequences=False),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [0]:
# Train for 5 epochs, holding out 5% of the training set for validation,
# then report loss and accuracy on the separate test partition.
model.fit(
    xtrain,
    ytrain,
    batch_size=50,
    epochs=5,
    validation_split=0.05,
)
loss, accuracy = model.evaluate(xtest, ytest)
print("Loss=%.3f Accuracy=%.3f%%" % (loss, 100 * accuracy))