<a href="https://colab.research.google.com/github/schmidt-marvin/ESI_2022_TecAA/blob/main/KNN_WS2324/nn04/nn04-xx-unstructured.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Einfaches Beispiel RNN
Das erste Beispiel zeigt eine einfache Implementierung eines RNN, das lediglich eine verrauschte Sinusfunktion approximieren soll.

In [None]:
#simple example: approximation of sin+rand by RNN
from matplotlib import pyplot as plt
import numpy as np

import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
#from tensorflow.data import Dataset
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import optimizers

#generate data: two superimposed sine waves plus Gaussian noise
np.random.seed(42) #fixed seed so that Restart & Run All reproduces the same signal
t=np.linspace(0,4000,4000*5) #sample points, computed once and reused for both sines
v1=np.sin(t)+0.4*np.sin(4*t)
v1=v1+0.6*np.random.randn(len(v1))
#scale by the largest magnitude (not only the largest positive value) so that
#every sample lies in [-1,1], the range a tanh output layer can produce
v1=v1/np.max(np.abs(v1))
v2=v1 #targets are read from the same signal (alias, not a copy)

#init parameter
train_split=int(len(v1)/2) #how to split training and validation (no test data here)
samp_size=20 #length of a sequence the RNN is supposed to process

batch_size = 300
epochs=8
buffer_size = 100

#Helper function reshape data
def univariate_data(dataset, target_data, start_index, end_index, history_size, target_size):
  """Cut a series into sliding input windows with one future label each.

  For every position i in [start_index + history_size, end_index) the window
  dataset[i-history_size:i] becomes one sample of shape (history_size, 1) and
  target_data[i+target_size] becomes its label.

  Args:
    dataset: sequence the input windows are cut from.
    target_data: sequence the labels are read from.
    start_index: index of the first element of the first window.
    end_index: exclusive upper bound for i; None means "as far as possible".
      A value that would push a label index past the end of target_data is
      clamped instead of raising IndexError.
    history_size: number of elements per window.
    target_size: offset added to i to find the label.

  Returns:
    Tuple (data, labels): data has shape (n, history_size, 1), labels has n
    entries. Both have length 0 if no window fits.
  """
  dataset = np.asarray(dataset)
  target_data = np.asarray(target_data)

  first = start_index + history_size
  # Largest admissible bound: the window must fit into dataset and the label
  # index i + target_size must exist in target_data.
  limit = min(len(dataset), len(target_data) - target_size)
  if end_index is None:
    end_index = limit
  else:
    end_index = min(end_index, limit)

  data = []
  labels = []
  for i in range(first, end_index):
    # Reshape data from (history_size,) to (history_size, 1)
    data.append(np.reshape(dataset[i-history_size:i], (history_size, 1)))
    labels.append(target_data[i+target_size])

  if not data:
    # Keep the array rank stable so callers can rely on the sample shape.
    return np.empty((0, history_size, 1), dtype=dataset.dtype), target_data[:0]
  return np.array(data), np.array(labels)


# Cut the signal into (window, label) pairs with windows of samp_size samples
# and target_size=5. Training pairs use positions up to train_split ...
x_train, y_train = univariate_data(
    v1, v2,
    start_index=0, end_index=train_split,
    history_size=samp_size, target_size=5)
# ... validation pairs use the remainder (end_index=None: run to the end).
x_val, y_val = univariate_data(
    v1, v2,
    start_index=train_split, end_index=None,
    history_size=samp_size, target_size=5)

# Sanity check: array shapes, then the first window and its label.
print(x_train.shape)
print(y_train.shape)
print('First signal')
print(x_train[0])
print('\n First Value to predict')
print(y_train[0])

In [None]:

#wrap the numpy arrays in tf.data pipelines for training and validation
#training: cache, shuffle inside a buffer of buffer_size elements, batch, repeat
train_univariate = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .cache()
    .shuffle(buffer_size)
    .batch(batch_size)
    .repeat()
)

#validation: batch and repeat only (no caching, no shuffling)
val_univariate = (
    tf.data.Dataset.from_tensor_slices((x_val, y_val))
    .batch(batch_size)
    .repeat()
)

In [None]:
# pip install numpy==1.19 (big nope m8)

In [None]:
#build RNN model by means of keras

model = Sequential()

#declare the input shape with an explicit Input layer; passing input_shape= to
#the first layer is deprecated in current Keras releases
model.add(layers.Input(shape=x_train.shape[-2:])) #last two axes of x_train: one window
model.add(layers.SimpleRNN(20,activation='tanh')) #A_1 in upper diagram
model.add(layers.Dense(1,activation='tanh')) #A_2 in the upper diagram

# setup optimizer and define loss
#(no explicit model.build() call: the Input layer already fixes the input shape)
opt=optimizers.SGD(learning_rate=0.001)
model.compile(loss='mae', optimizer=opt)

model.summary()

In [None]:
#train the RNN; train_univariate repeats endlessly, so steps_per_epoch and
#validation_steps fix how many batches are drawn per epoch

model.fit(
    train_univariate,
    validation_data=val_univariate,
    epochs=epochs,
    steps_per_epoch=500,
    validation_steps=50,
)

In [None]:
#plot prediction vs. true values for the first 300 validation samples
pred_y = model.predict(x_val)

fig, ax = plt.subplots()
ax.plot(y_val[:300], label="true")
ax.plot(pred_y[:300], label="pred")
ax.legend()
plt.show()

## Einfaches Beispiel LSTM

In [None]:
#build LSTM model by means of keras

model = Sequential()

#declare the input shape with an explicit Input layer; passing input_shape= to
#the first layer is deprecated in current Keras releases
model.add(layers.Input(shape=x_train.shape[-2:])) #last two axes of x_train: one window
#model.add(layers.GRU(20,activation='tanh')) #alternative for A_1: GRU instead of LSTM
model.add(layers.LSTM(20,activation='tanh')) #A_1 in upper diagram
model.add(layers.Dense(1,activation='tanh')) #A_2 in the upper diagram

# setup optimizer and define loss
opt=optimizers.Adam(learning_rate=0.0005)
model.compile(loss='mae', optimizer=opt)

model.summary()

In [None]:
#train the LSTM; train_univariate repeats endlessly, so steps_per_epoch and
#validation_steps fix how many batches are drawn per epoch

model.fit(
    train_univariate,
    validation_data=val_univariate,
    epochs=epochs,
    steps_per_epoch=500,
    validation_steps=50,
)

In [None]:
#plot prediction vs. true values for the first 600 validation samples
pred_y = model.predict(x_val)

fig, ax = plt.subplots()
ax.plot(y_val[:600], label="true")
ax.plot(pred_y[:600], label="pred")
ax.legend()
plt.show()

## Example: classification of movie reviews

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow_datasets as tfds
import tensorflow as tf



# Load the IMDB reviews in the 'subwords8k' configuration together with the
# dataset metadata.
# NOTE(review): this config depends on the tfds text-encoder feature - confirm
# that the installed tensorflow_datasets version still provides it.
dataset, info = tfds.load(
    'imdb_reviews/subwords8k',
    with_info=True,
    as_supervised=True,
)
train_dataset = dataset['train']
test_dataset = dataset['test']

# The encoder is read from the metadata of the 'text' feature.
encoder = info.features['text'].encoder

print('Vocabulary size: {}'.format(encoder.vocab_size))

In [None]:
#round-trip a sample string through the encoder attached to the data set:
#encode it, decode the result again, then show both

sample_string = 'Hallo Saarland'
encoded_string = encoder.encode(sample_string)
original_string = encoder.decode(encoded_string)

print(encoded_string, original_string, sep='\n')

In [None]:
#prepare dataset
#NOTE: buffer_size and batch_size reuse the names of the sine-wave example's
#parameters; re-running the earlier tf.data cell after this one picks up these values
buffer_size = 10000
batch_size = 64

#always start from the raw splits in `dataset` instead of rebinding
#train_dataset/test_dataset from themselves, so that running this cell a second
#time does not shuffle and batch an already batched dataset
train_dataset = dataset['train'].shuffle(buffer_size)
#get_output_shapes stands in for the older Dataset.output_shapes attribute
train_dataset = train_dataset.padded_batch(batch_size, tf.compat.v1.data.get_output_shapes(train_dataset))

test_dataset = dataset['test']
test_dataset = test_dataset.padded_batch(batch_size, tf.compat.v1.data.get_output_shapes(test_dataset))

In [None]:
#build LSTM model: embedding -> bidirectional LSTM -> dense layer -> sigmoid output
lstm_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(encoder.vocab_size, 64),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

#setup training: binary cross-entropy loss, Adam with default settings, accuracy metric
lstm_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
lstm_model.summary()

#start training: one epoch, validating on 30 batches of the test split
lstm_model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=1,
    validation_steps=30,
)


In [None]:
#try the trained model on a hand-written review

input_text = 'This is a nice movie'
token_ids = encoder.encode(input_text)
#cast the encoded ids to float32 and add a leading axis of size 1
encoded_text = tf.expand_dims(tf.cast(token_ids, tf.float32), 0)
print(encoded_text)
print(lstm_model.predict(encoded_text))