<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# NLP Basics

**Prediction of Sequences of Numbers (RNNs)**

&copy; Dr. Yves J. Hilpisch

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>

## Imports

In [None]:
!git clone https://github.com/tpq-classes/natural_language_processing.git
import sys
sys.path.append('natural_language_processing')


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
# Print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)
# Presumably meant to silence TensorFlow's native (C++) log output.
# NOTE(review): tensorflow is already imported in an earlier cell; this
# variable is usually expected to be set before that import -- confirm it
# still takes effect when set here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
import warnings
# Ignore every Python warning from here on (presumably to keep the
# notebook output uncluttered).
warnings.simplefilter('ignore')

In [None]:
from pylab import plt
plt.style.use('seaborn-v0_8')
%config InlineBackend.figure_format = 'svg'

## Recurrent Neural Networks

For schematic representations, see, e.g., <https://certificate.tpq.io/aiif_topics.pdf>.

In [None]:
# Toy sequence: the integers 0 through 14.
n = np.arange(15)

In [None]:
n

In [None]:
lags = 3

In [None]:
# Sliding windows over n: each sample uses `lags` consecutive values as
# input and the value that follows them as target, served in batches of 3.
g = TimeseriesGenerator(n, n, length=lags, batch_size=3)

In [None]:
list(g)

In [None]:
# Two stacked SimpleRNN layers followed by a single linear output unit,
# trained on mean squared error.
model = Sequential([
    SimpleRNN(
        12,
        activation='relu',
        return_sequences=True,  # pass the whole sequence on to the next RNN
        input_shape=(lags, 1),
    ),
    SimpleRNN(12, activation='relu'),
    Dense(1, activation='linear'),
])
model.compile(optimizer=Adam(), loss='mse')

In [None]:
model.summary()

In [None]:
%time model.fit(g, epochs=500, verbose=False)

In [None]:
model.predict(g).flatten()

In [None]:
n[lags:]

In [None]:
model.predict(g).round().flatten()

In [None]:
model.evaluate(g)

## Long Short-Term Memory (LSTM) Neural Networks

### LSTM for Estimation

In [None]:
# Two stacked LSTM layers followed by a single linear output unit,
# trained on mean squared error.
model = Sequential([
    LSTM(
        12,
        activation='relu',
        return_sequences=True,  # pass the whole sequence on to the next LSTM
        input_shape=(lags, 1),
    ),
    LSTM(12, activation='relu'),
    Dense(1, activation='linear'),
])
model.compile(optimizer=Adam(), loss='mse')

In [None]:
model.summary()

In [None]:
%time model.fit(g, epochs=500, verbose=False)

In [None]:
n[lags:]

In [None]:
model.predict(g).round().flatten()

In [None]:
model.evaluate(g)

### Categorical Classification

In [None]:
list(g)

In [None]:
len(set(n))

In [None]:
class CategoricalTimeseriesGenerator(TimeseriesGenerator):
    """Timeseries generator that returns its targets one-hot encoded.

    Works like ``TimeseriesGenerator`` except that each batch of targets is
    passed through ``to_categorical`` before it is returned.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to ``TimeseriesGenerator``.
    num_classes : int, optional
        Width of the one-hot vectors. If omitted, it is derived once from
        the targets as ``max(targets) + 1``, so that every label seen in the
        targets has a slot. Pass it explicitly when the targets do not
        contain the largest label.
    """

    def __init__(self, *args, num_classes=None, **kwargs):
        super().__init__(*args, **kwargs)
        if num_classes is None:
            # Labels serve as indices into the one-hot vector, so the width
            # must exceed the largest label. Deriving it from the generator's
            # own targets avoids depending on a notebook-level variable.
            num_classes = int(np.max(self.targets)) + 1
        self.num_classes = num_classes

    def __getitem__(self, index):
        """Return batch ``index`` as ``(X, one_hot_y)``."""
        X, y = super().__getitem__(index)
        return X, to_categorical(y, num_classes=self.num_classes)

In [None]:
# Same windowing arguments as `g`, but the targets come back one-hot encoded.
g_cat = CategoricalTimeseriesGenerator(n, n, length=lags, batch_size=3)

In [None]:
#list(g_cat)[:2]

In [None]:
# Two stacked LSTM layers followed by a softmax layer with one output unit
# per distinct value in n, trained on categorical cross-entropy.
model = Sequential([
    LSTM(
        12,
        activation='relu',
        return_sequences=True,  # pass the whole sequence on to the next LSTM
        input_shape=(lags, 1),
    ),
    LSTM(12, activation='relu'),
    Dense(len(set(n)), activation='softmax'),
])
model.compile(optimizer=Adam(), loss='categorical_crossentropy')

In [None]:
%time model.fit(g_cat, epochs=250, verbose=False)

In [None]:
model.predict(g_cat)[:2]

In [None]:
np.argmax(model.predict(g_cat), axis=1)

In [None]:
model.evaluate(g_cat)

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>