<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# NLP Basics

**Prediction of Sequences of Numbers**

&copy; Dr. Yves J. Hilpisch

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>

## Imports

In [None]:
# Clone the course repository into the working directory (IPython shell escape).
!git clone https://github.com/tpq-classes/natural_language_processing.git
import sys
# Put the cloned directory on the module search path so its modules can be imported.
sys.path.append('natural_language_processing')


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers.legacy import Adam
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [None]:
np.set_printoptions(suppress=True)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
import warnings
# Suppress all Python warnings from here on.
warnings.simplefilter('ignore')

In [None]:
from pylab import plt
plt.style.use('seaborn-v0_8')
# Render inline figures as SVG.
%config InlineBackend.figure_format = 'svg'

## Simple Sequence

In [None]:
n = np.arange(0, 15)  # the integers 0, 1, ..., 14
n

In [None]:
lags = 3  # number of preceding values used as features for each label

In [None]:
def create_matrix(n, lags=lags):
    """Stack every run of ``lags + 1`` consecutive values of ``n`` as a row.

    One row is produced per start index in ``range(len(n) - lags)``; row
    ``i`` is ``n[i:i + lags + 1]``. The default for ``lags`` is the value
    the global ``lags`` holds at the moment this function is defined.
    """
    width = lags + 1
    windows = [n[start:start + width] for start in range(len(n) - lags)]
    return np.array(windows)

In [None]:
data = create_matrix(n)  # each row: lags feature values followed by one label

In [None]:
f = data[:, :lags]  # features data
f[:4]

In [None]:
l = data[:, lags]  # labels data
l[:4]

In [None]:
# Least-squares fit of the labels on the lagged features (no intercept
# column is added); element [0] of the result holds the coefficients.
reg = np.linalg.lstsq(f, l, rcond=None)[0]
reg

In [None]:
np.dot(f, reg)  # in-sample predictions of the linear fit

In [None]:
# Dense network: one hidden layer of 24 ReLU units and a single linear
# output unit, compiled with MSE loss and a default Adam optimizer.
model = Sequential()
model.add(Dense(24, activation='relu', input_dim=lags))
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', optimizer=Adam())

In [None]:
# Train on the complete data set for 1000 epochs without progress output.
model.fit(f, l, epochs=1000, verbose=False)

In [None]:
model.predict(f)  # raw in-sample predictions

In [None]:
model.predict(f).round()  # rounded predictions, for comparison with the labels

In [None]:
l  # true labels

## Deterministic Sequence

In [None]:
n = np.arange(-5, 5.5)  # -5.0, -4.0, ..., 5.0 (the 5.5 stop keeps 5.0 in the range)
n

In [None]:
nf = n ** 3  # element-wise cube of n
nf

In [None]:
data = create_matrix(nf)  # each row: lags feature values followed by one label
data

In [None]:
f = data[:, :lags]  # features data
f[:4]

In [None]:
l = data[:, lags]  # labels data
l[:4]

In [None]:
def create_model(hu=24):
    """Build and compile a dense network with one hidden layer.

    ``hu`` is the number of ReLU units in the hidden layer. The input
    dimension is taken from the global ``lags``; the output layer is a
    single linear unit. The model is compiled with mean-squared-error
    loss and a default ``Adam`` optimizer.
    """
    hidden = Dense(hu, activation='relu', input_dim=lags)
    output = Dense(1, activation='linear')
    network = Sequential([hidden, output])
    network.compile(optimizer=Adam(), loss='mse')
    return network

In [None]:
model = create_model()  # fresh, untrained network

In [None]:
# Time the training run: 5000 epochs on the complete data set, no progress output.
%time model.fit(f, l, epochs=5000, verbose=False)

In [None]:
model.predict(f)  # raw in-sample predictions

In [None]:
p = model.predict(f).round().flatten()  # rounded predictions as a 1-D array
p

In [None]:
l  # true labels

In [None]:
# True values as a line, rounded predictions as red dots. The first `lags`
# points are skipped because they have no prediction.
plt.plot(n[lags:], nf[lags:])
plt.plot(n[lags:], p, 'ro');

## Random Sequence

In [None]:
from numpy.random import default_rng

In [None]:
rng = default_rng()  # new way: unseeded Generator, so draws differ between runs

In [None]:
rng.integers(0, 8, 15)  # new way: 15 integers drawn from 0..7

In [None]:
np.random.randint(0, 8, 15)  # "legacy" way: same range via the global random state

In [None]:
n = np.random.randint(0, 8, 15)  # the sequence used below (legacy API)
n

In [None]:
data = create_matrix(n)  # each row: lags feature values followed by one label
data

In [None]:
f = data[:, :lags]  # features data
f[:4]

In [None]:
l = data[:, lags]  # labels data
l[:4]

In [None]:
model = create_model()  # fresh, untrained network

In [None]:
# Time the training run: 1000 epochs on the complete data set, no progress output.
%time model.fit(f, l, epochs=1000, verbose=False)

In [None]:
model.predict(f)  # raw in-sample predictions

In [None]:
p = model.predict(f).round().flatten()  # rounded predictions as a 1-D array
p

In [None]:
l  # true labels

In [None]:
p - l  # rounded prediction minus label, per row

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>