<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

# Artificial Intelligence in Finance

## Artificial Intelligence

Dr Yves J Hilpisch | The AI Machine

http://aimachine.io | http://twitter.com/dyjh

## Unsupervised Learning

In [None]:
import numpy as np
import pandas as pd
from pylab import plt, mpl
# The bundled seaborn styles were renamed to 'seaborn-v0_8*' in Matplotlib 3.6
# and the old names were removed in 3.8, so prefer the new name and fall back
# to the legacy one on older installations.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'
# Print arrays with four decimals and without scientific notation.
np.set_printoptions(precision=4, suppress=True)

In [None]:
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

In [None]:
x, y = make_blobs(n_samples=100, centers=4,
                  random_state=500, cluster_std=1.25)

In [None]:
model = KMeans(n_clusters=4, random_state=0)

In [None]:
model.fit(x)

In [None]:
y_ = model.predict(x)

In [None]:
y_

In [None]:
plt.figure(figsize=(10, 6))
plt.scatter(x[:, 0], x[:, 1], c=y_,  cmap='coolwarm');

## Reinforcement Learning

In [None]:
ssp = [1, 1, 1, 1, 0]

In [None]:
asp = [1, 0]

In [None]:
def epoch():
    """Play 100 rounds with a purely random agent and return the reward.

    In every round an action is drawn from the global ``asp`` and a state
    from the global ``ssp``; the reward grows by one whenever they match.
    """
    total = 0
    for _ in range(100):
        # The action is drawn before the state.
        action = np.random.choice(asp)
        state = np.random.choice(ssp)
        total += int(action == state)
    return total

In [None]:
rl = np.array([epoch() for _ in range(15)])
rl

In [None]:
rl.mean()

In [None]:
ssp = [1, 1, 1, 1, 0]

In [None]:
def epoch():
    """Play 100 rounds with an agent that learns from observed states.

    The agent starts with the action space ``[0, 1]`` and appends each
    observed state to it after the round, so later actions are sampled from
    a pool that includes every state seen so far. States are drawn from the
    global ``ssp``. Returns the number of rounds in which the action
    matched the state.
    """
    actions = [0, 1]
    reward = 0
    for _ in range(100):
        action = np.random.choice(actions)
        state = np.random.choice(ssp)
        reward += int(action == state)
        # Remember the observed state for future action draws.
        actions.append(state)
    return reward

In [None]:
rl = np.array([epoch() for _ in range(15)])
rl

In [None]:
rl.mean()

## Supervised Learning

### OLS Regression

In [None]:
def f(x):
    """Return ``2 * x ** 2 - x ** 3 / 3`` for a scalar or NumPy array ``x``."""
    quadratic = 2 * x ** 2
    cubic = x ** 3 / 3
    return quadratic - cubic

In [None]:
x = np.linspace(-2, 4, 25)
x

In [None]:
y = f(x)
y

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(x, y, 'ro');

In [None]:
# OLS slope: covariance of x and y divided by the variance of x.
beta = np.cov(x, y, ddof=0)[0, 1] / np.var(x)
beta

In [None]:
# OLS intercept: chosen so the fitted line passes through (mean(x), mean(y)).
alpha = y.mean() - beta * x.mean()
alpha

In [None]:
y_ = alpha + beta * x

In [None]:
MSE = ((y - y_) ** 2).mean()
MSE

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(x, y, 'ro', label='sample data')
plt.plot(x, y_, lw=3.0, label='linear regression')
plt.legend();

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(x, y, 'ro', label='sample data')
# Fit polynomials of increasing degree and report the in-sample MSE of each.
for deg in [1, 2, 3]:
    reg = np.polyfit(x, y, deg=deg)
    y_ = np.polyval(reg, x)
    MSE = ((y - y_) ** 2).mean()
    print(f'deg={deg} | MSE={MSE:.5f}')
    # Reuse the fitted values instead of evaluating the polynomial again.
    plt.plot(x, y_, label=f'deg={deg}')
plt.legend();

In [None]:
reg

### Neural Network &mdash; `sklearn`

In [None]:
from sklearn.neural_network import MLPRegressor

In [None]:
model = MLPRegressor(hidden_layer_sizes=3 * [256],
                     learning_rate_init=0.03,
                     max_iter=5000)

In [None]:
model.fit(x.reshape(-1, 1), y)

In [None]:
y_ = model.predict(x.reshape(-1, 1))

In [None]:
MSE = ((y - y_) ** 2).mean()
MSE

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(x, y, 'ro', label='sample data')
plt.plot(x, y_, lw=3.0, label='dnn estimation')
plt.legend();

### Neural Network &mdash; `Keras`

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
import tensorflow as tf
from tensorflow import keras
tf.random.set_seed(100)

In [None]:
from keras.layers import Dense
from keras.models import Sequential

In [None]:
model = Sequential()
model.add(Dense(256, activation='relu', input_dim=1)) # <2>
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', optimizer='rmsprop')

In [None]:
((y - y_) ** 2).mean()

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(x, y, 'ro', label='sample data')
# Train in five rounds of 100 epochs each and plot the fit after every round.
# The counter has a real name because `_` conventionally marks an unused
# value and also holds the last output in IPython.
for rnd in range(1, 6):
    model.fit(x, y, epochs=100, verbose=False)
    y_ = model.predict(x)
    # Flatten the predictions before subtracting them from the targets.
    MSE = ((y - y_.flatten()) ** 2).mean()
    print(f'round={rnd} | MSE={MSE:.5f}')
    plt.plot(x, y_, '--', label=f'round={rnd}')
plt.legend();

## Universal Approximation

In [None]:
np.random.seed(0)
x = np.linspace(-1, 1)
y = np.random.random(len(x)) * 2 - 1

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(x, y, 'ro', label='sample data')
# Fit polynomials of increasing degree to the random sample and report the
# in-sample MSE of each.
for deg in [1, 5, 9, 11, 13, 15]:
    reg = np.polyfit(x, y, deg=deg)
    y_ = np.polyval(reg, x)
    MSE = ((y - y_) ** 2).mean()
    print(f'deg={deg:2d} | MSE={MSE:.5f}')
    # Reuse the fitted values instead of evaluating the polynomial again.
    plt.plot(x, y_, label=f'deg={deg}')
plt.legend();

In [None]:
model = Sequential()
model.add(Dense(256, activation='relu', input_dim=1))
for _ in range(3):
    model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', optimizer='rmsprop')

In [None]:
model.summary()

In [None]:
%%time
plt.figure(figsize=(10, 6))
plt.plot(x, y, 'ro', label='sample data')
# Train in seven rounds of 500 epochs each and plot the fit after every round.
# The counter has a real name because `_` conventionally marks an unused
# value and also holds the last output in IPython.
for rnd in range(1, 8):
    model.fit(x, y, epochs=500, verbose=False)
    y_ = model.predict(x)
    # Flatten the predictions before subtracting them from the targets.
    MSE = ((y - y_.flatten()) ** 2).mean()
    print(f'round={rnd} | MSE={MSE:.5f}')
    plt.plot(x, y_, '--', label=f'round={rnd}')
plt.legend();

## Importance of Big Data

In [None]:
f = 5
n = 10

In [None]:
np.random.seed(100)

In [None]:
x = np.random.randint(0, 2, (n, f))
x

In [None]:
y = np.random.randint(0, 2, n)
y

In [None]:
model = Sequential()
model.add(Dense(256, activation='relu', input_dim=f))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop',
             metrics=['acc'])

In [None]:
hist = model.fit(x, y, epochs=50, verbose=False)

In [None]:
y_ = np.where(model.predict(x).flatten() > 0.5, 1, 0)
y_

In [None]:
y == y_

In [None]:
res = pd.DataFrame(hist.history)

In [None]:
res.plot(figsize=(10, 6));

In [None]:
f = 10
n = 250

In [None]:
np.random.seed(100)

In [None]:
x = np.random.randint(0, 2, (n, f))
x[:4]

In [None]:
y = np.random.randint(0, 2, n)
y[:4]

In [None]:
2 ** f

In [None]:
# Column labels f0, f1, ... – one per feature.
fcols = [f'f{i}' for i in range(f)]
fcols

In [None]:
data = pd.DataFrame(x, columns=fcols)
data['l'] = y

In [None]:
data.info()

In [None]:
grouped = data.groupby(list(data.columns))

In [None]:
freq = grouped['l'].size().unstack(fill_value=0)

In [None]:
freq['sum'] = freq[0] + freq[1]

In [None]:
freq.head(10)

In [None]:
freq['sum'].describe().astype(int)

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [None]:
model = MLPClassifier(hidden_layer_sizes=[128, 128, 128],
                      max_iter=1000, random_state=100)

In [None]:
model.fit(data[fcols], data['l'])

In [None]:
accuracy_score(data['l'], model.predict(data[fcols]))

In [None]:
split = int(len(data) * 0.7)

In [None]:
train = data[:split]
test = data[split:]

In [None]:
model.fit(train[fcols], train['l'])

In [None]:
accuracy_score(train['l'], model.predict(train[fcols]))

In [None]:
accuracy_score(test['l'], model.predict(test[fcols]))

In [None]:
factor = 50

In [None]:
big = pd.DataFrame(np.random.randint(0, 2, (factor * n, f)),
                   columns=fcols)

In [None]:
big['l'] = np.random.randint(0, 2, factor * n)

In [None]:
# Recompute the split point for the larger data set: the earlier `split` was
# derived from the 250-row `data` frame and would leave only 175 rows of
# `big` for training.
split = int(len(big) * 0.7)
train = big[:split]
test = big[split:]

In [None]:
model.fit(train[fcols], train['l'])

In [None]:
accuracy_score(train['l'], model.predict(train[fcols]))

In [None]:
accuracy_score(test['l'], model.predict(test[fcols]))

In [None]:
# Group by every column of `big` itself (features and label) rather than
# borrowing the column list from the smaller `data` frame.
grouped = big.groupby(list(big.columns))

In [None]:
freq = grouped['l'].size().unstack(fill_value=0)

In [None]:
freq['sum'] = freq[0] + freq[1]

In [None]:
freq.head(6)

In [None]:
freq['sum'].describe().astype(int)

In [None]:
8 ** 25

<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

<br><br><br><a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:ai@tpq.io">ai@tpq.io</a>