<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Machine Learning for Finance

## Types of Machine Learning

Dr Yves J Hilpisch | The Python Quants GmbH

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:ai@tpq.io">ai@tpq.io</a>

In [None]:
import numpy as np
import pandas as pd
import datetime as dt
from pylab import mpl, plt

In [None]:
plt.style.use('seaborn')
mpl.rcParams['font.family'] = 'serif'
np.random.seed(1000)
np.set_printoptions(suppress=True, precision=4)
%config InlineBackend.figure_format = 'svg'

## Unsupervised Learning

### The Data

In [None]:
from sklearn.datasets import make_blobs

In [None]:
# Generate a synthetic sample data set: 250 points around 4 centers.
# `y` is returned alongside the features but is not used by the
# unsupervised models in this section.
X, y = make_blobs(n_samples=250, centers=4,
                  random_state=500, cluster_std=1.25)  

In [None]:
# Plot the raw samples (first feature against second feature).
plt.figure(figsize=(10, 6))
plt.scatter(X[:, 0], X[:, 1], s=50);

### K-Means Clustering

In [None]:
from sklearn.cluster import KMeans  

In [None]:
# 1. step: instantiate the model. `n_init` is pinned to the historical default
# of 10 restarts: scikit-learn 1.2/1.3 emit a FutureWarning when it is left
# unset and 1.4+ switch the default to 'auto', which can change the result.
model = KMeans(n_clusters=4, random_state=0, n_init=10)  # 1. step

In [None]:
model.fit(X)  # 2. step: fit the model to the feature data

In [None]:
y_kmeans = model.predict(X)  # 3. step: predict a cluster label per sample

In [None]:
y_kmeans[:12]  # predicted labels of the first 12 samples

In [None]:
# Same scatter plot as for the raw data, now colored by the predicted label.
plt.figure(figsize=(10, 6))
plt.scatter(X[:, 0], X[:, 1], c=y_kmeans,  cmap='coolwarm');

### Gaussian Mixtures

In [None]:
from sklearn.mixture import GaussianMixture

In [None]:
# GaussianMixture?  (IPython help lookup; uncomment to display the docstring)

In [None]:
# Same three steps as for k-means: instantiate, fit, predict.
model = GaussianMixture(n_components=4, random_state=0)

In [None]:
model.fit(X)

In [None]:
y_gm = model.predict(X)  # component label per sample

In [None]:
len(y_gm)

In [None]:
y_gm[:5]

In [None]:
# Predicted probabilities for the first five samples.
model.predict_proba(X)[:5]

In [None]:
y_gm[:12]

In [None]:
# Element-wise comparison of the two label vectors.
# NOTE(review): this is only True when both models happen to number the
# clusters identically; the same partition with permuted label integers
# would compare as different -- confirm this is the intended check.
(y_gm == y_kmeans).all()  

## Supervised Learning

### The Data

In [None]:
from sklearn.datasets import make_classification

In [None]:
n_samples = 100  # number of samples to generate

In [None]:
# Synthetic classification data with two features, both informative
# (no redundant or repeated features); fixed seed for reproducibility.
X, y = make_classification(n_samples=n_samples, n_features=2, n_informative=2,
                           n_redundant=0, n_repeated=0, random_state=250)

In [None]:
X[:5]  # first five feature rows

In [None]:
X.shape  

In [None]:
y[:5]  # first five labels

In [None]:
y.shape  

In [None]:
# Scatter plot of the two features, colored by label.
plt.figure(figsize=(10, 6))
plt.scatter(x=X[:, 0], y=X[:, 1], c=y, cmap='coolwarm');

### Gaussian Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

In [None]:
model = GaussianNB()  # 1. step: instantiate the model

In [None]:
model.fit(X, y)  # 2. step: fit on features and labels

In [None]:
model.predict_proba(X).round(4)[:5]  # rounded probabilities, first five samples

In [None]:
pred = model.predict(X)  # 3. step: predict labels for the training data

In [None]:
pred  

In [None]:
pred == y  # element-wise: True where the prediction matches the label

In [None]:
accuracy_score(y, pred)  # in-sample: scored on the data used for fitting

In [None]:
Xc = X[y == pred]  # samples whose prediction matches the label
Xf = X[y != pred]  # samples whose prediction differs from the label

In [None]:
# Correct predictions are drawn as dots, wrong ones as crosses; the color
# encodes the true label. vmin/vmax are pinned to the full label range so
# that both calls share one color scale: without them each scatter() call
# normalizes its own subset, and a subset that contains only one class is
# drawn in the wrong color.
plt.figure(figsize=(10, 6))
plt.scatter(x=Xc[:, 0], y=Xc[:, 1], c=y[y == pred],
            marker='o', cmap='coolwarm', vmin=y.min(), vmax=y.max())
plt.scatter(x=Xf[:, 0], y=Xf[:, 1], c=y[y != pred],
            marker='x', cmap='coolwarm', vmin=y.min(), vmax=y.max());

### Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Same workflow as before: instantiate, fit, predict, score.
model = LogisticRegression(C=1)

In [None]:
model.fit(X, y)

In [None]:
model.predict_proba(X).round(4)[:5]  # rounded probabilities, first five samples

In [None]:
pred = model.predict(X)  # predicted labels for the training data

In [None]:
pred[:5]

In [None]:
accuracy_score(y, pred)  # in-sample: scored on the data used for fitting

In [None]:
Xc = X[y == pred]  # samples whose prediction matches the label
Xf = X[y != pred]  # samples whose prediction differs from the label

In [None]:
# Correct predictions are drawn as dots, wrong ones as crosses; the color
# encodes the true label. vmin/vmax are pinned to the full label range so
# that both calls share one color scale: without them each scatter() call
# normalizes its own subset, and a subset that contains only one class is
# drawn in the wrong color.
plt.figure(figsize=(10, 6))
plt.scatter(x=Xc[:, 0], y=Xc[:, 1], c=y[y == pred],
            marker='o', cmap='coolwarm', vmin=y.min(), vmax=y.max())
plt.scatter(x=Xf[:, 0], y=Xf[:, 1], c=y[y != pred],
            marker='x', cmap='coolwarm', vmin=y.min(), vmax=y.max());

### Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# DecisionTreeClassifier?  (IPython help lookup; uncomment to display the docstring)

In [None]:
model = DecisionTreeClassifier(max_depth=1)  # tree restricted to depth 1

In [None]:
# %time is an IPython magic that reports how long the statement takes.
%time model.fit(X, y)

In [None]:
model.predict_proba(X).round(4)[:5]  # rounded probabilities, first five samples

In [None]:
pred = model.predict(X)  # predicted labels for the training data

In [None]:
pred[:5]

In [None]:
accuracy_score(y, pred)  # in-sample: scored on the data used for fitting

In [None]:
Xc = X[y == pred]  # samples whose prediction matches the label
Xf = X[y != pred]  # samples whose prediction differs from the label

In [None]:
# Correct predictions are drawn as dots, wrong ones as crosses; the color
# encodes the true label. vmin/vmax are pinned to the full label range so
# that both calls share one color scale: without them each scatter() call
# normalizes its own subset, and a subset that contains only one class is
# drawn in the wrong color.
plt.figure(figsize=(10, 6))
plt.scatter(x=Xc[:, 0], y=Xc[:, 1], c=y[y == pred],
            marker='o', cmap='coolwarm', vmin=y.min(), vmax=y.max())
plt.scatter(x=Xf[:, 0], y=Xf[:, 1], c=y[y != pred],
            marker='x', cmap='coolwarm', vmin=y.min(), vmax=y.max());

In [None]:
# Tabulate the in-sample accuracy of decision trees whose maximum depth
# grows from 1 to 6.
print(f"{'depth':>8s} | {'accuracy':8s}")
print('-' * 20)
for depth in range(1, 7):
    # fit() returns the estimator itself, so the call can be chained.
    model = DecisionTreeClassifier(max_depth=depth).fit(X, y)
    acc = accuracy_score(y, model.predict(X))
    print(f'{depth:8d} | {acc:8.2f}')

### Deep Neural Network

#### scikit-learn

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
# MLPClassifier?  (IPython help lookup; uncomment to display the docstring)

In [None]:
# Two hidden layers with 75 units each (2 * [75] evaluates to [75, 75]).
model = MLPClassifier(solver='lbfgs', alpha=1e-5, max_iter=500,
                    hidden_layer_sizes=2 * [75], random_state=10)

In [None]:
# %time is an IPython magic that reports how long the statement takes.
%time model.fit(X, y)

In [None]:
pred = model.predict(X)  # predicted labels for the training data
pred

In [None]:
accuracy_score(y, pred)  # in-sample: scored on the data used for fitting

#### TensorFlow/Keras

In [None]:
import os
# Set before TensorFlow is imported below. Presumably this reduces
# TensorFlow's native log output to a minimum -- confirm against the
# TensorFlow documentation.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [None]:
# Seed both TensorFlow's and NumPy's global random number generators.
tf.random.set_seed(1)
np.random.seed(1)

In [None]:
features = 2  # input dimension handed to the first Dense layer

In [None]:
# Feed-forward network: two hidden ReLU layers with 64 units each and a
# single sigmoid output unit, compiled with binary cross-entropy loss,
# the RMSprop optimizer and accuracy as the reported metric.
model = Sequential([
    Dense(64, activation='relu', input_dim=features),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
X[:5]  # first five feature rows

In [None]:
# Train on the full data set for 500 epochs with progress output disabled.
model.fit(X, y, epochs=500, verbose=False)

In [None]:
model.evaluate(X, y)  # evaluated on the same data used for training

In [None]:
# Threshold the model outputs at 0.5 to obtain hard 0/1 labels.
pred = np.where(model.predict(X) > 0.5, 1, 0)

In [None]:
pred.flatten()  # collapse to one dimension for display

## Feature Transforms

In [None]:
from sklearn import preprocessing

In [None]:
X[:5]  # raw features for reference

In [None]:
# Standardized features (StandardScaler fitted on and applied to X).
Xs = preprocessing.StandardScaler().fit_transform(X)  
Xs[:5]

In [None]:
# Min-max scaled features (MinMaxScaler fitted on and applied to X).
Xm = preprocessing.MinMaxScaler().fit_transform(X)  
Xm[:5]

In [None]:
# L1-normalized features; transform() is called directly, without fit().
Xn1 = preprocessing.Normalizer(norm='l1').transform(X)  
Xn1[:5]

In [None]:
# L2-normalized features; transform() is called directly, without fit().
Xn2 = preprocessing.Normalizer(norm='l2').transform(X)  
Xn2[:5]

In [None]:
# Overlay the raw features and their four transformed variants in a single
# figure; each variant gets its own marker and legend entry, the color
# encodes the label.
data_sets = [X, Xs, Xm, Xn1, Xn2]
labels = ['raw', 'standard', 'minmax', 'norm(1)', 'norm(2)']
markers = ['o', '.', 'x', '^', 'v']
plt.figure(figsize=(10, 6))
for points, marker, label in zip(data_sets, markers, labels):
    plt.scatter(x=points[:, 0], y=points[:, 1], c=y, cmap='coolwarm',
                marker=marker, label=label)
plt.legend();

In [None]:
X[:5]  # raw features for reference

In [None]:
# Binarized features (Binarizer with its default settings).
Xb = preprocessing.Binarizer().fit_transform(X)  
Xb[:5]

In [None]:
# Presumably the number of distinct feature combinations after binarizing
# (2 values for each of the 2 features) -- confirm against the text.
2 ** 2  

In [None]:
# Replace every feature value by the index of its bin for the edges -1, 0, 1.
Xd = np.digitize(X, bins=[-1, 0, 1])  
Xd[:5]

In [None]:
# Presumably the number of distinct feature combinations after digitizing
# (4 bins for each of the 2 features) -- confirm against the text.
4 ** 2  

## Train-Test Splits 

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 33% of the samples for testing; fixed seed for the split.
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.33,
                                                    random_state=0)

In [None]:
model = SVC(C=1, kernel='linear')  # support vector classifier, linear kernel

In [None]:
model.fit(train_x, train_y)  # fit on the training subset only

In [None]:
pred_train = model.predict(train_x)  # predictions for the training subset

In [None]:
accuracy_score(train_y, pred_train)  # accuracy on the training subset

In [None]:
pred_test = model.predict(test_x)  # predictions for the held-out subset

In [None]:
test_y == pred_test  # element-wise: True where the prediction is correct

In [None]:
accuracy_score(test_y, pred_test)  # accuracy on the held-out subset

In [None]:
test_c = test_x[test_y == pred_test]  # test samples predicted correctly
test_f = test_x[test_y != pred_test]  # test samples predicted wrongly

In [None]:
# Correct test predictions are drawn as dots, wrong ones as crosses; the
# color encodes the true label. vmin/vmax are pinned to the full label range
# so that both calls share one color scale: without them each scatter() call
# normalizes its own subset, and a subset that contains only one class is
# drawn in the wrong color.
plt.figure(figsize=(10, 6))
plt.scatter(x=test_c[:, 0], y=test_c[:, 1], c=test_y[test_y == pred_test],
            marker='o', cmap='coolwarm', vmin=y.min(), vmax=y.max())
plt.scatter(x=test_f[:, 0], y=test_f[:, 1], c=test_y[test_y != pred_test],
            marker='x', cmap='coolwarm', vmin=y.min(), vmax=y.max());

In [None]:
bins = np.linspace(-4.5, 4.5, 50)  # 50 evenly spaced bin edges on [-4.5, 4.5]

In [None]:
bins

In [None]:
Xd = np.digitize(X, bins=bins)  # replace each feature value by its bin index

In [None]:
Xd[:5]

In [None]:
# Xd = X  # uncomment to use the raw, non-digitized features instead

In [None]:
# Split the digitized features with the same test share and seed as before.
train_x, test_x, train_y, test_y = train_test_split(Xd, y, test_size=0.33,
                                                    random_state=0)

In [None]:
# Tabulate the test-set accuracy of a support vector classifier for each of
# the four kernels, all trained on the same training subset.
print(f"{'kernel':>8s} | {'accuracy':8s}")
print('-' * 20)
for kernel in ('linear', 'poly', 'rbf', 'sigmoid'):
    # fit() returns the estimator itself, so the call can be chained.
    model = SVC(C=1, kernel=kernel).fit(train_x, train_y)
    acc = accuracy_score(test_y, model.predict(test_x))
    print(f'{kernel:>8s} | {acc:8.3f}')

<img src='http://hilpisch.com/tpq_logo.png' width="35%" align="right">

<br><br><a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:ai@tpq.io">ai@tpq.io</a>