<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Python for Finance Basics

&copy; Dr. Yves J. Hilpisch | The Python Quants GmbH

http://tpq.io | [training@tpq.io](mailto:training@tpq.io) | [@dyjh](http://twitter.com/dyjh)

## `scikit-learn` package

In [None]:
# Shell escape (`!`): clone the course repository; needs git and network access.
!git clone https://github.com/tpq-classes/pff_basics.git
import sys
# Add the cloned folder to the module search path so its files can be imported.
sys.path.append('pff_basics')


In [None]:
import numpy as np
import pandas as pd
# NOTE(review): matplotlib discourages the `pylab` interface;
# `import matplotlib.pyplot as plt` should bind the same module -- confirm before changing.
from pylab import plt
# NOTE(review): the 'seaborn-v0_8' style name presumably needs matplotlib >= 3.6 -- confirm.
plt.style.use('seaborn-v0_8')
# IPython magic: have the inline backend render figures as SVG.
%config InlineBackend.figure_format = 'svg'

## Supervised Learning

**Classification**

### Boolean Operations

#### Sample Data

Only binary features and labels.

In [None]:
from numpy.random import default_rng

In [None]:
# Seed the generator so the sample data (and every accuracy score derived
# from it) is identical on each run, mirroring the fixed `random_state=100`
# used for `make_classification` further down in this notebook.
rng = default_rng(100)

In [None]:
# 100 samples with two binary features each (values drawn from {0, 1}).
X = rng.integers(low=0, high=2, size=(100, 2))

In [None]:
# Show the first seven rows of the binary feature matrix.
X[:7]

In [None]:
# Labels for the AND problem: 1 only where both features are 1.
y0 = np.bitwise_and(X[:, 0], X[:, 1])
y0[:7]

In [None]:
# Labels for the OR problem: 1 where at least one feature is 1.
y1 = np.bitwise_or(X[:, 0], X[:, 1])
y1[:7]

In [None]:
# Labels for the XOR problem: 1 where exactly one feature is 1.
y2 = np.bitwise_xor(X[:, 0], X[:, 1])
y2[:7]

#### Classification

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [None]:
# Every instantiation cell in this section rebinds `model`; the estimator
# that gets fitted afterwards is whichever of them was executed last.
model = GaussianNB()  # 1. step: model instantiation

In [None]:
# LogisticRegression?

In [None]:
# Alternative estimator: logistic regression with C=1 (rebinds `model`).
model = LogisticRegression(C=1)  # 1. step: model instantiation

In [None]:
# DecisionTreeClassifier?

In [None]:
# Alternative estimator: decision tree capped at a depth of 2 (rebinds `model`).
model = DecisionTreeClassifier(max_depth=2)  # 1. step: model instantiation

In [None]:
# SVC?

In [None]:
# Alternative estimator: support vector classifier with C=1. As the last
# instantiation cell, this is the model the fit cells use when the notebook
# is run top to bottom.
model = SVC(C=1)  # 1. step: model instantiation

In [None]:
# Train the estimator currently bound to `model` on the AND labels `y0`.
model.fit(X, y0)  # 2. step: model fitting

In [None]:
# In-sample prediction: the model predicts on the same X it was fitted on.
p0 = model.predict(X)  # 3. step: prediction
p0

In [None]:
# Element-wise check: True wherever the prediction equals the AND label.
y0 == p0

In [None]:
# Accuracy of the in-sample predictions against the AND labels.
accuracy_score(y_true=y0, y_pred=p0)

In [None]:
# Fit the same `model` object again, now on the OR labels `y1`.
model.fit(X, y1)

In [None]:
# In-sample predictions for the OR problem.
p1 = model.predict(X)
p1

In [None]:
# Accuracy of the in-sample predictions against the OR labels.
accuracy_score(y_true=y1, y_pred=p1)

In [None]:
# Fit the same `model` object again, now on the XOR labels `y2`.
model.fit(X, y2)

In [None]:
# In-sample predictions for the XOR problem.
p2 = model.predict(X)
p2

In [None]:
# Accuracy of the in-sample predictions against the XOR labels.
accuracy_score(y_true=y2, y_pred=p2)

### Floating Point Features

#### Sample Data

Combining floating point features with binary labels.

In [None]:
from sklearn.datasets import make_classification

In [None]:
# make_classification?

In [None]:
# Synthetic classification data: 250 samples, two features, none of them
# redundant; the fixed random_state keeps the data identical across runs.
X, y = make_classification(
    n_samples=250,
    n_features=2,
    n_redundant=0,
    random_state=100,
)

In [None]:
# Show the first seven rows of the generated feature matrix.
X[:7]

In [None]:
# Show the labels of the first seven samples.
y[:7]

In [None]:
# Plot feature 0 against feature 1, coloured by label; the trailing
# semicolon suppresses the textual output of the cell.
plt.scatter(x=X[:, 0], y=X[:, 1], c=y, cmap='coolwarm');

In [None]:
# Gaussian naive Bayes is the active choice here; the commented-out cells
# that follow offer alternative estimators to swap in.
model = GaussianNB()

In [None]:
# model = LogisticRegression(C=1)

In [None]:
# model = DecisionTreeClassifier(max_depth=4)

In [None]:
# model = SVC(C=1, kernel='linear')

In [None]:
# Train the selected estimator on the floating point features and binary labels.
model.fit(X, y)

In [None]:
# In-sample predictions (same X as used for fitting); show the first seven.
p = model.predict(X)
p[:7]

In [None]:
# Accuracy of the in-sample predictions against the true labels.
accuracy_score(y_true=y, y_pred=p)

In [None]:
# Partition the samples by prediction outcome:
# Xc holds the correctly classified rows, Xf the misclassified ones.
Xc, Xf = X[y == p], X[y != p]

In [None]:
# Correct predictions are drawn as circles, false ones as crosses; the colour
# encodes the true label.
# vmin/vmax pin the colour scale to the binary label range [0, 1]. Without
# them each call autoscales to its own subset, so a subset that contains only
# one class would be drawn in the wrong colour.
plt.scatter(Xc[:, 0], Xc[:, 1], c=y[y == p],
            cmap='coolwarm', marker='o', vmin=0, vmax=1)
plt.scatter(Xf[:, 0], Xf[:, 1], c=y[y != p],
            cmap='coolwarm', marker='x', vmin=0, vmax=1);

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="mailto:training@tpq.io">training@tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> 