<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Reinforcement Learning for Finance

**Chapter 01 &mdash; Learning through Interaction**

&copy; Dr. Yves J. Hilpisch

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>

## Learning

### Tossing a Biased Coin

In [None]:
# Fetch the companion repository. The leading "!" is an IPython shell escape,
# so this cell only runs inside IPython/Jupyter.
!git clone https://github.com/tpq-classes/rl_4_finance.git
import sys
# Put the cloned directory on the module search path so that modules inside
# it can be imported by later cells.
sys.path.append('rl_4_finance')


In [None]:
import numpy as np
from numpy.random import default_rng
rng = default_rng(seed=100)  # shared generator; the fixed seed makes a fresh top-to-bottom run reproducible

In [None]:
ssp = [1, 0]  # <1> state space: the two sides of the coin, one entry each (fair coin when sampled uniformly)

In [None]:
asp = [1, 0]  # <2> action space: the two possible bets, one per coin side

In [None]:
def epoch():
    """Play 100 coin tosses with purely random bets and return the hit count.

    Every round first draws a bet from the global action space ``asp`` and
    then an outcome from the global state space ``ssp``, both through the
    global generator ``rng``. A round is a hit when bet and outcome agree.

    Returns
    -------
    int
        Number of rounds (0 to 100) in which the bet matched the outcome.
    """
    hits = 0
    remaining = 100
    while remaining > 0:
        action = rng.choice(asp)  # <3> random bet
        state = rng.choice(ssp)  # <4> random outcome
        hits += 1 if action == state else 0  # <5> reward of 1 for a correct bet
        remaining -= 1
    return hits

In [None]:
# <6> Play 250 independent epochs and collect the total reward of each one.
rl = np.array([epoch() for _ in range(250)])  # <6>
rl[:10]  # cell output: totals of the first ten epochs

In [None]:
rl.mean()  # <7> average total reward per epoch over the 250 epochs

In [None]:
# <1> Biased coin: four of the five entries are 1, so a uniform draw
# yields 1 with probability 0.8 and 0 with probability 0.2.
ssp = [1] * 4 + [0]

In [None]:
asp = [1, 0]  # <2> action space stays unbiased: each side is listed once

In [None]:
def epoch():
    """Return the number of correct random bets in 100 coin tosses.

    Same logic as the earlier definition of ``epoch`` in this notebook; it
    reads the globals ``asp``, ``ssp`` and ``rng`` at call time, so it picks
    up whatever state space is currently assigned to ``ssp``.

    Returns
    -------
    int
        Number of rounds (0 to 100) in which the bet matched the outcome.
    """
    # The bet is drawn before the outcome in every round (left operand first).
    return sum(
        1 for _ in range(100)
        if rng.choice(asp) == rng.choice(ssp)
    )

In [None]:
# Play 250 independent epochs and collect the total reward of each one.
rl = np.array([epoch() for _ in range(250)])
rl[:10]  # cell output: totals of the first ten epochs

In [None]:
rl.mean()  # average total reward per epoch over the 250 epochs

In [None]:
# Biased coin: 1 occupies four of the five equally likely slots (80% / 20%).
ssp = [1] * 4 + [0]

In [None]:
def epoch(n):
    """Play ``n`` coin tosses, betting by sampling from the outcomes seen so far.

    The bet is drawn uniformly from a local history list that starts with one
    entry per coin side and grows by the observed outcome after every toss,
    so frequently observed outcomes are bet on more often. Outcomes come from
    the global state space ``ssp`` via the global generator ``rng``.

    Parameters
    ----------
    n : int
        Number of tosses in the epoch.

    Returns
    -------
    int
        Number of tosses on which the bet matched the outcome.
    """
    history = [0, 1]  # <1> initial action space: one entry per side
    wins = 0
    for _ in range(n):
        guess = rng.choice(history)
        outcome = rng.choice(ssp)
        wins += int(guess == outcome)
        history.append(outcome)  # <2> remember the observed outcome
    return wins

In [None]:
# Play 250 independent epochs of 100 tosses each and collect the total rewards.
rl = np.array([epoch(100) for _ in range(250)])
rl[:10]  # cell output: totals of the first ten epochs

In [None]:
rl.mean()  # average total reward per epoch over the 250 epochs

In [None]:
from collections import Counter

In [None]:
# Biased coin: a uniform draw returns 1 in four out of five cases.
ssp = [1] * 4 + [0]

In [None]:
def epoch(n):
    """Play ``n`` coin tosses, always betting on the most frequent outcome so far.

    The frequency table starts with one pseudo-observation per coin side.
    Before each toss the bet is the most frequent entry in the table; the
    outcome is then drawn from the global state space ``ssp`` via the global
    generator ``rng`` and added to the table.

    Parameters
    ----------
    n : int
        Number of tosses in the epoch.

    Returns
    -------
    int
        Number of tosses on which the bet matched the outcome.
    """
    tr = 0
    # <1>/<2> One table for the whole epoch, updated in place. Re-counting
    # the full history on every toss would make an epoch O(n**2).
    counts = Counter([0, 1])
    for _ in range(n):
        # <3> Ties go to the value that entered the table first (0 at the
        # start), because the table keeps insertion order.
        a = counts.most_common(1)[0][0]
        s = rng.choice(ssp)
        if a == s:
            tr += 1
        counts[s] += 1  # <4> record the observed outcome
    return tr

In [None]:
# Play 250 independent epochs of 100 tosses each and collect the total rewards.
rl = np.array([epoch(100) for _ in range(250)])
rl[:10]  # cell output: totals of the first ten epochs

In [None]:
rl.mean()  # average total reward per epoch over the 250 epochs

### Rolling a Biased Die

In [None]:
# <1> Biased die: face 4 fills five of the ten equally likely slots
# (probability 0.5); every other face fills one slot (probability 0.1).
ssp = [1, 2, 3] + [4] * 5 + [5, 6]

In [None]:
# <2> Action space: one bet per face of the die, each listed once.
asp = list(range(1, 7))

In [None]:
def epoch():
    """Roll the die 600 times with purely random bets and return the hit count.

    Each roll draws a bet from the global action space ``asp`` and then an
    outcome from the global state space ``ssp``, both through the global
    generator ``rng``.

    Returns
    -------
    int
        Number of rolls (0 to 600) on which the bet matched the outcome.
    """
    # The bet is drawn before the outcome on every roll (left operand first).
    matches = [rng.choice(asp) == rng.choice(ssp) for _ in range(600)]
    return int(sum(matches))

In [None]:
# Play 250 independent epochs of 600 rolls each and collect the total rewards.
rl = np.array([epoch() for _ in range(250)])
rl[:10]  # cell output: totals of the first ten epochs

In [None]:
rl.mean()  # average total reward per epoch over the 250 epochs

In [None]:
def epoch():
    """Roll the die 600 times, betting by sampling from the outcomes seen so far.

    The bet is drawn uniformly from a local history list that starts with one
    entry per face and grows by the observed outcome after every roll.
    Outcomes come from the global state space ``ssp`` via the global
    generator ``rng``.

    Returns
    -------
    int
        Number of rolls (0 to 600) on which the bet matched the outcome.
    """
    total = 0
    pool = list(range(1, 7))  # <1> initial action space: each face once
    for _ in range(600):
        pick = rng.choice(pool)
        roll = rng.choice(ssp)
        pool.append(roll)  # <2> remember the observed outcome
        if pick == roll:
            total += 1
    return total

In [None]:
# Play 250 independent epochs of 600 rolls each and collect the total rewards.
rl = np.array([epoch() for _ in range(250)])
rl[:10]  # cell output: totals of the first ten epochs

In [None]:
rl.mean()  # average total reward per epoch over the 250 epochs

In [None]:
def epoch():
    """Roll the die 600 times, always betting on the most frequent outcome so far.

    The frequency table starts with one pseudo-observation per face. Before
    each roll the bet is the most frequent entry in the table; the outcome is
    then drawn from the global state space ``ssp`` via the global generator
    ``rng`` and added to the table.

    Returns
    -------
    int
        Number of rolls (0 to 600) on which the bet matched the outcome.
    """
    tr = 0
    # <1>/<2> One table for the whole epoch, updated in place. Re-counting
    # the full history on each of the 600 rolls would make the epoch quadratic.
    counts = Counter([1, 2, 3, 4, 5, 6])
    for _ in range(600):
        # <3> Ties go to the value that entered the table first (1 at the
        # start), because the table keeps insertion order.
        a = counts.most_common(1)[0][0]
        s = rng.choice(ssp)
        if a == s:
            tr += 1
        counts[s] += 1  # <4> record the observed outcome
    return tr

In [None]:
# Play 250 independent epochs of 600 rolls each and collect the total rewards.
rl = np.array([epoch() for _ in range(250)])
rl[:10]  # cell output: totals of the first ten epochs

In [None]:
rl.mean()  # average total reward per epoch over the 250 epochs

In [None]:
# A 1 followed by 40 zeros, printed with comma thousands separators.
cm = pow(10, 40)
print(format(cm, ','))

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>