<img src="https://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Reinforcement Learning for Finance

**Chapter 01 &mdash; Learning through Interaction**

&copy; Dr. Yves J. Hilpisch

<a href="https://tpq.io" target="_blank">https://tpq.io</a> | <a href="https://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>

## Learning

### Tossing a Biased Coin

In [1]:
import numpy as np
from numpy.random import default_rng
# Single seeded generator used by every epoch() below; with seed=100 a fresh
# top-to-bottom run draws the same random sequence each time.
rng = default_rng(seed=100)

In [2]:
# Pool the outcome `s` is drawn from in epoch(): 1 and 0 are listed once each,
# so both outcomes are equally likely.
ssp = [1, 0]

In [3]:
# Pool the guess `a` is drawn from in epoch(): 1 and 0, listed once each.
asp = [1, 0]

In [4]:
def epoch(n=100):
    """Play one epoch of ``n`` rounds with purely random guesses.

    Each round draws a guess from the module-level ``asp`` and an outcome from
    the module-level ``ssp`` using the shared generator ``rng``. ``choice``
    samples uniformly over the list entries, so a value listed several times
    is proportionally more likely. The pools are looked up at call time, so
    reassigning ``asp`` or ``ssp`` changes later epochs without redefining
    this function.

    Parameters
    ----------
    n : int, optional
        Number of rounds to play. Defaults to 100, the value that was
        previously hard-coded.

    Returns
    -------
    int
        Number of rounds in which the guess equalled the outcome.
    """
    tr = 0
    for _ in range(n):
        # Guess first, outcome second: keeping this order keeps the random
        # stream (and therefore the recorded results) unchanged.
        a = rng.choice(asp)
        s = rng.choice(ssp)
        if a == s:
            tr += 1
    return tr

In [5]:
# Run 250 epochs and keep the number of matches from each one.
rl = np.array([epoch() for _ in range(250)])
# Show the totals of the first ten epochs.
rl[:10]

array([56, 47, 48, 55, 55, 51, 54, 43, 55, 40])

In [6]:
# Average number of matches per epoch across the 250 runs.
rl.mean()

49.968

In [7]:
# Biased outcome pool: 1 is listed four times and 0 once, so a uniform draw
# over the entries yields 1 with probability 4/5.
ssp = [1, 1, 1, 1, 0]

In [8]:
# Guess pool is unchanged: 1 and 0 are still listed once each.
asp = [1, 0]

In [9]:
def epoch():
    """Count the matches in 100 rounds of random guess versus random outcome.

    Every round draws a guess from the global ``asp`` and then an outcome from
    the global ``ssp`` with the shared ``rng``. The return value is the number
    of rounds in which the two draws were equal.
    """
    # The left operand is evaluated first, so the guess is drawn before the
    # outcome on every round.
    hits = (1 for _ in range(100) if rng.choice(asp) == rng.choice(ssp))
    return sum(hits)

In [10]:
# Repeat the 250-epoch experiment, now with the biased outcome pool.
rl = np.array([epoch() for _ in range(250)])
# Show the totals of the first ten epochs.
rl[:10]

array([53, 56, 40, 55, 53, 49, 43, 45, 50, 51])

In [11]:
# Average number of matches per epoch for the random guesser.
rl.mean()

49.924

In [12]:
# Same biased outcome pool as in the previous experiment (four 1s, one 0).
ssp = [1, 1, 1, 1, 0]

In [13]:
def epoch(n):
    """Play ``n`` rounds, guessing by sampling from the outcomes seen so far.

    The guess pool starts as ``[0, 1]`` and is local to this call. After each
    round the observed outcome is appended to it, so values that occur more
    often are drawn as guesses more often. Outcomes come from the global
    ``ssp`` via the shared ``rng``.

    Returns the number of rounds in which the guess equalled the outcome.
    """
    memory = [0, 1]
    score = 0
    for _ in range(n):
        # Tuple elements are evaluated left to right: guess first, then outcome.
        guess, outcome = rng.choice(memory), rng.choice(ssp)
        score += 1 if guess == outcome else 0
        # Remember what happened so that later guesses lean towards it.
        memory += [outcome]
    return score

In [14]:
# 250 epochs of 100 rounds each with the guesser that samples from past outcomes.
rl = np.array([epoch(100) for _ in range(250)])
# Show the totals of the first ten epochs.
rl[:10]

array([71, 65, 67, 69, 68, 72, 68, 68, 77, 73])

In [15]:
# Average number of matches per 100-round epoch.
rl.mean()

66.78

In [16]:
from collections import Counter

In [17]:
# Biased outcome pool again: four 1s and one 0.
ssp = [1, 1, 1, 1, 0]

In [18]:
def epoch(n):
    tr = 0
    asp = [0, 1]
    for _ in range(n):
        c = Counter(asp)
        a = c.most_common()[0][0]
        s = rng.choice(ssp)
        if a == s:
            tr += 1
        asp.append(s)
    return tr

In [19]:
# 250 epochs of 100 rounds each with the most-frequent-outcome guesser.
rl = np.array([epoch(100) for _ in range(250)])
# Show the totals of the first ten epochs.
rl[:10]

array([81, 70, 74, 77, 82, 74, 81, 80, 77, 78])

In [20]:
# Average number of matches per 100-round epoch.
rl.mean()

78.828

### Rolling a Biased Die

In [21]:
# Outcome pool for the die: ten entries, five of which are 4. A uniform draw
# over the entries therefore yields 4 with probability 1/2 and each other
# face with probability 1/10.
ssp = [1, 2, 3, 4, 4, 4, 4, 4, 5, 6]

In [22]:
# Guess pool: the six faces, each listed once.
asp = [1, 2, 3, 4, 5, 6]

In [23]:
def epoch():
    """Count the matches in 600 rounds of random guess versus random roll.

    Each round draws a guess from the global ``asp`` and then a roll from the
    global ``ssp`` with the shared ``rng``. The return value is the number of
    rounds in which guess and roll were equal.
    """
    results = []
    for _ in range(600):
        guess = rng.choice(asp)
        roll = rng.choice(ssp)
        results.append(guess == roll)
    # Each comparison counts as one match when true.
    return sum(map(bool, results))

In [24]:
# Run 250 epochs of the die game with random guesses.
rl = np.array([epoch() for _ in range(250)])
# Show the totals of the first ten epochs.
rl[:10]

array([ 92,  96, 106,  99,  96, 107, 101, 106,  92, 117])

In [25]:
# Average number of matches per 600-round epoch.
rl.mean()

101.22

In [26]:
def epoch():
    """Play 600 die rounds, guessing by sampling from the rolls seen so far.

    The guess pool is local to this call and starts with the six faces listed
    once each. Every observed roll is appended to it, so faces that come up
    more often are drawn as guesses more often. Rolls come from the global
    ``ssp`` via the shared ``rng``.

    Returns the number of rounds in which the guess equalled the roll.
    """
    seen = list(range(1, 7))
    wins = 0
    for _ in range(600):
        pick = rng.choice(seen)
        roll = rng.choice(ssp)
        # The roll only enters the pool after this round's guess was drawn.
        seen.append(roll)
        if pick == roll:
            wins += 1
    return wins

In [27]:
# Run 250 epochs of the die game with the guesser that samples from past rolls.
rl = np.array([epoch() for _ in range(250)])
# Show the totals of the first ten epochs.
rl[:10]

array([182, 174, 162, 157, 184, 167, 190, 208, 171, 153])

In [28]:
# Average number of matches per 600-round epoch.
rl.mean()

176.296

In [29]:
def epoch(n=600):
    """Play ``n`` die rounds, always guessing the most frequent face so far.

    The tally starts with one observation of each face 1..6. Every round the
    guess is the face with the highest count; ties go to the face that entered
    the tally first, so the opening guess is 1. The roll is drawn from the
    global ``ssp`` with the shared ``rng`` and then added to the tally.

    The tally is kept as a running ``Counter`` rather than being rebuilt from
    the whole history on every round. Guesses and random draws are the same
    as with a per-round rebuild, but each round no longer rescans the history.

    Parameters
    ----------
    n : int, optional
        Number of rounds to play. Defaults to 600, the value that was
        previously hard-coded.

    Returns
    -------
    int
        Number of rounds in which the guess equalled the roll.
    """
    tr = 0
    # One pseudo-observation per face so a guess exists before any roll.
    counts = Counter([1, 2, 3, 4, 5, 6])
    for _ in range(n):
        # most_common(1) returns only the top entry and breaks ties the same
        # way as the full ranking does (first-seen face wins).
        a = counts.most_common(1)[0][0]
        s = rng.choice(ssp)
        if a == s:
            tr += 1
        counts[s] += 1
    return tr

In [30]:
# Run 250 epochs of the die game with the most-frequent-face guesser.
rl = np.array([epoch() for _ in range(250)])
# Show the totals of the first ten epochs.
rl[:10]

array([305, 288, 312, 306, 318, 302, 304, 311, 313, 281])

In [31]:
# Average number of matches per 600-round epoch.
rl.mean()

297.204

In [32]:
# Ten to the power of 40 as an exact Python integer.
cm = 10 ** 40
# The ',' format spec inserts thousands separators for readability.
print(f'{cm:,}')

10,000,000,000,000,000,000,000,000,000,000,000,000,000


<img src="https://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="https://tpq.io" target="_blank">https://tpq.io</a> | <a href="https://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:team@tpq.io">team@tpq.io</a>