## RDP Learning

In this notebook, we show how to perform PAC (probably approximately correct)
learning of Regular Decision Processes (RDPs).

### Experiment 1: Rotating MAB

In [16]:

from functools import partial
from typing import Optional, Tuple

import gym
import numpy as np
from gym.wrappers import TimeLimit

from notebooks.utils import render_automaton
from src import NonMarkovianRotatingMAB
from src.learn_pdfa.base import learn_pdfa, Algorithm
from src.learn_pdfa.utils.generator import MultiprocessedGenerator
from src.learn_rdps import random_exploration_policy, RDPGenerator
from src.pdfa import PDFA
from src.pdfa.base import FINAL_SYMBOL

# Keyword arguments shared by the `learning_rotating_mab` calls in this
# notebook; the keys match that function's parameter names so the dict can be
# splatted with `**DEFAULT_CONFIG`.
DEFAULT_CONFIG = {
    "max_episode_steps": 30,
    "delta": 0.1,
    "n_upperbound": 5,
    "nb_processes": 8,
}


def char2str(c):
    """Turn an automaton symbol into a display label.

    The final symbol is rendered as ``"-1"``; any other symbol is decoded with
    ``rdp_generator.decoder`` and the decoded value is converted with ``str``.

    NOTE: ``rdp_generator`` is looked up as a notebook-level global at call
    time, so an experiment cell must have bound that name before this function
    is used. The single-argument signature is kept because the function is
    passed as the ``char2str`` callback of ``render_automaton``.
    """
    if c != FINAL_SYMBOL:
        return str(rdp_generator.decoder(c))
    return "-1"

# Smoke test: build an environment with two winning probabilities, inspect its
# spaces, reset it, and apply the same action twice while printing each result.
env = NonMarkovianRotatingMAB(winning_probs=[0.9, 0.2])

print(f"Observation space: {env.observation_space}")
print(f"Action space: {env.action_space}")

s = env.reset()
print(f"Initial state: {s}")

action = 0
for _ in range(2):
    # Only the next state and the reward are reported; the last two elements
    # of the step result are discarded.
    sp, reward, _, _ = env.step(action)
    print(
        "-" * 10,
        f"Action: {action}",
        f"Next state: {sp}",
        f"Reward: {reward}",
        sep="\n",
    )


Observation space: Discrete(2)
Action space: Discrete(2)
Initial state: 0
----------
Action: 0
Next state: 1
Reward: 1.0
----------
Action: 0
Next state: 1
Reward: 1.0


### Learning

In [17]:
def learning_rotating_mab(
    stop_probability: float,
    winning_probabilities: Tuple[float, ...],
    max_episode_steps: int,
    nb_samples: int,
    delta: float,
    n_upperbound: int,
    nb_processes: int = 8,
    nb_preview_samples: int = 1000,
) -> Tuple[RDPGenerator, PDFA]:
    """Learn a PDFA from traces of a Rotating MAB environment.

    Builds the ``NonMarkovianRotatingMAB-v0`` gym environment wrapped in a
    ``TimeLimit``, samples it through an ``RDPGenerator`` driven by
    ``random_exploration_policy``, prints trace-length statistics, and runs
    ``learn_pdfa`` with ``Algorithm.BALLE`` on a multiprocessed generator.

    :param stop_probability: forwarded to ``RDPGenerator`` as ``stop_probability``.
    :param winning_probabilities: forwarded to ``gym.make`` as ``winning_probs``.
    :param max_episode_steps: step limit given to the ``TimeLimit`` wrapper.
    :param nb_samples: forwarded to ``learn_pdfa`` as ``nb_samples``.
    :param delta: forwarded to ``learn_pdfa`` as ``delta``.
    :param n_upperbound: forwarded to ``learn_pdfa`` as ``n``.
    :param nb_processes: number of processes for ``MultiprocessedGenerator``.
    :param nb_preview_samples: number of traces sampled only to print their
        average length (default 1000, the previously hard-coded value). A
        value <= 0 skips the preview sampling and the average-length line.
    :return: the (single-process) ``RDPGenerator`` and the learned ``PDFA``.
    """
    # NOTE(review): the environment id is presumably registered as a side
    # effect of importing `src` -- confirm.
    env = gym.make("NonMarkovianRotatingMAB-v0", winning_probs=winning_probabilities)
    env = TimeLimit(env, max_episode_steps=max_episode_steps)

    policy = partial(random_exploration_policy, env)

    # NOTE(review): nb_rewards=2 presumably reflects a binary reward -- confirm.
    rdp_generator = RDPGenerator(
        env, policy=policy, nb_rewards=2, stop_probability=stop_probability
    )

    # The preview sample is used only for the printed statistic below; it is
    # not passed to the learner.
    preview_enabled = nb_preview_samples > 0
    if preview_enabled:
        examples = rdp_generator.sample(n=nb_preview_samples)

    # Machine epsilon keeps the division defined when stop_probability is 0.
    print(
        f"Apriori expected length of traces: 1/stop_prob = {1 / (stop_probability + np.finfo(float).eps)}"
    )
    if preview_enabled:
        print(f"Average length of traces: {np.mean([len(e) for e in examples])}")

    mp_rdp_generator = MultiprocessedGenerator(rdp_generator, nb_processes=nb_processes)
    pdfa = learn_pdfa(
        algorithm=Algorithm.BALLE,
        nb_samples=nb_samples,
        sample_generator=mp_rdp_generator,
        alphabet_size=rdp_generator.alphabet_size(),
        delta=delta,
        n=n_upperbound,
    )
    return rdp_generator, pdfa


def print_rdp_pdfa_transitions(pdfa: PDFA, generator: Optional[RDPGenerator] = None):
    """Print every transition of ``pdfa``, one per line.

    Iterates over ``pdfa.transition_dict`` (source state -> symbol ->
    ``(destination state, probability)``). The symbol ``-1`` is printed raw;
    every other symbol is decoded into an ``(a, r, s)`` triple with the
    generator's ``decoder``.

    :param pdfa: the automaton whose transitions are printed.
    :param generator: the generator whose ``decoder`` maps symbols to
        ``(a, r, s)``. When omitted, the notebook-level global
        ``rdp_generator`` is used, which keeps existing one-argument calls
        working; pass it explicitly to avoid depending on cell execution order.
    """
    print("Print transitions")
    for qi, out in pdfa.transition_dict.items():
        for c, (qf, prob) in out.items():
            # NOTE(review): -1 is presumably the value of FINAL_SYMBOL -- confirm
            # before replacing the literal with the constant.
            if c == -1:
                print(f"qi={qi}\tc={c}\t\t\tqf={qf}\tp={prob}")
                continue
            # Resolve the fallback lazily so the global is only required when a
            # symbol actually has to be decoded (same as the original behaviour).
            source = rdp_generator if generator is None else generator
            a, r, s = source.decoder(c)
            print(f"qi={qi}\ta={a}\tr={r}\ts={s}\tqf={qf}\tp={prob}")
        
        

##### Rotating MAB, 2 arms, with probabilities (1.0, 0.0)

In [4]:
# Learn a PDFA for two arms with winning probabilities (1.0, 0.0), then render
# and print it.
# The result must be bound to the notebook-level name `rdp_generator`:
# `char2str` and `print_rdp_pdfa_transitions` look that name up as a global
# when decoding symbols.
# NOTE(review): the saved log below reports n=4 while DEFAULT_CONFIG sets
# n_upperbound=5 -- the stored output may predate the current config; re-run.
rdp_generator, pdfa = learning_rotating_mab(
    stop_probability=0.2,
    nb_samples=20000,
    winning_probabilities=(1.0, 0.0),
    **DEFAULT_CONFIG)

render_automaton(pdfa, char2str=char2str, with_prob=False)
print_rdp_pdfa_transitions(pdfa)

Apriori expected length of traces: 1/stop_prob = 4.999999999999994
Average length of traces: 3.679
Print transitions
qi=0	a=1	r=0	s=0	qf=0	p=0.408
qi=0	a=0	r=1	s=1	qf=1	p=0.3944
qi=0	c=-1			qf=-1	p=0.1976
qi=1	a=1	r=1	s=1	qf=0	p=0.41393686718284695
qi=1	a=0	r=0	s=0	qf=1	p=0.3918999404407385
qi=1	c=-1			qf=-1	p=0.19416319237641455


[2020-12-31 00:06:28,895][src.learn_pdfa][INFO] Parameters: ('BalleParams(sample_generator=<src.learn_pdfa.utils.generator.MultiprocessedGenerator '
 'object at 0x7f41abe09210>, nb_samples=20000, n=4, alphabet_size=8, '
 'delta=0.1, epsilon=0.1, with_smoothing=False, with_ground=False, '
 'with_infty_norm=True)')
[2020-12-31 00:06:28,897][src.learn_pdfa][INFO] Generating the sample.
[2020-12-31 00:06:30,071][src.learn_pdfa][INFO] Average trace length: 5.012.
[2020-12-31 00:06:30,072][src.learn_pdfa][INFO] Populate root multiset.
[2020-12-31 00:06:30,115][src.learn_pdfa][INFO] Iteration 0
[2020-12-31 00:06:30,144][src.learn_pdfa][INFO] Iteration 1
[2020-12-31 00:06:30,165][src.learn_pdfa][INFO] Iteration 2
[2020-12-31 00:06:30,201][src.learn_pdfa][INFO] Iteration 3
[2020-12-31 00:06:30,245][src.learn_pdfa][INFO] Iteration 4
[2020-12-31 00:06:30,248][src.learn_pdfa][INFO] Biggest multiset has cardinality 0, done
[2020-12-31 00:06:30,288][graphviz.files][DEBUG] write 270 bytes to '/tmp/tm

##### Rotating MAB, 2 arms, with probabilities (0.7, 0.3)

In [6]:
# Two arms with winning probabilities (0.7, 0.3), 25000 samples.
# `rdp_generator` is rebound at notebook level on purpose: `char2str` and
# `print_rdp_pdfa_transitions` read it as a global to decode symbols.
# NOTE(review): the saved log below reports n=4 while DEFAULT_CONFIG sets
# n_upperbound=5 -- the stored output may be stale; re-run to confirm.
rdp_generator, pdfa = learning_rotating_mab(
    stop_probability=0.2,
    winning_probabilities=(0.7, 0.3),
    nb_samples=25000,
    **DEFAULT_CONFIG)

render_automaton(pdfa, char2str=char2str, with_prob=False)
print_rdp_pdfa_transitions(pdfa)

Apriori expected length of traces: 1/stop_prob = 4.999999999999994
Average length of traces: 3.679
Print transitions
qi=0	a=1	r=0	s=0	qf=0	p=0.28896
qi=0	a=0	r=1	s=1	qf=1	p=0.2784
qi=0	a=1	r=1	s=1	qf=1	p=0.12032
qi=0	a=0	r=0	s=0	qf=0	p=0.11392
qi=0	c=-1			qf=-1	p=0.1984
qi=1	a=1	r=1	s=1	qf=0	p=0.2943089430894309
qi=1	a=0	r=0	s=0	qf=1	p=0.26991869918699185
qi=1	a=0	r=1	s=1	qf=0	p=0.12926829268292683
qi=1	a=1	r=0	s=0	qf=1	p=0.12357723577235771
qi=1	c=-1			qf=-1	p=0.18292682926829268


[2020-12-31 00:07:20,708][src.learn_pdfa][INFO] Parameters: ('BalleParams(sample_generator=<src.learn_pdfa.utils.generator.MultiprocessedGenerator '
 'object at 0x7f41abd43510>, nb_samples=25000, n=4, alphabet_size=8, '
 'delta=0.1, epsilon=0.1, with_smoothing=False, with_ground=False, '
 'with_infty_norm=True)')
[2020-12-31 00:07:20,710][src.learn_pdfa][INFO] Generating the sample.
[2020-12-31 00:07:22,153][src.learn_pdfa][INFO] Average trace length: 4.99456.
[2020-12-31 00:07:22,154][src.learn_pdfa][INFO] Populate root multiset.
[2020-12-31 00:07:22,207][src.learn_pdfa][INFO] Iteration 0
[2020-12-31 00:07:22,241][src.learn_pdfa][INFO] Iteration 1
[2020-12-31 00:07:22,270][src.learn_pdfa][INFO] Iteration 2
[2020-12-31 00:07:22,306][src.learn_pdfa][INFO] Iteration 3
[2020-12-31 00:07:22,344][src.learn_pdfa][INFO] Iteration 4
[2020-12-31 00:07:22,394][src.learn_pdfa][INFO] Iteration 5
[2020-12-31 00:07:22,440][src.learn_pdfa][INFO] Iteration 6
[2020-12-31 00:07:22,485][src.learn_pdfa][I

##### Rotating MAB, 2 arms, with probabilities (0.7, 0.3), 30000 samples

In [8]:
# Two winning probabilities (0.7, 0.3) with nb_samples=30000.
# `rdp_generator` is rebound at notebook level on purpose: `char2str` and
# `print_rdp_pdfa_transitions` read it as a global to decode symbols.
# NOTE(review): the saved log below reports n=4 while DEFAULT_CONFIG sets
# n_upperbound=5 -- the stored output may be stale; re-run to confirm.
rdp_generator, pdfa = learning_rotating_mab(
    stop_probability=0.2,
    winning_probabilities=(0.7, 0.3),
    nb_samples=30000,
    **DEFAULT_CONFIG)

render_automaton(pdfa, char2str=char2str, with_prob=False)
print_rdp_pdfa_transitions(pdfa)

Apriori expected length of traces: 1/stop_prob = 4.999999999999994
Average length of traces: 3.679
Print transitions
qi=0	a=0	r=1	s=1	qf=1	p=0.29733333333333334
qi=0	a=1	r=0	s=0	qf=0	p=0.2704
qi=0	a=1	r=1	s=1	qf=1	p=0.1176
qi=0	a=0	r=0	s=0	qf=0	p=0.11546666666666666
qi=0	c=-1			qf=-1	p=0.1992
qi=1	a=1	r=1	s=1	qf=0	p=0.29775784753363227
qi=1	a=0	r=0	s=0	qf=1	p=0.262780269058296
qi=1	a=1	r=0	s=0	qf=1	p=0.11838565022421525
qi=1	a=0	r=1	s=1	qf=0	p=0.11928251121076233
qi=1	c=-1			qf=-1	p=0.20179372197309417


[2020-12-31 00:13:54,822][src.learn_pdfa][INFO] Parameters: ('BalleParams(sample_generator=<src.learn_pdfa.utils.generator.MultiprocessedGenerator '
 'object at 0x7f41c3f6a2d0>, nb_samples=30000, n=4, alphabet_size=8, '
 'delta=0.1, epsilon=0.1, with_smoothing=False, with_ground=False, '
 'with_infty_norm=True)')
[2020-12-31 00:13:54,824][src.learn_pdfa][INFO] Generating the sample.
[2020-12-31 00:13:56,395][src.learn_pdfa][INFO] Average trace length: 4.972533333333334.
[2020-12-31 00:13:56,396][src.learn_pdfa][INFO] Populate root multiset.
[2020-12-31 00:13:56,487][src.learn_pdfa][INFO] Iteration 0
[2020-12-31 00:13:56,522][src.learn_pdfa][INFO] Iteration 1
[2020-12-31 00:13:56,575][src.learn_pdfa][INFO] Iteration 2
[2020-12-31 00:13:56,625][src.learn_pdfa][INFO] Iteration 3
[2020-12-31 00:13:56,675][src.learn_pdfa][INFO] Iteration 4
[2020-12-31 00:13:56,722][src.learn_pdfa][INFO] Iteration 5
[2020-12-31 00:13:56,773][src.learn_pdfa][INFO] Iteration 6
[2020-12-31 00:13:56,830][src.lea

##### Rotating MAB, 3 arms, with probabilities (1.0, 0.0, 0.0)

In [9]:
# Three arms with winning probabilities (1.0, 0.0, 0.0), 100000 samples.
# `rdp_generator` is rebound at notebook level on purpose: `char2str` and
# `print_rdp_pdfa_transitions` read it as a global to decode symbols.
rdp_generator, pdfa = learning_rotating_mab(
    stop_probability=0.2,
    winning_probabilities=(1.0, 0.0, 0.0),
    nb_samples=100000,
    **DEFAULT_CONFIG)

render_automaton(pdfa, char2str=char2str, with_prob=False)
print_rdp_pdfa_transitions(pdfa)

Apriori expected length of traces: 1/stop_prob = 4.999999999999994
Average length of traces: 3.679
Print transitions
qi=0	a=0	r=1	s=1	qf=1	p=0.27136
qi=0	a=1	r=0	s=0	qf=0	p=0.26448
qi=0	a=2	r=0	s=0	qf=0	p=0.26384
qi=0	c=-1			qf=-1	p=0.20032
qi=1	a=0	r=0	s=0	qf=1	p=0.27358490566037735
qi=1	a=2	r=0	s=0	qf=1	p=0.2644457547169811
qi=1	a=1	r=1	s=1	qf=2	p=0.24941037735849056
qi=1	c=-1			qf=-1	p=0.21255896226415094
qi=2	a=2	r=1	s=1	qf=0	p=0.27831474597273853
qi=2	a=0	r=0	s=0	qf=2	p=0.27732342007434946
qi=2	a=1	r=0	s=0	qf=2	p=0.25551425030978947
qi=2	c=-1			qf=-1	p=0.18884758364312257


[2020-12-31 00:14:15,583][src.learn_pdfa][INFO] Parameters: ('BalleParams(sample_generator=<src.learn_pdfa.utils.generator.MultiprocessedGenerator '
 'object at 0x7f41c3ba9f90>, nb_samples=100000, n=5, alphabet_size=12, '
 'delta=0.1, epsilon=0.1, with_smoothing=False, with_ground=False, '
 'with_infty_norm=True)')
[2020-12-31 00:14:15,584][src.learn_pdfa][INFO] Generating the sample.
[2020-12-31 00:14:20,260][src.learn_pdfa][INFO] Average trace length: 4.86752.
[2020-12-31 00:14:20,261][src.learn_pdfa][INFO] Populate root multiset.
[2020-12-31 00:14:20,539][src.learn_pdfa][INFO] Iteration 0
[2020-12-31 00:14:20,613][src.learn_pdfa][INFO] Iteration 1
[2020-12-31 00:14:20,728][src.learn_pdfa][INFO] Iteration 2
[2020-12-31 00:14:20,888][src.learn_pdfa][INFO] Iteration 3
[2020-12-31 00:14:21,015][src.learn_pdfa][INFO] Iteration 4
[2020-12-31 00:14:21,145][src.learn_pdfa][INFO] Iteration 5
[2020-12-31 00:14:21,297][src.learn_pdfa][INFO] Iteration 6
[2020-12-31 00:14:21,456][src.learn_pdfa]

##### Rotating MAB, 3 arms, with probabilities (0.1, 0.2, 0.9)

In [10]:
# Three arms with winning probabilities (0.1, 0.2, 0.9), 50000 samples.
# `rdp_generator` is rebound at notebook level on purpose: `char2str` and
# `print_rdp_pdfa_transitions` read it as a global to decode symbols.
# NOTE(review): the saved log below reports nb_samples=1000000, n=6 and
# delta=0.05, none of which match the arguments passed here -- the stored
# output appears to come from a different run; re-run to confirm.
rdp_generator, pdfa = learning_rotating_mab(
    stop_probability=0.2,
    winning_probabilities=(0.1, 0.2, 0.9),
    nb_samples=50000,
    **DEFAULT_CONFIG)

render_automaton(pdfa, char2str=char2str, with_prob=False)
print_rdp_pdfa_transitions(pdfa)

Apriori expected length of traces: 1/stop_prob = 4.999999999999994
Average length of traces: 3.84
Print transitions
qi=0	a=0	r=0	s=0	qf=0	p=0.24217600000000006
qi=0	a=2	r=1	s=1	qf=1	p=0.23910400000000007
qi=0	a=1	r=0	s=0	qf=0	p=0.21298400000000006
qi=0	a=1	r=1	s=1	qf=1	p=0.053904000000000014
qi=0	a=0	r=1	s=1	qf=1	p=0.026808000000000005
qi=0	a=2	r=0	s=0	qf=0	p=0.026824000000000008
qi=0	c=-1			qf=-1	p=0.19820000000000004
qi=1	a=0	r=1	s=1	qf=2	p=0.24113493192440555
qi=1	a=1	r=0	s=0	qf=1	p=0.23935683804104851
qi=1	a=2	r=0	s=0	qf=1	p=0.21324425929689084
qi=1	a=2	r=1	s=1	qf=2	p=0.055349522454785596
qi=1	a=0	r=0	s=0	qf=1	p=0.028144686039422873
qi=1	a=1	r=1	s=1	qf=2	p=0.0242582808372282
qi=1	c=-1			qf=-1	p=0.1985114814062182
qi=2	a=1	r=1	s=1	qf=0	p=0.23849467240988445
qi=2	a=2	r=0	s=0	qf=2	p=0.23630318144033854
qi=2	a=0	r=0	s=0	qf=2	p=0.21438827174488023
qi=2	a=0	r=1	s=1	qf=0	p=0.053427038464444944
qi=2	a=2	r=1	s=1	qf=0	p=0.02811153933348446
qi=2	a=1	r=0	s=0	qf=2	p=0.02607118567218318
qi=2	c=-

[2020-12-31 00:14:35,479][src.learn_pdfa][INFO] Parameters: ('BalleParams(sample_generator=<src.learn_pdfa.utils.generator.MultiprocessedGenerator '
 'object at 0x7f41abdd2250>, nb_samples=1000000, n=6, alphabet_size=12, '
 'delta=0.05, epsilon=0.1, with_smoothing=False, with_ground=False, '
 'with_infty_norm=True)')
[2020-12-31 00:14:35,480][src.learn_pdfa][INFO] Generating the sample.
[2020-12-31 00:15:37,168][src.learn_pdfa][INFO] Average trace length: 5.011504.
[2020-12-31 00:15:37,168][src.learn_pdfa][INFO] Populate root multiset.
[2020-12-31 00:15:39,763][src.learn_pdfa][INFO] Iteration 0
[2020-12-31 00:15:40,892][src.learn_pdfa][INFO] Iteration 1
[2020-12-31 00:15:41,836][src.learn_pdfa][INFO] Iteration 2
[2020-12-31 00:15:43,184][src.learn_pdfa][INFO] Iteration 3
[2020-12-31 00:15:44,070][src.learn_pdfa][INFO] Iteration 4
[2020-12-31 00:15:45,298][src.learn_pdfa][INFO] Iteration 5
[2020-12-31 00:15:46,581][src.learn_pdfa][INFO] Iteration 6
[2020-12-31 00:15:47,728][src.learn_pd

##### Rotating MAB, 4 arms, with probabilities (1.0, 0.0, 0.0, 0.0)


In [12]:
# Four arms with winning probabilities (1.0, 0.0, 0.0, 0.0), 75000 samples.
# This cell uses stop_probability=0.1 rather than the 0.2 used by the cells
# above it.
# `rdp_generator` is rebound at notebook level on purpose: `char2str` and
# `print_rdp_pdfa_transitions` read it as a global to decode symbols.
rdp_generator, pdfa = learning_rotating_mab(
    stop_probability=0.1,
    winning_probabilities=(1.0, 0.0, 0.0, 0.0),
    nb_samples=75000,
    **DEFAULT_CONFIG)

render_automaton(pdfa, char2str=char2str, with_prob=False)
print_rdp_pdfa_transitions(pdfa)

Apriori expected length of traces: 1/stop_prob = 9.999999999999977
Average length of traces: 7.879
Print transitions
qi=0	a=2	r=0	s=0	qf=0	p=0.23498666666666668
qi=0	a=1	r=0	s=0	qf=0	p=0.22656
qi=0	a=3	r=0	s=0	qf=0	p=0.22250666666666666
qi=0	a=0	r=1	s=1	qf=1	p=0.22026666666666667
qi=0	c=-1			qf=-1	p=0.09568
qi=1	a=2	r=0	s=0	qf=1	p=0.22666666666666666
qi=1	a=1	r=1	s=1	qf=2	p=0.22528735632183908
qi=1	a=0	r=0	s=0	qf=1	p=0.22436781609195408
qi=1	a=3	r=0	s=0	qf=1	p=0.22375478927203066
qi=1	c=-1			qf=-1	p=0.0999233716475096
qi=2	a=1	r=0	s=0	qf=2	p=0.2296374146085129
qi=2	a=2	r=1	s=1	qf=3	p=0.22070415133998944
qi=2	a=3	r=0	s=0	qf=2	p=0.23121387283237
qi=2	a=0	r=0	s=0	qf=2	p=0.22122963741460844
qi=2	c=-1			qf=-1	p=0.09721492380451913
qi=3	a=2	r=0	s=0	qf=3	p=0.22784810126582264
qi=3	a=3	r=1	s=1	qf=0	p=0.22486969471332824
qi=3	a=0	r=0	s=0	qf=3	p=0.21667907669396858
qi=3	a=1	r=0	s=0	qf=3	p=0.21965748324646306
qi=3	c=-1			qf=-1	p=0.11094564408041746


[2020-12-31 00:20:29,839][src.learn_pdfa][INFO] Parameters: ('BalleParams(sample_generator=<src.learn_pdfa.utils.generator.MultiprocessedGenerator '
 'object at 0x7f41c32fddd0>, nb_samples=75000, n=5, alphabet_size=16, '
 'delta=0.1, epsilon=0.1, with_smoothing=False, with_ground=False, '
 'with_infty_norm=True)')
[2020-12-31 00:20:29,842][src.learn_pdfa][INFO] Generating the sample.
[2020-12-31 00:20:40,418][src.learn_pdfa][INFO] Average trace length: 8.974613333333334.
[2020-12-31 00:20:40,419][src.learn_pdfa][INFO] Populate root multiset.
[2020-12-31 00:20:41,262][src.learn_pdfa][INFO] Iteration 0
[2020-12-31 00:20:41,726][src.learn_pdfa][INFO] Iteration 1
[2020-12-31 00:20:42,153][src.learn_pdfa][INFO] Iteration 2
[2020-12-31 00:20:42,744][src.learn_pdfa][INFO] Iteration 3
[2020-12-31 00:20:43,318][src.learn_pdfa][INFO] Iteration 4
[2020-12-31 00:20:44,215][src.learn_pdfa][INFO] Iteration 5
[2020-12-31 00:20:44,816][src.learn_pdfa][INFO] Iteration 6
[2020-12-31 00:20:45,815][src.le

##### Rotating MAB, 5 arms, with probabilities (1.0, 0.0, 0.0, 0.0, 0.0)


In [23]:
# Five arms with winning probabilities (1.0, 0.0, 0.0, 0.0, 0.0), 75000
# samples, stop_probability=0.1.
# `rdp_generator` is rebound at notebook level on purpose: `char2str` and
# `print_rdp_pdfa_transitions` read it as a global to decode symbols.
# NOTE(review): the saved output below starts with a printed config dict that
# no statement in this cell produces -- the stored output may come from an
# earlier version of the code; re-run to confirm.
rdp_generator, pdfa = learning_rotating_mab(
    stop_probability=0.1,
    winning_probabilities=(1.0, 0.0, 0.0, 0.0, 0.0),
    nb_samples=75000,
    **DEFAULT_CONFIG)

render_automaton(pdfa, char2str=char2str, with_prob=False)
print_rdp_pdfa_transitions(pdfa)

{'max_episode_steps': 30, 'delta': 0.1, 'n_upperbound': 5, 'nb_processes': 8}
Apriori expected length of traces: 1/stop_prob = 9.999999999999977
Average length of traces: 8.569
Print transitions
qi=0	a=4	r=0	s=0	qf=0	p=0.18677333333333335
qi=0	a=0	r=1	s=1	qf=1	p=0.1856
qi=0	a=1	r=0	s=0	qf=0	p=0.18112
qi=0	a=2	r=0	s=0	qf=0	p=0.17781333333333332
qi=0	a=3	r=0	s=0	qf=0	p=0.17184
qi=0	c=-1			qf=-1	p=0.09685333333333333
qi=1	a=1	r=1	s=1	qf=2	p=0.18241347053320858
qi=1	a=3	r=0	s=0	qf=1	p=0.17633302151543498
qi=1	a=4	r=0	s=0	qf=1	p=0.17352666043030868
qi=1	a=0	r=0	s=0	qf=1	p=0.17960710944808234
qi=1	a=2	r=0	s=0	qf=1	p=0.18475210477081383
qi=1	c=-1			qf=-1	p=0.10336763330215154
qi=2	a=2	r=1	s=1	qf=3	p=0.19784172661870503
qi=2	a=1	r=0	s=0	qf=2	p=0.17535971223021574
qi=2	a=4	r=0	s=0	qf=2	p=0.18615107913669063
qi=2	a=0	r=0	s=0	qf=2	p=0.18075539568345317
qi=2	a=3	r=0	s=0	qf=2	p=0.16097122302158276
qi=2	c=-1			qf=-1	p=0.09892086330935258
qi=3	a=0	r=0	s=0	qf=3	p=0.17630853994490353
qi=3	a=3	r=1	s=1	q

[2020-12-31 00:33:42,284][src.learn_pdfa][INFO] Parameters: ('BalleParams(sample_generator=<src.learn_pdfa.utils.generator.MultiprocessedGenerator '
 'object at 0x7f411e562d50>, nb_samples=75000, n=5, alphabet_size=20, '
 'delta=0.1, epsilon=0.1, with_smoothing=False, with_ground=False, '
 'with_infty_norm=True)')
[2020-12-31 00:33:42,288][src.learn_pdfa][INFO] Generating the sample.
[2020-12-31 00:33:51,520][src.learn_pdfa][INFO] Average trace length: 9.685653333333333.
[2020-12-31 00:33:51,521][src.learn_pdfa][INFO] Populate root multiset.
[2020-12-31 00:33:51,995][src.learn_pdfa][INFO] Iteration 0
[2020-12-31 00:33:52,385][src.learn_pdfa][INFO] Iteration 1
[2020-12-31 00:33:52,718][src.learn_pdfa][INFO] Iteration 2
[2020-12-31 00:33:53,171][src.learn_pdfa][INFO] Iteration 3
[2020-12-31 00:33:53,649][src.learn_pdfa][INFO] Iteration 4
[2020-12-31 00:33:54,297][src.learn_pdfa][INFO] Iteration 5
[2020-12-31 00:33:54,684][src.learn_pdfa][INFO] Iteration 6
[2020-12-31 00:33:55,136][src.le