## RDP Learning

In this notebook, we show how to perform PAC learning
over RDPs.

### Experiment 1: Rotating MAB

In [1]:

from typing import Tuple

import gym
import numpy as np
from gym.wrappers import TimeLimit


from notebooks.utils import render_automaton
from src import NonMarkovianRotatingMAB
from src.learn_pdfa.base import learn_pdfa, Algorithm
from src.learn_pdfa.utils.generator import MultiprocessedGenerator
from src.learn_rdps import random_exploration_policy, RDPGenerator
from src.pdfa import PDFA
from src.pdfa.base import FINAL_SYMBOL
from functools import partial

char2str = lambda c: str(rdp_generator.decoder(c)) if c!=FINAL_SYMBOL else "-1"

env = NonMarkovianRotatingMAB(winning_probs=[0.9, 0.2])

print(f"Observation space: {env.observation_space}")
print(f"Action space: {env.action_space}")

s = env.reset()
print(f"Initial state: {s}")

action = 0
sp, reward, _, _ = env.step(action)
print("-" * 10)
print(f"Action: {action}")
print(f"Next state: {sp}")
print(f"Reward: {reward}")

sp, reward, _, _ = env.step(action)
print("-" * 10)
print(f"Action: {action}")
print(f"Next state: {sp}")
print(f"Reward: {reward}")


Observation space: Discrete(2)
Action space: Discrete(2)
Initial state: 0
----------
Action: 0
Next state: 1
Reward: 1.0
----------
Action: 0
Next state: 1
Reward: 1.0


### Learning

In [2]:
def learning_rotating_mab(
    stop_probability: float,
    winning_probabilities: Tuple[float, ...],
    max_episode_steps: int,
    nb_samples: int,
    delta: float,
    n_upperbound: int,
    nb_processes: int = 8,
) -> Tuple[RDPGenerator, PDFA]:
    """Test learning of Rotating MAB."""
    env = gym.make("NonMarkovianRotatingMAB-v0", winning_probs=winning_probabilities)
    env = TimeLimit(env, max_episode_steps=max_episode_steps)

    policy = partial(random_exploration_policy, env)

    rdp_generator = RDPGenerator(
        env, policy=policy, nb_rewards=2, stop_probability=stop_probability
    )

    examples = rdp_generator.sample(n=1000)

    print(
        f"Apriori expected length of traces: 1/stop_prob = {1 / (stop_probability + np.finfo(float).eps)}"
    )
    print(f"Average length of traces: {np.mean([len(e) for e in examples])}")

    mp_rdp_generator = MultiprocessedGenerator(rdp_generator, nb_processes=nb_processes)
    pdfa = learn_pdfa(
        algorithm=Algorithm.BALLE,
        nb_samples=nb_samples,
        sample_generator=mp_rdp_generator,
        alphabet_size=rdp_generator.alphabet_size(),
        delta=delta,
        n=n_upperbound,
    )
    return rdp_generator, pdfa


def print_rdp_pdfa_transitions(pdfa: PDFA):
    print("Print transitions")
    for qi, out in pdfa.transition_dict.items():
        for c, (qf, prob) in out.items():
            if c == -1:
                print(f"qi={qi}\tc={c}\t\t\tqf={qf}\tp={prob}")
            else:
                a,r,s = rdp_generator.decoder(c)
                print(f"qi={qi}\ta={a}\tr={r}\ts={s}\tqf={qf}\tp={prob}")
        
        

##### Rotating MAB, 2 arms, with probabilities (1.0, 0.0)

In [3]:
rdp_generator, pdfa = learning_rotating_mab(
    stop_probability=0.2,
    winning_probabilities=(1.0, 0.0),
    max_episode_steps=15,
    nb_samples=100000,
    delta=0.1,
    n_upperbound=4,
    nb_processes=8)

render_automaton(pdfa, char2str=char2str, with_prob=False)
print_rdp_pdfa_transitions(pdfa)

Apriori expected length of traces: 1/stop_prob = 4.999999999999994
Average length of traces: 3.679
Print transitions
qi=0	a=1	r=0	s=0	qf=0	p=0.41184
qi=0	a=0	r=1	s=1	qf=1	p=0.38784
qi=0	c=-1			qf=-1	p=0.20032
qi=1	a=0	r=0	s=0	qf=1	p=0.40387858347386174
qi=1	a=1	r=1	s=1	qf=0	p=0.3954468802698145
qi=1	c=-1			qf=-1	p=0.20067453625632378


[2020-11-21 19:04:49,223][src.learn_pdfa][INFO] Parameters: ('BalleParams(sample_generator=<src.learn_pdfa.utils.generator.MultiprocessedGenerator '
 'object at 0x7fc10fca5090>, nb_samples=100000, n=4, alphabet_size=8, '
 'delta=0.1, epsilon=0.1, with_smoothing=False, with_ground=False)')
[2020-11-21 19:04:49,225][src.learn_pdfa][INFO] Generating the sample.
[2020-11-21 19:04:52,794][src.learn_pdfa][INFO] Populate root multiset.
[2020-11-21 19:04:52,824][src.learn_pdfa][INFO] Iteration 0
[2020-11-21 19:04:53,464][src.learn_pdfa][INFO] Iteration 1
[2020-11-21 19:04:54,136][src.learn_pdfa][INFO] Iteration 2
[2020-11-21 19:04:54,986][src.learn_pdfa][INFO] Iteration 3
[2020-11-21 19:04:55,857][src.learn_pdfa][INFO] Iteration 4
[2020-11-21 19:04:56,729][graphviz.files][DEBUG] write 258 bytes to '/tmp/tmp7kidt8_q/output'
[2020-11-21 19:04:56,729][graphviz.backend][DEBUG] run ['dot', '-Tsvg', '-O', 'output']
Process ForkPoolWorker-5:
Process ForkPoolWorker-4:
Process ForkPoolWorker-3:
Process

##### Rotating MAB, 2 arms, with probabilities (0.7, 0.3)

In [18]:
rdp_generator, pdfa = learning_rotating_mab(
    stop_probability=0.2,
    winning_probabilities=(0.7, 0.3),
    max_episode_steps=15,
    nb_samples=300000,
    delta=0.1,
    n_upperbound=4,
    nb_processes=8)

render_automaton(pdfa, char2str=char2str, with_prob=False)
print_rdp_pdfa_transitions(pdfa)

Apriori expected length of traces: 1/stop_prob = 4.999999999999994
Average length of traces: 3.679


[2020-11-19 23:06:50,197][src.learn_pdfa][INFO] Parameters: ('BalleParams(sample_generator=<src.learn_pdfa.utils.generator.MultiprocessedGenerator '
 'object at 0x7f8386e03810>, nb_samples=300000, n=4, alphabet_size=8, '
 'delta=0.1, epsilon=0.1, with_smoothing=False, with_ground=False)')
[2020-11-19 23:06:50,199][src.learn_pdfa][INFO] Generating the sample.
[2020-11-19 23:07:05,843][src.learn_pdfa][INFO] Populate root multiset.
[2020-11-19 23:07:05,932][src.learn_pdfa][INFO] Iteration 0
[2020-11-19 23:07:09,464][src.learn_pdfa][INFO] Iteration 1
[2020-11-19 23:07:14,072][src.learn_pdfa][INFO] Iteration 2
[2020-11-19 23:07:18,112][src.learn_pdfa][INFO] Iteration 3
[2020-11-19 23:07:22,268][src.learn_pdfa][INFO] Iteration 4
[2020-11-19 23:07:26,579][src.learn_pdfa][INFO] Iteration 5
[2020-11-19 23:07:31,242][src.learn_pdfa][INFO] Iteration 6
[2020-11-19 23:07:35,766][src.learn_pdfa][INFO] Iteration 7
[2020-11-19 23:07:40,701][src.learn_pdfa][INFO] Iteration 8
[2020-11-19 23:07:43,759][g

Print transitions
qi=0	a=0	r=1	s=1	qf=1	p=0.28128
qi=0	a=1	r=0	s=0	qf=0	p=0.27976
qi=0	a=1	r=1	s=1	qf=1	p=0.11994666666666666
qi=0	a=0	r=0	s=0	qf=0	p=0.12144
qi=0	c=-1			qf=-1	p=0.19757333333333332
qi=1	a=1	r=1	s=1	qf=0	p=0.27815699658703075
qi=1	a=0	r=0	s=0	qf=1	p=0.2758816837315131
qi=1	a=1	r=0	s=0	qf=1	p=0.12580583996966252
qi=1	a=0	r=1	s=1	qf=0	p=0.12011755783086843
qi=1	c=-1			qf=-1	p=0.2000379218809253




##### Rotating MAB, 3 arms, with probabilities (1.0, 0.0, 0.0)

In [4]:
rdp_generator, pdfa = learning_rotating_mab(
    stop_probability=0.2,
    winning_probabilities=(0.7, 0.3),
    max_episode_steps=15,
    nb_samples=300000,
    delta=0.1,
    n_upperbound=4,
    nb_processes=8)

render_automaton(pdfa, char2str=char2str, with_prob=False)
print_rdp_pdfa_transitions(pdfa)

Apriori expected length of traces: 1/stop_prob = 4.999999999999994
Average length of traces: 3.679
Print transitions
qi=0	a=0	r=1	s=1	qf=1	p=0.28256
qi=0	a=1	r=0	s=0	qf=0	p=0.27848
qi=0	a=1	r=1	s=1	qf=1	p=0.12165333333333334
qi=0	a=0	r=0	s=0	qf=0	p=0.11973333333333333
qi=0	c=-1			qf=-1	p=0.19757333333333332
qi=1	a=0	r=0	s=0	qf=1	p=0.2805775764439411
qi=1	a=1	r=1	s=1	qf=0	p=0.28020007550018877
qi=1	a=1	r=0	s=0	qf=1	p=0.12344280860702152
qi=1	a=0	r=1	s=1	qf=0	p=0.11740279350698377
qi=1	c=-1			qf=-1	p=0.19837674594186486


[2020-11-21 19:05:25,891][src.learn_pdfa][INFO] Parameters: ('BalleParams(sample_generator=<src.learn_pdfa.utils.generator.MultiprocessedGenerator '
 'object at 0x7fc15c5ec0d0>, nb_samples=300000, n=4, alphabet_size=8, '
 'delta=0.1, epsilon=0.1, with_smoothing=False, with_ground=False)')
[2020-11-21 19:05:25,893][src.learn_pdfa][INFO] Generating the sample.
[2020-11-21 19:05:42,211][src.learn_pdfa][INFO] Populate root multiset.
[2020-11-21 19:05:42,316][src.learn_pdfa][INFO] Iteration 0
[2020-11-21 19:05:46,128][src.learn_pdfa][INFO] Iteration 1
[2020-11-21 19:05:50,500][src.learn_pdfa][INFO] Iteration 2
[2020-11-21 19:05:53,720][src.learn_pdfa][INFO] Iteration 3
[2020-11-21 19:05:56,978][src.learn_pdfa][INFO] Iteration 4
[2020-11-21 19:06:00,404][src.learn_pdfa][INFO] Iteration 5
[2020-11-21 19:06:04,105][src.learn_pdfa][INFO] Iteration 6
[2020-11-21 19:06:08,404][src.learn_pdfa][INFO] Iteration 7
[2020-11-21 19:06:12,479][src.learn_pdfa][INFO] Iteration 8
[2020-11-21 19:06:15,133][g

##### Rotating MAB, 3 arms, with probabilities (1.0, 0.0, 0.0)

In [20]:
rdp_generator, pdfa = learning_rotating_mab(
    stop_probability=0.2,
    winning_probabilities=(1.0, 0.0, 0.0),
    max_episode_steps=15,
    nb_samples=100000,
    delta=0.1,
    n_upperbound=5,
    nb_processes=8)

render_automaton(pdfa, char2str=char2str, with_prob=False)
print_rdp_pdfa_transitions(pdfa)

Apriori expected length of traces: 1/stop_prob = 4.999999999999994
Average length of traces: 3.679


[2020-11-19 23:12:47,851][src.learn_pdfa][INFO] Parameters: ('BalleParams(sample_generator=<src.learn_pdfa.utils.generator.MultiprocessedGenerator '
 'object at 0x7f836779b0d0>, nb_samples=100000, n=5, alphabet_size=12, '
 'delta=0.1, epsilon=0.1, with_smoothing=False, with_ground=False)')
[2020-11-19 23:12:47,853][src.learn_pdfa][INFO] Generating the sample.
[2020-11-19 23:12:52,345][src.learn_pdfa][INFO] Populate root multiset.
[2020-11-19 23:12:52,378][src.learn_pdfa][INFO] Iteration 0
[2020-11-19 23:12:53,250][src.learn_pdfa][INFO] Iteration 1
[2020-11-19 23:12:54,543][src.learn_pdfa][INFO] Iteration 2
[2020-11-19 23:12:55,757][src.learn_pdfa][INFO] Iteration 3
[2020-11-19 23:12:56,854][src.learn_pdfa][INFO] Iteration 4
[2020-11-19 23:12:58,011][src.learn_pdfa][INFO] Iteration 5
[2020-11-19 23:12:59,276][src.learn_pdfa][INFO] Iteration 6
[2020-11-19 23:13:00,497][src.learn_pdfa][INFO] Iteration 7
[2020-11-19 23:13:01,754][src.learn_pdfa][INFO] Iteration 8
[2020-11-19 23:13:03,022][

Print transitions
qi=0	a=0	r=1	s=1	qf=1	p=0.27216
qi=0	a=2	r=0	s=0	qf=0	p=0.2644
qi=0	a=1	r=0	s=0	qf=0	p=0.26312
qi=0	a=2	r=1	s=1	qf=0	p=0.0
qi=0	a=0	r=0	s=0	qf=0	p=0.0
qi=0	c=-1			qf=-1	p=0.20032
qi=1	a=2	r=0	s=0	qf=1	p=0.2748383303938859
qi=1	a=0	r=0	s=0	qf=1	p=0.27219282774838327
qi=1	a=1	r=1	s=1	qf=2	p=0.25837742504409167
qi=1	c=-1			qf=-1	p=0.19459141681363898
qi=2	a=2	r=1	s=1	qf=0	p=0.27757307786784574
qi=2	a=0	r=0	s=0	qf=0	p=0.2697126013264554
qi=2	a=1	r=0	s=0	qf=0	p=0.2630803242446573
qi=2	c=-1			qf=-1	p=0.18963399656104152




##### Rotating MAB, 3 arms, with probabilities (0.1, 0.2, 0.9)

In [21]:
rdp_generator, pdfa = learning_rotating_mab(
    stop_probability=0.2,
    winning_probabilities=(0.1, 0.2, 0.9),
    max_episode_steps=1000,
    nb_samples=1000000,
    delta=0.05,
    n_upperbound=6,
    nb_processes=8)

render_automaton(pdfa, char2str=char2str, with_prob=False)
print_rdp_pdfa_transitions(pdfa)

Apriori expected length of traces: 1/stop_prob = 4.999999999999994
Average length of traces: 3.84


[2020-11-19 23:13:17,153][src.learn_pdfa][INFO] Parameters: ('BalleParams(sample_generator=<src.learn_pdfa.utils.generator.MultiprocessedGenerator '
 'object at 0x7f83a44327d0>, nb_samples=1000000, n=6, alphabet_size=12, '
 'delta=0.05, epsilon=0.1, with_smoothing=False, with_ground=False)')
[2020-11-19 23:13:17,156][src.learn_pdfa][INFO] Generating the sample.
[2020-11-19 23:14:23,995][src.learn_pdfa][INFO] Populate root multiset.
[2020-11-19 23:14:24,295][src.learn_pdfa][INFO] Iteration 0
[2020-11-19 23:14:45,350][src.learn_pdfa][INFO] Iteration 1
[2020-11-19 23:15:10,512][src.learn_pdfa][INFO] Iteration 2
[2020-11-19 23:15:37,125][src.learn_pdfa][INFO] Iteration 3
[2020-11-19 23:15:59,406][src.learn_pdfa][INFO] Iteration 4
[2020-11-19 23:16:22,827][src.learn_pdfa][INFO] Iteration 5
[2020-11-19 23:16:47,184][src.learn_pdfa][INFO] Iteration 6
[2020-11-19 23:17:11,136][src.learn_pdfa][INFO] Iteration 7
[2020-11-19 23:17:33,631][src.learn_pdfa][INFO] Iteration 8
[2020-11-19 23:17:56,676

##### Rotating MAB, 4 arms, with probabilities (1.0, 0.0, 0.0, 0.0)


In [23]:
rdp_generator, pdfa = learning_rotating_mab(
    stop_probability=0.1,
    winning_probabilities=(1.0, 0.0, 0.0, 0.0),
    max_episode_steps=20,
    nb_samples=300000,
    delta=0.1,
    n_upperbound=5,
    nb_processes=8)

render_automaton(pdfa, char2str=char2str, with_prob=False)
print_rdp_pdfa_transitions(pdfa)

Apriori expected length of traces: 1/stop_prob = 9.999999999999977
Average length of traces: 7.879


[2020-11-19 23:25:53,799][src.learn_pdfa][INFO] Parameters: ('BalleParams(sample_generator=<src.learn_pdfa.utils.generator.MultiprocessedGenerator '
 'object at 0x7f8346069110>, nb_samples=300000, n=5, alphabet_size=16, '
 'delta=0.1, epsilon=0.1, with_smoothing=False, with_ground=False)')
[2020-11-19 23:25:53,801][src.learn_pdfa][INFO] Generating the sample.
[2020-11-19 23:26:23,104][src.learn_pdfa][INFO] Populate root multiset.
[2020-11-19 23:26:23,335][src.learn_pdfa][INFO] Iteration 0
[2020-11-19 23:26:37,072][src.learn_pdfa][INFO] Iteration 1
[2020-11-19 23:26:48,910][src.learn_pdfa][INFO] Iteration 2
[2020-11-19 23:27:08,089][src.learn_pdfa][INFO] Iteration 3
[2020-11-19 23:27:30,879][src.learn_pdfa][INFO] Iteration 4
[2020-11-19 23:27:45,404][src.learn_pdfa][INFO] Iteration 5
[2020-11-19 23:28:01,675][src.learn_pdfa][INFO] Iteration 6
[2020-11-19 23:28:18,530][src.learn_pdfa][INFO] Iteration 7
[2020-11-19 23:28:35,337][src.learn_pdfa][INFO] Iteration 8
[2020-11-19 23:29:00,201][

Print transitions
qi=0	a=2	r=0	s=0	qf=0	p=0.22725333333333333
qi=0	a=3	r=0	s=0	qf=0	p=0.2264
qi=0	a=0	r=1	s=1	qf=1	p=0.22693333333333332
qi=0	a=1	r=0	s=0	qf=0	p=0.21981333333333333
qi=0	a=1	r=1	s=1	qf=0	p=0.0
qi=0	a=0	r=0	s=0	qf=0	p=0.0
qi=0	a=2	r=1	s=1	qf=1	p=0.0
qi=0	a=3	r=1	s=1	qf=0	p=0.0
qi=0	c=-1			qf=-1	p=0.0996
qi=1	a=0	r=0	s=0	qf=0	p=0.22747059970112404
qi=1	a=3	r=0	s=0	qf=0	p=0.22214281073354558
qi=1	a=2	r=0	s=0	qf=0	p=0.2268858423754142
qi=1	a=1	r=1	s=1	qf=0	p=0.2210382691183159
qi=1	a=1	r=0	s=0	qf=0	p=0.0
qi=1	a=3	r=1	s=1	qf=0	p=0.0
qi=1	c=-1			qf=-1	p=0.10246247807160029




##### Rotating MAB, 5 arms, with probabilities (1.0, 0.0, 0.0, 0.0, 0.0)


In [3]:
# rdp_generator, (v, t) = learning_rotating_mab(
#     stop_probability=0.15,
#     winning_probabilities=(1.0, 0.0, 0.0, 0.0, 0.0),
#     max_episode_steps=100,
#     nb_samples=1000000,
#     delta=0.05,
#     n_upperbound=10,
#     nb_processes=6)
#
# render_digraph(to_graphviz_from_graph(v, t, char2str=lambda c: str(rdp_generator.decoder(c))))

Apriori expected length of traces: 1/stop_prob = 6.666666666666657
Average length of traces: 5.611


[2020-11-06 22:11:12,955][src.learn_pdfa][INFO] Parameters: ('BalleParams(sample_generator=<src.learn_pdfa.common.MultiprocessedGenerator '
 'object at 0x7f3400f48910>, nb_samples=1000000, n=10, alphabet_size=20, '
 'delta=0.05)')
[2020-11-06 22:12:16,380][src.learn_pdfa][INFO] Iteration 0
[2020-11-06 22:12:25,422][src.learn_pdfa][INFO] Iteration 1
[2020-11-06 22:12:35,280][src.learn_pdfa][INFO] Iteration 2
[2020-11-06 22:12:45,929][src.learn_pdfa][INFO] Iteration 3
[2020-11-06 22:12:59,253][src.learn_pdfa][INFO] Iteration 4
[2020-11-06 22:13:14,098][src.learn_pdfa][INFO] Iteration 5
[2020-11-06 22:13:28,238][src.learn_pdfa][INFO] Iteration 6
[2020-11-06 22:13:43,299][src.learn_pdfa][INFO] Iteration 7
[2020-11-06 22:13:58,909][src.learn_pdfa][INFO] Iteration 8
[2020-11-06 22:14:15,204][src.learn_pdfa][INFO] Iteration 9
[2020-11-06 22:14:32,561][src.learn_pdfa][INFO] Iteration 10
[2020-11-06 22:14:48,810][src.learn_pdfa][INFO] Iteration 11
[2020-11-06 22:15:05,389][src.learn_pdfa][INFO]