In [1]:
import time
import numpy as np
import matplotlib.pyplot as plt

from bayegent import Bayegent
from environment import GridMazeEnvironment

In [2]:
def run_one_seed(seed, parameters, n_runs=100):
    environment = GridMazeEnvironment(seed)
    agent = Bayegent(environment, seed, parameters=parameters)

    all_position_histories = agent.learn_bayesian(n_runs)

    return all_position_histories


## Weaning curiosity

Try the first 100 with a weaning curiosity, weaning from 0.9 to 0.3 over 100 runs

In [5]:
n_runs = 100
curiosities = list(reversed(np.linspace(0.3,0.9,n_runs)))

weaning_curiosity_path_lengths = []
running_times_weaning = []

# Iterate over 100 seeds
for seed in range(100):
    print(f'{seed}/100')
    start = time.time()
    position_history = run_one_seed(seed, 
                                    parameters={
                                        'curiosity': curiosities,
                                        'step_reward': -0.1,
                                        'goal_reward': 1,
                                        'learning_rate': 0.5,
                                        'discount_factor': 0.8,
                                    },
                                    n_runs=n_runs)
    end = time.time()

    running_times_weaning.append(end-start)
    weaning_curiosity_path_lengths.append(len(position_history))

mean_path_length_weaning_curiosity = np.mean(weaning_curiosity_path_lengths)

0/100
1/100
2/100
3/100
4/100
5/100
6/100
7/100
8/100
9/100
10/100
11/100
12/100
13/100
14/100
15/100
16/100
17/100
18/100
19/100
20/100
21/100
22/100
23/100
24/100
25/100
26/100
27/100
28/100
29/100
30/100
31/100
32/100
33/100
34/100
35/100
36/100
37/100
38/100
39/100
40/100
41/100
42/100
43/100
44/100
45/100
46/100
47/100
48/100
49/100
50/100
51/100
52/100
53/100
54/100
55/100
56/100
57/100
58/100
59/100
60/100
61/100
62/100
63/100
64/100
65/100
66/100
67/100
68/100
69/100
70/100
71/100
72/100
73/100
74/100
75/100
76/100
77/100
78/100
79/100
80/100
81/100
82/100
83/100
84/100
85/100
86/100
87/100
88/100
89/100
90/100
91/100
92/100
93/100
94/100
95/100
96/100
97/100
98/100
99/100


In [6]:
print('Running times: ', np.mean(running_times_weaning))
print('Mean path length: ', mean_path_length_weaning_curiosity)

Running times:  11.016092185974122
Mean path length:  200.37


In [3]:
def run_weaning_experiment(curiosity_interval, n_runs=100):
    curiosities = list(reversed(np.linspace(curiosity_interval[0],curiosity_interval[1],n_runs)))

    weaning_curiosity_last_path_lengths = []
    weaning_curiosity_min_path_lengths = []
    running_times_weaning = []

    # Iterate over 100 seeds
    for seed in range(100):
        print(f'{seed}/100')
        start = time.time()
        position_histories = run_one_seed(seed, 
                                        parameters={
                                            'curiosity': curiosities,
                                            'step_reward': -0.1,
                                            'goal_reward': 1,
                                            'learning_rate': 0.5,
                                            'discount_factor': 0.8,
                                        },
                                        n_runs=n_runs)
        end = time.time()

        path_lengths = [len(pl) for pl in position_histories]

        running_times_weaning.append(end-start)
        weaning_curiosity_last_path_lengths.append(path_lengths[-1])
        weaning_curiosity_min_path_lengths.append(min(path_lengths))

    return weaning_curiosity_last_path_lengths, weaning_curiosity_min_path_lengths, running_times_weaning

In [4]:
last_path_lengths, min_path_lengths, running_times = run_weaning_experiment((0.2, 0.9), 100)
print('Running times: ', np.mean(running_times))
print('Mean last path length: ', np.mean(last_path_lengths))
print('Mean minimum path length: ', np.mean(min_path_lengths))

0/100
1/100
2/100
3/100
4/100
5/100
6/100
7/100
8/100
9/100
10/100
11/100
12/100
13/100
14/100
15/100
16/100
17/100
18/100
19/100
20/100
21/100
22/100
23/100
24/100
25/100
26/100
27/100
28/100
29/100
30/100
31/100
32/100
33/100
34/100
35/100
36/100
37/100
38/100
39/100
40/100
41/100
42/100
43/100
44/100
45/100
46/100
47/100
48/100
49/100
50/100
51/100
52/100
53/100
54/100
55/100
56/100
57/100
58/100
59/100
60/100
61/100
62/100
63/100
64/100
65/100
66/100
67/100
68/100
69/100
70/100
71/100
72/100
73/100
74/100
75/100
76/100
77/100
78/100
79/100
80/100
81/100
82/100
83/100
84/100
85/100
86/100
87/100
88/100
89/100
90/100
91/100
92/100
93/100
94/100
95/100
96/100
97/100
98/100
99/100
Running times:  8.95418585538864
Mean last path length:  111.85
Mean minimum path length:  27.67


In [5]:
last_path_lengths, min_path_lengths, running_times = run_weaning_experiment((0.2, 1), 100)
print('Running times: ', np.mean(running_times))
print('Mean last path length: ', np.mean(last_path_lengths))
print('Mean minimum path length: ', np.mean(min_path_lengths))

0/100
1/100
2/100
3/100
4/100
5/100
6/100
7/100
8/100
9/100
10/100
11/100
12/100
13/100
14/100
15/100
16/100
17/100
Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/Users/kam/.pyenv/versions/3.10.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3526, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/kz/71m_db3s715216ktqkjkw9nm0000gq/T/ipykernel_92602/2542348780.py", line 1, in <module>
    last_path_lengths, min_path_lengths, running_times = run_weaning_experiment((0.2, 1), 100)
  File "/var/folders/kz/71m_db3s715216ktqkjkw9nm0000gq/T/ipykernel_92602/3175335434.py", line 12, in run_weaning_experiment
    position_histories = run_one_seed(seed,
  File "/var/folders/kz/71m_db3s715216ktqkjkw9nm0000gq/T/ipykernel_92602/3065630128.py", line 5, in run_one_seed
    all_position_histories = agent.learn_bayesian(n_runs)
  File "/Users/kam/dev/classes/bayesian/bayesian_agent/bayegent.py", line 36, in learn_bayesian
    position_history, sa_history, posterior_history  = self.run_maze_bayesian(i)
  File "/Users/kam/dev/classes/bayesian/bayesian_agent/bayege

In [6]:
last_path_lengths, min_path_lengths, running_times = run_weaning_experiment((0.15, 0.9), 100)
print('Running times: ', np.mean(running_times))
print('Mean last path length: ', np.mean(last_path_lengths))
print('Mean minimum path length: ', np.mean(min_path_lengths))

0/100
1/100
2/100
3/100
4/100
5/100
6/100
7/100
8/100
9/100
10/100
11/100
12/100
13/100
14/100
15/100
16/100
17/100
18/100
19/100
20/100
21/100
22/100
23/100
24/100
25/100
26/100
27/100
28/100
29/100
30/100
31/100
32/100
33/100
34/100
35/100
36/100
37/100
38/100
39/100
40/100
41/100
42/100
43/100
44/100
45/100
46/100
47/100
48/100
49/100
50/100
51/100
52/100
53/100
54/100
55/100
56/100
57/100
58/100
59/100
60/100
61/100
62/100
63/100
64/100
65/100
66/100
67/100
68/100
69/100
70/100
71/100
72/100
73/100
74/100
75/100
76/100
77/100
78/100
79/100
80/100
81/100
82/100
83/100
84/100
85/100
86/100
87/100
88/100
89/100
90/100
91/100
92/100
93/100
94/100
95/100
96/100
97/100
98/100
99/100
Running times:  9.339381091594696
Mean last path length:  163.83
Mean minimum path length:  26.51


In [7]:
last_path_lengths, min_path_lengths, running_times = run_weaning_experiment((0.2, 0.95), 100)
print('Running times: ', np.mean(running_times))
print('Mean last path length: ', np.mean(last_path_lengths))
print('Mean minimum path length: ', np.mean(min_path_lengths))

0/100
1/100
2/100
3/100
4/100
5/100
6/100
7/100
8/100
9/100
10/100
11/100
12/100
13/100
14/100
15/100
16/100
17/100
18/100
19/100
20/100
21/100
22/100
23/100
24/100
25/100
26/100
27/100
28/100
29/100
30/100
31/100
32/100
33/100
34/100
35/100
36/100
37/100
38/100
39/100
40/100
41/100
42/100
43/100
44/100
45/100
46/100
47/100
48/100
49/100
50/100
51/100
52/100
53/100
54/100
55/100
56/100
57/100
58/100
59/100
60/100
61/100
62/100
63/100
64/100
65/100
66/100
67/100
68/100
69/100
70/100
71/100
72/100
73/100
74/100
75/100
76/100
77/100
78/100
79/100
80/100
81/100
82/100
83/100
84/100
85/100
86/100
87/100
88/100
89/100
90/100
91/100
92/100
93/100
94/100
95/100
96/100
97/100
98/100
99/100
Running times:  10.028860132694245
Mean last path length:  705.03
Mean minimum path length:  27.41


In [9]:
last_path_lengths, min_path_lengths, running_times = run_weaning_experiment((0.3, 0.7), 100)
print('Running times: ', np.mean(running_times))
print('Mean last path length: ', np.mean(last_path_lengths))
print('Mean minimum path length: ', np.mean(min_path_lengths))

0/100
1/100
2/100
3/100
4/100
5/100
6/100
7/100
8/100
9/100
10/100
11/100
12/100
13/100
14/100
15/100
16/100
17/100
18/100
19/100
20/100
21/100
22/100
23/100
24/100
25/100
26/100
27/100
28/100
29/100
30/100
31/100
32/100
33/100
34/100
35/100
36/100
37/100
38/100
39/100
40/100
41/100
42/100
43/100
44/100
45/100
46/100
47/100
48/100
49/100
50/100
51/100
52/100
53/100
54/100
55/100
56/100
57/100
58/100
59/100
60/100
61/100
62/100
63/100
64/100
65/100
66/100
67/100
68/100
69/100
70/100
71/100
72/100
73/100
74/100
75/100
76/100
77/100
78/100
79/100
80/100
81/100
82/100
83/100
84/100
85/100
86/100
87/100
88/100
89/100
90/100
91/100
92/100
93/100
94/100
95/100
96/100
97/100
98/100
99/100
Running times:  8.61414438009262
Mean last path length:  437.37
Mean minimum path length:  27.95


In [10]:
last_path_lengths, min_path_lengths, running_times = run_weaning_experiment((0.3, 0.6), 100)
print('Running times: ', np.mean(running_times))
print('Mean last path length: ', np.mean(last_path_lengths))
print('Mean minimum path length: ', np.mean(min_path_lengths))

0/100
1/100
2/100
3/100
4/100
5/100
6/100
7/100
8/100
9/100
10/100
11/100
12/100
13/100
14/100
15/100
16/100
17/100
18/100
19/100
20/100
21/100
22/100
23/100
24/100
25/100
26/100
27/100
28/100
29/100
30/100
31/100
32/100
33/100
34/100
35/100
36/100
37/100
38/100
39/100
40/100
41/100
42/100
43/100
44/100
45/100
46/100
47/100
48/100
49/100
50/100
51/100
52/100
53/100
54/100
55/100
56/100
57/100
58/100
59/100
60/100
61/100
62/100
63/100
64/100
65/100
66/100
67/100
68/100
69/100
70/100
71/100
72/100
73/100
74/100
75/100
76/100
77/100
78/100
79/100
80/100
81/100
82/100
83/100
84/100
85/100
86/100
87/100
88/100
89/100
90/100
91/100
92/100
93/100
94/100
95/100
96/100
97/100
98/100
99/100
Running times:  10.245891482830048
Mean last path length:  181.05
Mean minimum path length:  27.63
