In [1]:
import time
import numpy as np
import matplotlib.pyplot as plt

from bayegent import Bayegent
from environment import GridMazeEnvironment

In [2]:
def run_one_seed(seed, parameters, learning_method='bayesian', n_runs=100):
    """Train a freshly built agent on the environment created from ``seed``.

    A new ``GridMazeEnvironment`` and a new ``Bayegent`` are constructed from
    the same seed on every call, so nothing carries over between calls made
    through this function.

    Args:
        seed: Passed to both the environment and the agent constructors.
        parameters: Forwarded to ``Bayegent`` as its ``parameters`` keyword.
        learning_method: ``'bayesian'`` selects ``agent.learn_bayesian``;
            every other value selects ``agent.learn_qtable``.
        n_runs: Forwarded unchanged to the selected learning method.

    Returns:
        Whatever the selected learning method returns. Callers in this
        notebook treat it as a sequence with one position history per run.
    """
    maze = GridMazeEnvironment(seed)
    learner = Bayegent(maze, seed, parameters=parameters)

    # Only an exact 'bayesian' picks the Bayesian learner; anything else
    # (the notebook passes 'rl') falls through to learn_qtable.
    train = learner.learn_bayesian if learning_method == 'bayesian' else learner.learn_qtable
    return train(n_runs)


In [3]:
def run_experiment(curiosity_interval=(0.2,0.9), gridsize=10, n_runs=100, n_seeds=500):
    """Compare the Bayesian learner with Q-learning over many seeds.

    For every seed in ``range(n_seeds)`` both learners are trained through
    ``run_one_seed`` with identical hyper-parameters. The elapsed time of each
    training call and the lengths of the returned position histories are
    recorded.

    Args:
        curiosity_interval: ``(low, high)`` bounds of the curiosity schedule.
            The schedule holds ``n_runs`` evenly spaced values ordered from
            ``high`` down to ``low``.
        gridsize: Currently unused by this function; kept so existing callers
            that pass it keep working.
        n_runs: Number of runs per seed, and the length of the curiosity
            schedule.
        n_seeds: Number of seeds to evaluate (``0 .. n_seeds - 1``). Defaults
            to 500, the value that used to be hard-coded.

    Returns:
        A tuple ``(last_path_lengths, min_path_lengths, running_times)``.
        Each element is a dict with the keys ``'bayesian'`` and
        ``'qlearning'`` mapping to a list with one entry per seed:
        the length of the last position history, the smallest history length,
        and the elapsed seconds of the training call, respectively.
    """
    # np.linspace is ascending; reverse it so the schedule starts at the upper bound.
    curiosities = list(reversed(np.linspace(curiosity_interval[0], curiosity_interval[1], n_runs)))

    # (result key, learning_method handed to run_one_seed), in execution order.
    # 'rl' is not special-cased by run_one_seed: it simply is not 'bayesian'.
    methods = (('bayesian', 'bayesian'), ('qlearning', 'rl'))

    running_times = { 'bayesian': [], 'qlearning': [] }
    last_path_lengths = { 'bayesian': [], 'qlearning': [] }
    min_path_lengths = { 'bayesian': [], 'qlearning': [] }

    for seed in range(n_seeds):
        print(f'{seed}/{n_seeds}')
        for label, learning_method in methods:
            # Fresh dict and fresh schedule list per call, so an agent that
            # mutates its parameters cannot affect the other learner.
            parameters = {
                'curiosity': list(curiosities),
                'step_reward': -0.1,
                'goal_reward': 1,
                'learning_rate': 0.5,
                'discount_factor': 0.8,
            }

            # perf_counter is monotonic, unlike time.time(), so a system
            # clock adjustment cannot distort the measured interval.
            start = time.perf_counter()
            position_histories = run_one_seed(seed,
                                              parameters=parameters,
                                              learning_method=learning_method,
                                              n_runs=n_runs)
            running_times[label].append(time.perf_counter() - start)

            path_lengths = [len(history) for history in position_histories]
            last_path_lengths[label].append(path_lengths[-1])
            min_path_lengths[label].append(min(path_lengths))

    return last_path_lengths, min_path_lengths, running_times

In [4]:
# Run the full comparison with the default settings; the three result dicts
# are keyed by 'bayesian' and 'qlearning' and feed the summary cells below.
(last_path_lengths,
 min_path_lengths,
 running_times) = run_experiment()

0/500
1/500
2/500
3/500
4/500
5/500
6/500
7/500
8/500
9/500
10/500
11/500
12/500
13/500
14/500
15/500
16/500
17/500
18/500
19/500
20/500
21/500
22/500
23/500
24/500
25/500
26/500
27/500
28/500
29/500
30/500
31/500
32/500
33/500
34/500
35/500
36/500
37/500
38/500
39/500
40/500
41/500
42/500
43/500
44/500
45/500
46/500
47/500
48/500
49/500
50/500
51/500
52/500
53/500
54/500
55/500
56/500
57/500
58/500
59/500
60/500
61/500
62/500
63/500
64/500
65/500
66/500
67/500
68/500
69/500
70/500
71/500
72/500
73/500
74/500
75/500
76/500
77/500
78/500
79/500
80/500
81/500
82/500
83/500
84/500
85/500
86/500
87/500
88/500
89/500
90/500
91/500
92/500
93/500
94/500
95/500
96/500
97/500
98/500
99/500
100/500
101/500
102/500
103/500
104/500
105/500
106/500
107/500
108/500
109/500
110/500
111/500
112/500
113/500
114/500
115/500
116/500
117/500
118/500
119/500
120/500
121/500
122/500
123/500
124/500
125/500
126/500
127/500
128/500
129/500
130/500
131/500
132/500
133/500
134/500
135/500
136/500
137/500
138/50

In [8]:
# Mean length of the last run's path across all seeds, one value per learner.
mean_pathlength_bayesian, mean_pathlength_qlearning = (
    np.mean(last_path_lengths[method]) for method in ('bayesian', 'qlearning')
)

mean_pathlength_bayesian, mean_pathlength_qlearning

(957.348, 474.048)

In [11]:
# Mean of the shortest path found per seed, one value per learner.
mean_minpathlength_bayesian, mean_minpathlength_qlearning = (
    np.mean(min_path_lengths[method]) for method in ('bayesian', 'qlearning')
)

mean_minpathlength_bayesian, mean_minpathlength_qlearning

(28.852, 70.088)

In [12]:
# Mean elapsed time of one training call per seed, one value per learner.
mean_runtime_bayesian, mean_runtime_qlearning = (
    np.mean(running_times[method]) for method in ('bayesian', 'qlearning')
)

mean_runtime_bayesian, mean_runtime_qlearning

(16.1839386548996, 2.9181153874397276)