# basicPOMCP.jl
#UAI 2023
#Compute the return and the runtime of the BasicPOMCP solver for POMDP planning
using Pkg
Pkg.add("POMDPs")
using POMDPs
POMDPs.add_registry()
Pkg.add("BasicPOMCP")
Pkg.add("CSV")
Pkg.add("DataFrames")
Pkg.add("DataFramesMeta")
Pkg.add("QuickPOMDPs")
Pkg.add("POMDPModelTools")
Pkg.add("POMDPModels")
Pkg.add("POMDPSimulators")
Pkg.add("Random")
using Random
using POMDPModels, POMDPSimulators, BasicPOMCP
using QuickPOMDPs
using CSV
using DataFrames, DataFramesMeta
using CSV: File
using POMDPModelTools: SparseCat
using POMDPModelTools
using QuickPOMDPs: DiscreteExplicitPOMDP
# QuickPOMDPs --> Discrete Explicit Interface
#https://juliapomdp.github.io/QuickPOMDPs.jl/dev/discrete_explicit/#Example
# Time the whole model-construction + solver phase; `runtime` is elapsed seconds.
# NOTE: the matching `end` for this `begin` is after the solver call below.
runtime= @elapsed begin
# Domain riverswim — uncomment exactly one domain section at a time.
discountpath= joinpath(@__DIR__,"domain","riverswim",
"parameters.csv");
testpath = joinpath(@__DIR__,"domain","riverswim",
"test.csv");
initialpath = joinpath(@__DIR__,"domain","riverswim",
"initial.csv");
#=
# Domain inventory
discountpath= joinpath(@__DIR__,"domain","inventory",
"parameters.csv");
testgpath = joinpath(@__DIR__,"domain","inventory",
"test.csv");
initialpath = joinpath(@__DIR__,"domain","inventory",
"initial.csv");
=#
#=
# Domain hiv
discountpath= joinpath(@__DIR__,"domain","hiv",
"parameters.csv");
testpath = joinpath(@__DIR__,"domain","hiv",
"test.csv");
initialpath = joinpath(@__DIR__,"domain","hiv",
"initial.csv");
=#
#=
# Domain population
discountpath= joinpath(@__DIR__,"domain","population",
"parameters.csv");
testpath = joinpath(@__DIR__,"domain","population",
"test.csv");
initialpath = joinpath(@__DIR__,"domain","population",
"initial.csv");
=#
#=
# Domain population_small
discountpath= joinpath(@__DIR__,"domain","population_small",
"parameters.csv");
testpath = joinpath(@__DIR__,"domain","population_small",
"test.csv");
initialpath = joinpath(@__DIR__,"domain","population_small",
"initial.csv");
=#
# Read discount factor (cell [1,2] of parameters.csv)
discount = DataFrame(File(discountpath))[1,2]; #single value 0.9
# Read the test file and offset the 0-based ids to Julia's 1-based indexing
t_df = DataFrame(File(testpath)); #t_df: dataFrame of test.csv
t_dfone = @transform(t_df, :idstatefrom = :idstatefrom .+1, :idaction = :idaction .+ 1,
:idstateto = :idstateto .+1, :idoutcome = :idoutcome .+ 1);
# Sizes inferred from the data: states/actions of the MMDP, and the number
# of candidate models (outcomes). lambda is a uniform prior over models.
statecount = max(maximum(t_dfone.idstatefrom), maximum(t_dfone.idstateto))
actioncount = maximum(t_dfone.idaction)
modelcount = maximum(t_dfone.idoutcome)
lambda = 1 / modelcount
# The POMDP state space is the cross product (MMDP state, model index),
# flattened to 1:statemodelcount.
statemodelcount = statecount * modelcount
# read initial distribution over states (again shifting ids to 1-based)
i_df = DataFrame(File(initialpath)); #i_df: dataFrame of initial.csv
i_dfOne=@transform(i_df, :idstate= :idstate .+1);
# Use SparseCat(values, probabilities)
#values is an iterable object containing the possible values (can be of any type)
#in the distribution that have nonzero probability. probabilities is an iterable object
#that contains the associated probabilities.
# NOTE(review): `values` shadows Base.values for the rest of this script.
len_initial_states = size(i_dfOne,1)
values = [n for n=1:statemodelcount]
probabilities = [0.0*n for n=1:statemodelcount]
# Spread each initial MMDP-state probability uniformly over all models:
# flat index for (state s, model m) is (s-1)*modelcount + m.
for s in 1: len_initial_states
for m in 1:modelcount
probabilities[ (i_dfOne.idstate[s]-1)*modelcount + m] = i_dfOne.probability[s]*lambda
end
end
# initial belief over the flattened (state, model) space
b0= SparseCat(values, probabilities)
# state space S: flat index n encodes 1:(1,1); 2:(1,2); ... last:(|S|,|M|)
# state number: sn = ceil(n/modelcount)
# model number: mn = n-(sn-1)*modelcount
S = 1:statemodelcount
# Action space A
A = 1:actioncount
# Observation space O = {1,2,...,|S|}, S is the state space of the MMDP
# (the agent observes the MMDP state but not the latent model).
O = 1:statecount
#T(s,a,s') is the probability of transitioning to state s' from state s after taking action a.
#Construct transition probability T, |S|x|A|x|S|, T[s, a, s'] = p(s'|a,s) for the POMDP
#(first argument is the current flat state, third is the next flat state)
# T(s1, a, s2): transition probability p(s2 | s1, a) over the flattened
# (MMDP state, model) cross-product space. Decodes each flat index into its
# (state, model) pair; a transition can never switch the latent model, and
# the per-model probability is looked up in the training data frame.
# FIX: use ceil(Int, ...) so the decoded indices are Ints (the original
# produced Float64s, making every comparison and the arithmetic type-unstable).
# NOTE(review): this is a linear scan over t_dfone on every call — adequate
# for small domains, but a precomputed lookup would be much faster.
function T(s1,a,s2)
    # Decode flat index n: MMDP state = ceil(n/modelcount), model = remainder.
    s_1 = ceil(Int, s1 / modelcount)
    s_2 = ceil(Int, s2 / modelcount)
    m1 = s1 - (s_1 - 1) * modelcount
    m2 = s2 - (s_2 - 1) * modelcount
    # Latent model is fixed along a trajectory.
    m1 == m2 || return 0.0
    for i in 1:size(t_dfone, 1)
        if t_dfone.idstateto[i] == s_2 && t_dfone.idstatefrom[i] == s_1 &&
           t_dfone.idoutcome[i] == m1 && t_dfone.idaction[i] == a
            return t_dfone.probability[i]
        end
    end
    return 0.0 # no entry for (s_1, a, s_2) in model m1
end
# Z::Function: Observation probability distribution function; O(a, s', o) is the
# probability of receiving observation oo when state s'is reached after action a.
# Z(a, s, o): observation probability of receiving observation o when flat
# state s is reached after action a. The agent deterministically observes
# the MMDP state component; the latent model index stays hidden.
# FIX: ceil(Int, ...) keeps the decoded index an Int (the original compared
# a Float64 against the Int observation — correct but type-unstable).
function Z(a, s, o)
    # MMDP-state component of the flat cross-product index.
    s_1 = ceil(Int, s / modelcount)
    return s_1 == o ? 1.0 : 0.0
end
# Calculate r^m(s,a)
# r(s, a, m): expected immediate reward r^m(s, a) under model m — the
# probability-weighted sum of rewards over all matching rows of t_dfone.
# FIX: return 0.0 (Float64) instead of 0 (Int) in the near-zero branch;
# the original's Union{Float64,Int} return was type-unstable.
function r(s,a,m)
    sumr = 0.0
    for i in 1:size(t_dfone, 1)
        if t_dfone.idstatefrom[i] == s && t_dfone.idaction[i] == a && t_dfone.idoutcome[i] == m
            sumr += t_dfone.reward[i] * t_dfone.probability[i]
        end
    end
    # Snap floating-point dust to exactly zero, as the original intended.
    return abs(sumr) > 1e-15 ? sumr : 0.0
end
# R::Function: Reward function; R(s,a)R(s,a) is the reward for taking action aa in state ss.
# R(s, a): reward for taking action a in flat cross-product state s.
# Decodes s into its (MMDP state, model) pair and delegates to r.
# FIX: ceil(Int, ...) keeps the decoded indices Ints (the original passed
# Float64 s1/m1 into r, making its row comparisons type-unstable).
function R(s, a)
    s1 = ceil(Int, s / modelcount)
    m1 = s - (s1 - 1) * modelcount
    return r(s1, a, m1)
end
# Assemble the POMDP from the explicit discrete components and solve it.
m = DiscreteExplicitPOMDP(S,A,O,T,Z,R,discount,b0)
rng = MersenneTwister(1);
#rng = 5
# The Partially Observable Monte Carlo Planning (POMCP) online solver for POMDPs.jl.
# Described in Silver, D., & Veness, J. (2010).
# Monte-Carlo Planning in Large POMDPs. In Advances in neural information processing systems (pp. 2164–2172).
# POMCPSolver implementation is on https://github.com/JuliaPOMDP/BasicPOMCP.jl
# tree_queries caps the number of MCTS simulations per action selection.
solver = POMCPSolver( tree_queries=50)
policy = solve(solver, m)
end # the end of run time
# Evaluate the POMCP policy by Monte-Carlo rollouts: run num_trajectory
# simulated trajectories of `timesteps` steps each, recording each
# trajectory's discounted return and accumulating their sum for averaging.
up = updater(policy)
return_ave = 0.0
num_trajectory = 50
timesteps = 10
returns_trajectory = []
trajectories = []
Random.seed!(1000)
# FIX: loop bound was hard-coded as 50; use num_trajectory so it always
# matches the divisor used for the average below.
for iteration in 1:num_trajectory
    r_total = 0.0
    # Sample a starting (state, model) and reset the belief to b0.
    s = rand(initialstate(m))
    b = initialize_belief(up, b0)
    d = discount
    for i in 1:timesteps
        a = action(policy, b)
        sp = rand(transition(m, s, a))
        local o = rand(observation(m, s, a, sp))
        local r = reward(m, s, a, sp, o)
        s = sp
        # NOTE(review): the first reward is weighted by discount (γ^1), not
        # γ^0 — kept as-is to preserve reported numbers; confirm intent.
        r_total += d * r
        d = d * discount
        b = update(up, b, a, o)
    end
    push!(returns_trajectory, r_total)
    push!(trajectories, iteration)
    global return_ave += r_total
end
# Average discounted return over all evaluated trajectories.
ave = return_ave /num_trajectory
# Results for domain riverswim
# write returns to file
return_path = joinpath(@__DIR__,"resultfiles","return_pomcp_50_riverswim.csv")
return_sampled = DataFrame(Time=timesteps,Return=ave)
CSV.write(return_path, return_sampled)
# write runtimes to file (converted from seconds to minutes)
runtime_path = joinpath(@__DIR__,"resultfiles","time_pomcp_50_riverswim.csv")
# FIX: do not rebind `runtime` (the elapsed-seconds scalar) to a DataFrame —
# that clobbered the value the other (commented-out) domain sections below
# would need to recompute runtime/60. Bind the table to a separate name.
runtime_df = DataFrame(Time=timesteps,Runtime=runtime/60)
CSV.write(runtime_path, runtime_df)
# write returns of all trajectories to file
returns_path = joinpath(@__DIR__,"resultfiles","all_returns_pomcp_50_riverswim.csv")
returns_sampled = DataFrame(Time=trajectories,Return=returns_trajectory)
CSV.write(returns_path, returns_sampled)
#=
# Results for domain inventory
# write returns to file
return_path = joinpath(@__DIR__,"resultfiles","return_pomcp_50_inventory.csv")
return_sampled = DataFrame(Time=timesteps,Return=ave)
CSV.write(return_path, return_sampled)
# write runtimes to file
runtime_path = joinpath(@__DIR__,"resultfiles","time_pomcp_50_inventory.csv")
runtime = DataFrame(Time=timesteps,Runtime=runtime/60)
CSV.write(runtime_path, runtime)
# write returns of all trajectories to file
returns_path = joinpath(@__DIR__,"resultfiles","all_returns_pomcp_50_inventory.csv")
returns_sampled = DataFrame(Time=trajectories,Return=returns_trajectory)
CSV.write(returns_path, returns_sampled)
=#
#=
# Results for domain hiv
# write returns to file
return_path = joinpath(@__DIR__,"resultfiles","return_pomcp_10_hiv.csv")
return_sampled = DataFrame(Time=timesteps,Return=ave)
CSV.write(return_path, return_sampled)
# write runtimes to file
runtime_path = joinpath(@__DIR__,"resultfiles","time_pomcp_10_hiv.csv")
runtime = DataFrame(Time=timesteps,Runtime=runtime/60)
CSV.write(runtime_path, runtime)
# write returns of all trajectories to file
returns_path = joinpath(@__DIR__,"resultfiles","all_returns_pomcp_10_hiv.csv")
returns_sampled = DataFrame(Time=trajectories,Return=returns_trajectory)
CSV.write(returns_path, returns_sampled)
=#
#=
# Results for domain population
# write returns to file
return_path = joinpath(@__DIR__,"resultfiles","return_pomcp_50_population.csv")
return_sampled = DataFrame(Time=timesteps,Return=ave)
CSV.write(return_path, return_sampled)
# write runtimes to file
runtime_path = joinpath(@__DIR__,"resultfiles","time_pomcp_50_population.csv")
runtime = DataFrame(Time=timesteps,Runtime=runtime/60)
CSV.write(runtime_path, runtime)
=#
#=
# Results for domain population_small
# write returns to file
return_path = joinpath(@__DIR__,"resultfiles","return_pomcp_50_population_small.csv")
return_sampled = DataFrame(Time=timesteps,Return=ave)
CSV.write(return_path, return_sampled)
# write runtimes to file
runtime_path = joinpath(@__DIR__,"resultfiles","time_pomcp_50_population_small.csv")
runtime = DataFrame(Time=timesteps,Runtime=runtime/60)
CSV.write(runtime_path, runtime)
=#