# sightseeingExp.py
from QTPAgent import AlternatingQTPAgent, JointQTPAgent, RandomQueryAgent
from CMPExp import experiment
import util
from sightseeing import Sightseeing
import random
import sys
import config
from cmp import QueryType
import getopt
def main():
    """Run one sightseeing query-planning experiment configured from the command line.

    Options (parsed from ``sys.argv[1:]`` with ``getopt``):
        -r SEED    seed Python's ``random`` module with int(SEED)
        -l STEPS   time step at which the agent receives the response (default 10)
        -s SCALE   integer multiplier for the number of candidate locations (default 1)
        -d GAMMA   discount factor (default 0.9)
        -a AGENT   agent to run: NQ, JQTP, AQTP, AQTP-NF, AQTP-RS, RQ or TPNQ (required)
        -c VALUE   accepted by the parser but ignored by this script
        -v         set ``config.VERBOSE`` to True

    Prints the three values returned by ``experiment`` (ret, qValue,
    timeElapsed), one per line.

    Exits with status 2 when the command line cannot be parsed or when -a is
    missing. Raises ``Exception`` when the agent name is not recognised.
    """
    # discount factor
    gamma = 0.9
    # the time step that the agent receives the response
    responseTime = 10
    scale = 1
    # There is no default agent; -a is mandatory and checked after parsing.
    # (Previously a missing -a surfaced later as an UnboundLocalError.)
    agentName = None

    try:
        opts, _ = getopt.getopt(sys.argv[1:], "r:l:s:d:a:c:v")
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n" % err)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-r':
            random.seed(int(arg))
        elif opt == '-l':
            responseTime = int(arg)
        elif opt == '-s':
            scale = int(arg)
        elif opt == '-d':
            gamma = float(arg)
        elif opt == '-a':
            agentName = arg
        elif opt == '-v':
            config.VERBOSE = True
        # '-c' is accepted by the option string above but intentionally unused here.

    if agentName is None:
        sys.stderr.write("missing required option: -a <agent name>\n")
        sys.exit(2)

    width = 40
    height = 3

    # Candidate sight locations: x in [1, width - 1], y in [0, height - 1].
    numLocations = 6 * scale
    locations = [(random.randint(1, width - 1), random.randint(0, height - 1))
                 for _ in range(numLocations)]
    # sort by x coordinate for convenience
    locations.sort(key=lambda loc: loc[0])

    # Two queries per location, differing only in the third component (1 / -1).
    queries = []
    for x, y in locations:
        queries.append((x, y, 1, 0))
        queries.append((x, y, -1, 0))

    # divide features by regions: the x-sorted locations are split into
    # rewardNum consecutive groups of 2 * scale locations each
    rewardNum = 3
    sightsPerReward = 2 * scale
    rewards = []
    for rewardIdx in range(rewardNum):
        reward = util.Counter()
        first = sightsPerReward * rewardIdx
        for loc in locations[first:first + sightsPerReward]:
            reward[loc] = 1
        rewards.append(reward)

    def relevance(fState, query):
        """Return True when `query` lies in the travel direction of `fState`.

        Index 0 of both tuples is the x coordinate and index 2 the direction
        (1 forward, otherwise treated as backward).
        """
        if fState[2] == 1:
            # forward
            return query[0] >= fState[0] and query[2] == 1
        # backward
        return query[0] <= fState[0] and query[2] == -1

    def rewardGen(reward):
        """Build a reward function over states (x, y, direction, status) from `reward`."""
        def rewardFunc(s):
            x, y, direction, status = s
            if status == 1:
                # Membership test first so a missing location yields -1
                # instead of being looked up in the counter.
                if (x, y) in reward:
                    return reward[(x, y)]
                return -1
            if x == 0 and y == 0 and direction != 0:
                return 2
            return 0
        return rewardFunc

    rewardSet = [rewardGen(reward) for reward in rewards]
    # uniform prior over the reward candidates
    initialPhi = [1.0 / rewardNum] * rewardNum

    if agentName == 'NQ':
        queries = [0]  # make a dummy query set
        queryType = QueryType.NONE
    else:
        queryType = QueryType.ACTION

    # The true reward function is drawn uniformly from the candidates.
    domain = Sightseeing(queries, random.choice(rewardSet), gamma, responseTime, width, height)

    if agentName == 'JQTP' or agentName == 'NQ':
        agent = JointQTPAgent(domain, rewardSet, initialPhi, queryType, gamma)
    elif agentName == 'AQTP':
        agent = AlternatingQTPAgent(domain, rewardSet, initialPhi, queryType, relevance, gamma)
    elif agentName == 'AQTP-NF':
        # don't filter query. Assume all queries are relevant.
        agent = AlternatingQTPAgent(domain, rewardSet, initialPhi, queryType, lambda fS, q: True, gamma)
    elif agentName == 'AQTP-RS':
        agent = AlternatingQTPAgent(domain, rewardSet, initialPhi, queryType, relevance, gamma, restarts=1)
    elif agentName == 'RQ':
        agent = RandomQueryAgent(domain, rewardSet, initialPhi, queryType, gamma)
    elif agentName == 'TPNQ':
        agent = JointQTPAgent(domain, rewardSet, initialPhi, queryType, gamma, queryIgnored=True)
    else:
        raise Exception("Unknown Agent " + agentName)

    ret, qValue, timeElapsed = experiment(domain, agent, gamma, rewardSet, queryType)
    # Single-argument print(...) produces the same output under Python 2 and 3.
    print(ret)
    print(qValue)
    print(timeElapsed)
# Run the experiment only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()