-
Notifications
You must be signed in to change notification settings - Fork 3
/
temporal_mab.py
56 lines (46 loc) · 1.63 KB
/
temporal_mab.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from datetime import datetime
import numpy as np
import categories_hierarchy as ch
from container_mab import ContainerMAB
def normalize(values):
    """Scale the second item of every pair by the L2 norm of all second items.

    Args:
        values: Sequence of two-item sequences ``(key, number)``.

    Returns:
        A new list of ``(key, number / norm)`` tuples in input order, where
        ``norm`` is the Euclidean norm of all the numbers. If the norm is
        zero (every number is zero, or the input is empty) each number maps
        to ``0.0`` rather than the NaN a division by a zero norm produces.
    """
    norm = np.linalg.norm([pair[1] for pair in values])
    if norm == 0:
        # All magnitudes are zero: nothing to rescale, and dividing would
        # only yield NaN values plus a NumPy runtime warning.
        return [(pair[0], 0.0) for pair in values]
    return [(pair[0], pair[1] / norm) for pair in values]
def train(events):
    """Replay ``events`` through an hour-binned multi-armed bandit.

    For every event the bandit picks one category (arm) for the event's hour
    of day, the pick is scored against the event's actual venue category via
    the categories hierarchy, and the bandit is updated with that reward.

    Args:
        events: Table-like object exposing ``shape[0]`` (row count) and
            ``iterrows()`` yielding ``(index, row)`` pairs. Each row must
            provide ``row['datetime']`` (an object with an ``hour``
            attribute) and ``row['venue_category_id']``.
            NOTE(review): presumably a pandas DataFrame -- confirm with
            the caller.

    Returns:
        A ``(mab, stats)`` tuple: the trained ``ContainerMAB`` and a dict
        with keys ``'n_trials'``, ``'pulled_arms'`` (selected arm index per
        event), ``'rewards'`` (reward per event) and ``'cumulative_rewards'``
        (running sum of the rewards).
    """
    # Load Categories Hierarchy
    print('Loading Categories Hierarchy %s' % datetime.now())
    h = ch.load()
    # Arms are addressed by position, so the keys must be an indexable list.
    categories = list(h.keys())
    mab = ContainerMAB(n_arms=len(categories), n_bins=24)

    n_trials = events.shape[0]
    returned_cate = [None] * n_trials
    rewards = [0.0] * n_trials
    cumulative_rewards = [0.0] * n_trials

    # Report about every 10% of the run. Clamp to 1 so that fewer than ten
    # events does not produce a modulo-by-zero.
    report_every = max(1, n_trials // 10)

    for cnt, (_, r) in enumerate(events.iterrows()):
        if cnt % report_every == 0:
            print(' Progress: %s %% %s'
                  % (cnt * 100 // n_trials, datetime.now()))

        t = r['datetime']
        # Use the same integer hour bin for both selection and update.
        hour = int(t.hour)
        cate_idx = mab.select_arms(hour)
        category = categories[cate_idx]
        returned_cate[cnt] = cate_idx

        # Compute Reward
        v_category = r['venue_category_id']
        if v_category in h:
            dist = h.dist_to_LCA(category, v_category, 0)
            # Divide by a float: with an integer distance, Python 2 integer
            # division would truncate dist/3 and flatten the reward.
            reward = 1 - (dist / 3.0) ** 2
        else:
            # Venue category unknown to the hierarchy: no reward.
            reward = 0
        mab.update(hour, cate_idx, reward)

        # Compute Cumulative Reward
        rewards[cnt] = reward
        if cnt == 0:
            cumulative_rewards[cnt] = reward
        else:
            cumulative_rewards[cnt] = cumulative_rewards[cnt - 1] + reward

    stats = {
        'n_trials': n_trials,
        'pulled_arms': returned_cate,
        'rewards': rewards,
        'cumulative_rewards': cumulative_rewards,
    }
    return mab, stats