In [None]:
import sys
sys.path.append('./scripts/')
from dp_policy_agent import *
from dynamic_programming import *
from hazard_mcl import *

In [None]:
class HazardQmdpAgent(DpPolicyAgent): ###qmdp3
    """Q-MDP style agent that additionally keeps a ``hazard_weight`` on every particle.

    For each candidate action the agent averages, over all particles of the
    estimator, the expected value of the successor cells (taken from a
    precomputed value function), scaled by the particle's ``hazard_weight``.
    After choosing an action it updates each particle's ``hazard_weight`` from
    the expected puddle penalty computed for that particle.
    """

    def __init__(self, time_interval, estimator, goal, puddles, sampling_num=10, widths=np.array([0.2, 0.2, math.pi/18]).T, \
                 puddle_coef=1000000.0, lowerleft=np.array([-4, -4]).T, upperright=np.array([4, 4]).T):
        """Set up the base agent, the DynamicProgramming helper and the bookkeeping fields.

        The arguments are forwarded to ``DpPolicyAgent.__init__`` and to
        ``DynamicProgramming``; the value function of the latter is replaced
        by the table loaded in ``init_value``.
        """
        super().__init__(time_interval, estimator, goal, puddle_coef, widths, lowerleft, upperright)

        self.dp = DynamicProgramming(widths, goal, puddles, time_interval, sampling_num, puddle_coef)
        self.dp.value_function = self.init_value()
        self.evaluations = np.array([0.0, 0.0, 0.0])
        self.current_value = 0.0

        self.history = [(0, 0)]  # record of the actions chosen so far
        self.hazard = {}

    def init_value(self):
        """Load the precomputed value function from ``./data/dp_value.txt``.

        Every non-blank line is read as four whitespace-separated fields:
        three integer indexes followed by the float value stored at that
        index. Cells that do not appear in the file stay at 0.

        Returns:
            numpy array of shape ``self.dp.index_nums``.
        """
        values = np.zeros(self.dp.index_nums)
        # Use a context manager so the file handle is closed deterministically.
        with open("./data/dp_value.txt", "r") as value_file:
            for line in value_file:
                fields = line.split()
                if not fields:  # tolerate blank lines (e.g. a trailing newline)
                    continue
                values[int(fields[0]), int(fields[1]), int(fields[2])] = float(fields[3])

        return values

    def action_value(self, action, index, particle, out_penalty=True):
        """Evaluate ``action`` taken from grid cell ``index`` for one particle.

        Args:
            action: key into ``self.dp.state_transition_probs`` (together with ``index[2]``).
            index: 3-element grid index of the particle.
            particle: object providing ``hazard_weight``.
            out_penalty: when false, the reward returned by ``out_correction`` is ignored.

        Returns:
            ``(value, hazard)`` where ``value`` is the probability-weighted sum of
            (successor value + reward) scaled by ``particle.hazard_weight`` and
            ``hazard`` is the probability-weighted puddle penalty alone.
        """
        value = 0.0
        hazard = 0.0
        for delta, prob in self.dp.state_transition_probs[(action, index[2])]:
            after, out_reward = self.dp.out_correction(np.array(index).T + delta)
            after = tuple(after)

            reward = - self.dp.time_interval * self.dp.depths[(after[0], after[1])] * self.dp.puddle_coef - self.dp.time_interval + out_reward*out_penalty
            value += (self.dp.value_function[after] + reward) * prob * particle.hazard_weight

            # Puddle part of the cost only; used later to update the particle's hazard_weight.
            hazard += self.dp.time_interval * self.dp.depths[(after[0], after[1])] * self.dp.puddle_coef * prob

        return value, hazard

    def evaluation(self, action, indexes, particles):
        """Evaluate ``action`` over all particles (with the out-of-area reward disabled).

        Returns:
            ``(mean_value, hazards)``: the mean of the per-particle values and the
            list of per-particle hazard terms, in particle order.
        """
        evaluations = [self.action_value(action, indexes[i], particles[i], out_penalty=False) for i in range(len(indexes))]
        return sum([e[0] for e in evaluations])/len(indexes), [e[1]for e in evaluations]

    def policy(self, pose, goal=None):
        """Choose the action with the highest particle-averaged evaluation.

        The ``pose`` argument is not used; the decision is based on the poses of
        ``self.estimator.particles``. As side effects the method updates
        ``current_value``, ``evaluations``, ``history`` and every particle's
        ``hazard_weight``.

        Returns:
            The chosen action (the last entry of ``self.history``).
        """
        indexes = [self.to_index(p.pose, self.pose_min, self.index_nums, self.widths) for p in self.estimator.particles]
        self.current_value = sum([self.dp.value_function[i] for i in indexes])/len(indexes)
        es = [self.evaluation(a, indexes, self.estimator.particles) for a in self.dp.actions]
        self.evaluations = [e[0] for e in es]
        hazard_weights = [e[1] for e in es]
        # Record the action selected by Q-MDP in the history.
        self.history.append(self.dp.actions[np.argmax(self.evaluations)])

        for i, p in enumerate(self.estimator.particles):
            # Halve the weight's excess over 1.0, then add the largest hazard
            # computed for this particle across all actions.
            max_hazard_weight = max([e[i] for e in hazard_weights])
            p.hazard_weight = (p.hazard_weight - 1.0)*0.5 + 1.0 + max_hazard_weight

        # Disabled heuristic: move forward if the last two actions cancel out to a standstill.
     #   if self.history[-1][0] + self.history[-2][0] == 0.0 and self.history[-1][1] + self.history[-2][1] == 0.0:
     #       return (1.0, 0.0)

        return self.history[-1]

    def draw(self, ax, elems):
        """Draw the base agent, then a text line with the current value and the action evaluations."""
        super().draw(ax, elems)
        elems.append(ax.text(-4.5, -4.6, "{:.3} => [{:.3}, {:.3}, {:.3}]".format(self.current_value, *self.evaluations), fontsize=8))

In [None]:
def trial(animation):
    """Build the puddle world with one HazardQmdpAgent-driven robot, draw it, and return the agent.

    Args:
        animation: passed negated as ``debug`` to ``PuddleWorld``.

    Returns:
        The ``HazardQmdpAgent`` attached to the robot.
    """
    dt = 0.1
    world = PuddleWorld(300, dt, debug=not animation)

    ## Landmarks (deliberately placed at awkward positions) ##
    landmark_map = Map()
    for x, y in ((1, 4), (4, 1), (-4, -4)):
        landmark_map.append_landmark(Landmark(x, y))
    world.append(landmark_map)

    ## Goal and puddles (nothing special changed here) ##
    goal = Goal(-3, -3)
    puddles = [
        Puddle((-2, 0), (0, 2), 0.1),
        Puddle((-0.5, -2), (2.5, 1), 0.1),
    ]
    world.append(goal)
    for puddle in puddles:
        world.append(puddle)

    ## Robot ##
    init_pose = np.array([2.5, 2.5, 0]).T
    estimator = HazardMcl(landmark_map, init_pose, 100)
    agent = HazardQmdpAgent(dt, estimator, goal, puddles, puddle_coef=1000000.0)
    robot = Robot(init_pose, sensor=Camera(landmark_map), agent=agent, color="red")

    world.append(robot)
    world.draw()

    return agent

In [None]:
# Run one trial with animation enabled; the returned agent is this cell's output.
trial(animation=True)