In [1]:
import sys  
sys.path.append('../scripts/')
from dp_policy_agent import *

In [2]:
class MeanMcl(Mcl):
    """Monte Carlo localization whose pose estimate is the particle mean.

    Extends ``Mcl`` so that, after every observation update, ``self.pose``
    is overwritten with the average of the particle poses (see ``set_mean``).
    """

    # Template for the default motion-noise standard deviations. It is copied
    # per instance in __init__ so no dict object is shared between instances
    # (a dict literal used directly as a default argument would be shared).
    _DEFAULT_MOTION_NOISE_STDS = {"nn": 0.19, "no": 0.001, "on": 0.13, "oo": 0.2}

    def __init__(self, envmap, init_pose, num, motion_noise_stds=None,
                 distance_dev_rate=0.14, direction_dev=0.05):
        """Forward every argument unchanged to ``Mcl.__init__``.

        motion_noise_stds: dict with the keys "nn", "no", "on" and "oo".
            When None (the default), a fresh copy of
            ``_DEFAULT_MOTION_NOISE_STDS`` is used.
        """
        if motion_noise_stds is None:
            motion_noise_stds = dict(self._DEFAULT_MOTION_NOISE_STDS)
        super().__init__(envmap, init_pose, num, motion_noise_stds, distance_dev_rate, direction_dev)

    def normalize1(self, t):
        """Normalization method 1: wrap the angle ``t`` into [-pi, pi)."""
        while t < -np.pi:
            t += 2*np.pi
        while t >= np.pi:
            t -= 2*np.pi
        return t

    def normalize2(self, t):
        """Normalization method 2: wrap the angle ``t`` into [0, 2*pi)."""
        while t < 0.0:
            t += 2*np.pi
        while t >= 2*np.pi:
            t -= 2*np.pi
        return t

    def set_mean(self):
        """Set ``self.pose`` to the unweighted mean pose of the particles.

        x and y are plain arithmetic means. The heading is averaged under
        both wrapping conventions and the convention with the smaller
        variance wins: a plain average is meaningless when the headings
        straddle the wrap-around point of the chosen range, and that shows
        up as a large variance. Particle weights are not read here.

        The stored heading is not re-wrapped afterwards, so it lies in
        [-pi, pi) or [0, 2*pi) depending on which convention was selected.
        """
        x = np.array([p.pose[0] for p in self.particles]).mean()
        y = np.array([p.pose[1] for p in self.particles]).mean()

        ts1 = np.array([self.normalize1(p.pose[2]) for p in self.particles])  # headings in [-pi, pi)
        ts2 = np.array([self.normalize2(p.pose[2]) for p in self.particles])  # headings in [0, 2*pi)

        # Ties go to the [0, 2*pi) convention, exactly as before.
        if ts1.var() < ts2.var():
            t = ts1.mean()
        else:
            t = ts2.mean()

        self.pose = np.array([x, y, t])

    def observation_update(self, observation):
        """Update every particle with ``observation``, resample, then average.

        ``set_ml`` and ``resampling`` are inherited and not visible in this
        file; the call order below is the one the original author prescribed.
        """
        for p in self.particles:
            p.observation_update(observation, self.map, self.distance_dev_rate, self.direction_dev)
        self.set_ml()      # must run before resampling
        self.resampling()
        self.set_mean()    # must run after resampling

In [3]:
def trial(animation):
    """Build one puddle-world scenario, draw/run it, and return the agent.

    animation: its negation is handed to ``PuddleWorld`` as ``debug``
        (presumably debug mode skips the animation -- confirm in PuddleWorld).

    Returns the ``DpPolicyAgent`` that drove the robot, so the caller can
    inspect the outcome of the run on it.
    """
    dt = 0.1
    world = PuddleWorld(30, dt, debug=not animation)

    ## Add landmarks (at deliberately awkward positions) ##
    # Current layout: the "sufficiently many landmarks" case.
    # Sparser layout tried earlier: (1,4), (4,1), (-4,-4).
    landmark_map = Map()
    for lx, ly in ((-4, 2), (2, -3), (4, 4), (-4, -4)):
        landmark_map.append_landmark(Landmark(lx, ly))
    world.append(landmark_map)

    ## Add the goal and the puddles (nothing special changed here) ##
    goal = Goal(-3, -3)
    puddles = [
        Puddle((-2, 0), (0, 2), 0.1),
        Puddle((-0.5, -2), (2.5, 1), 0.1),
    ]
    for scenery in (goal, puddles[0], puddles[1]):
        world.append(scenery)

    ## Build the robot ##
    start_pose = np.array([2.5, 2.5, 0]).T
    estimator = MeanMcl(landmark_map, start_pose, 100)
    agent = DpPolicyAgent(dt, estimator, goal)
    robot = Robot(start_pose, sensor=Camera(landmark_map), agent=agent, color="red")
    world.append(robot)

    world.draw()

    return agent

In [4]:
def evaluation(num=1000, filename="enough_landmark_result.txt"):
    """Run ``num`` trials and write one result line per trial to ``filename``.

    num: number of trials to run (default 1000).
    filename: path of the output text file; it is truncated on open.
        Pass e.g. "avg_result.txt" to keep results of another configuration
        in a separate file instead of editing this function.

    Each line has the form "<total_reward + final_value> <in_goal>", taken
    from the agent returned by ``trial(False)``.
    """
    with open(filename, "w") as f:
        for _ in range(num):
            a = trial(False)
            f.write("{} {}\n".format(a.total_reward+a.final_value, a.in_goal))
            # Flush after every trial so partial results are already on disk
            # if the long run is interrupted.
            f.flush()
            
# Run the whole batch of trials when this cell executes; results are written
# to a text file in the current working directory.
evaluation()

