In [1]:
import sys
sys.path.append('../scripts/')
from puddle_world import *
from mcl import *
import itertools
import collections

In [5]:
class QmdpAgent(MclAgent):
    def __init__(self, goal, puddles, time_interval, pf, sampling_num=10 , widths=np.array([0.2, 0.2, math.pi/18]).T, \
                 lowerleft=np.array([-4, -4]).T, upperright=np.array([4, 4]).T): 
        super().__init__(time_interval, 0.0, 0.0, pf)
        
        self.goal = goal
        self.pose_min = np.r_[lowerleft, 0] 
        self.pose_max = np.r_[upperright, math.pi*2]
        self.widths = widths
        self.index_nums = ((self.pose_max - self.pose_min)/self.widths).astype(int)
        nx, ny, nt = self.index_nums
        self.indexes = list(itertools.product(range(nx), range(ny), range(nt)))
        
        self.value_function = self.init_value()
        
        policy = self.init_policy()
        self.actions = list(set([tuple(policy[i]) for i in self.indexes]))
        
        self.state_transition_probs = self.init_state_transition_probs(time_interval, sampling_num)
        self.depths = self.depth_means(puddles, sampling_num)        
        
    def init_value(self):
        tmp = np.zeros(np.r_[self.index_nums])
        for line in open("value.txt", "r"):
            d = line.split()
            tmp[int(d[0]), int(d[1]), int(d[2])] = float(d[3])
            
        return tmp
    
    def init_policy(self):
        tmp = np.zeros(np.r_[self.index_nums,2]) #制御出力が2次元なので、配列の次元を4次元に
        for index in self.indexes:
            center = self.pose_min + self.widths*(np.array(index).T + 0.5)  #セルの中心の座標
            tmp[index] = PuddleIgnoreAgent.policy(center, self.goal)
            
        return tmp
    
    def init_state_transition_probs(self, time_interval, sampling_num):
        ###セルの中の座標を均等にsampling_num**3点サンプリング###
        dx = np.linspace(0.001, self.widths[0]*0.999, sampling_num) #隣のセルにはみ出さないように端を避ける
        dy = np.linspace(0.001, self.widths[1]*0.999, sampling_num)
        dt = np.linspace(0.001, self.widths[2]*0.999, sampling_num)
        samples = list(itertools.product(dx, dy, dt))
        
        ###各行動、各方角でサンプリングした点を移動してインデックスの増分を記録###
        tmp = {}
        for a in self.actions:
            for i_t in range(self.index_nums[2]):
                transitions = []
                for s in samples:
                    before = np.array([s[0], s[1], s[2] + i_t*self.widths[2]]).T + self.pose_min  #遷移前の姿勢
                    before_index = np.array([0, 0, i_t]).T                                                      #遷移前のインデックス
                
                    after = IdealRobot.state_transition(a[0], a[1], time_interval, before)   #遷移後の姿勢
                    after_index = np.floor((after - self.pose_min)/self.widths).astype(int)   #遷移後のインデックス
                    
                    transitions.append(after_index - before_index)                                  #インデックスの差分を追加
                    
                unique, count = np.unique(transitions, axis=0, return_counts=True)   #集計（どのセルへの遷移が何回か）
                probs = [c/sampling_num**3 for c in count]                   #サンプル数で割って確率にする
                tmp[a,i_t] = list(zip(unique, probs))
                
        return tmp
    
    def depth_means(self, puddles, sampling_num):
        ###セルの中の座標を均等にsampling_num**2点サンプリング###
        dx = np.linspace(0, self.widths[0], sampling_num) 
        dy = np.linspace(0, self.widths[1], sampling_num)
        samples = list(itertools.product(dx, dy))
        
        tmp = np.zeros(self.index_nums[0:2]) #深さの合計が計算されて入る
        for xy in itertools.product(range(self.index_nums[0]), range(self.index_nums[1])):
            for s in samples:
                pose = self.pose_min + self.widths*np.array([xy[0], xy[1], 0]).T + np.array([s[0], s[1], 0]).T #セルの中心の座標
                for p in puddles:
                    tmp[xy] += p.depth*p.inside(pose) #深さに水たまりの中か否か（1 or 0）をかけて足す
                        
            tmp[xy] /= sampling_num**2 #深さの合計から平均値に変換
                       
        return tmp
        
    def policy(self, pose): #姿勢から離散状態のインデックスを作って方策を参照して返すだけ
        for p in self.pf.particles:
            index = np.floor((p.pose - self.pose_min)/self.widths).astype(int) 
            index[2] = (index[2] + self.index_nums[2]*1000)%self.index_nums[2] #角度の正規化
            for i in [0,1]:   
                if index[i] < 0: index[i] = 0
                elif index[i] >= self.index_nums[i]: index[i] = self.index_nums[i] - 1
                    
            cur_value = self.value_function[tuple(index)]
            next_values = [ self.action_value(a, index) for a in self.actions ]

            print(cur_value, next_values)
        
        return (0.2, math.pi/20)

    def action_value(self, action, index): #はみ出しペナルティー追加
        value = 0.0
        for delta, prob in self.state_transition_probs[(action, index[2])]: 
            after, edge_reward = self.edge_correction(np.array(index).T + delta)
            after = tuple(after)
            reward = - self.time_interval * self.depths[(after[0], after[1])] * self.puddle_coef - self.time_interval + edge_reward
            value += (self.value_function[after] + reward) * prob

        return value
            
    def edge_correction(self, index): #変更
        edge_reward = 0.0
        index[2] = (index[2] + self.index_nums[2])%self.index_nums[2] #方角の処理
        
        for i in range(2):
            if index[i] < 0:
                index[i] = 0
                edge_reward = -1e100
            elif index[i] >= self.index_nums[i]:
                index[i] = self.index_nums[i]-1
                edge_reward = -1e100
                
        return index, edge_reward
        
    def decision(self, observation=None):
        self.pf.motion_update(self.prev_nu, self.prev_omega, self.time_interval)
        self.pf.observation_update(observation)
        
        nu, omega = self.policy(self.pf.ml.pose)
        self.prev_nu, self.prev_omega = nu, omega
        return nu, omega

In [6]:
if __name__ == '__main__':  ###dppolicyagentrun
    time_interval = 0.1
    world = PuddleWorld(0.1, time_interval, debug=True) 

    m = Map()
#    m.append_landmark(Landmark(-4,2))
#    m.append_landmark(Landmark(2,-3))
#    m.append_landmark(Landmark(4,4))
    m.append_landmark(Landmark(-4,-4))
    world.append(m)
    
    ###ゴールの追加###
    goal = Goal(-3,-3)
    world.append(goal)
    
    ###水たまりの追加###
    puddles = [Puddle((-2, 0), (0, 2), 0.1), Puddle((-0.5, -2), (2.5, 1), 0.1)] 
    world.append(puddles[0]) 
    world.append(puddles[1])
    
    ### いくつかの初期位置を定義 ###   ###dppolicyagentrun

    init_pose = np.array([3, 3, 0]).T
    pf = Mcl(m, init_pose, 100)
    a = QmdpAgent(Goal(-3,-3), puddles, time_interval, pf)
    r = Robot(init_pose, sensor=Camera(m, distance_bias_rate_stddev=0, direction_bias_stddev=0), 
              agent=a, color="red", bias_rate_stds=(0,0))

    world.append(r)
        
    world.draw()

AttributeError: 'QmdpAgent' object has no attribute 'puddle_coef'