In [2]:
def reward_function(params):
    '''
    Example of rewarding the agent to follow center line
    '''
    ## import libraries
    import math
    ## Read input parameters
    all_wheels_on_track = params["all_wheels_on_track"]
    x = params["x"]
    y = params["y"]
    is_left_of_center = params["is_left_of_center"]
    heading = params["heading"]
    progress = params["progress"]
    steps = params["steps"]
    speed = params["speed"]
    track_width = params['track_width']
    distance_from_center = params['distance_from_center']
    steering = params['steering_angle'] # - is right and + is left
    waypoints = params["waypoints"] # list of all waypoints (float,float) of the next milestones
    closest_waypoints = params["closest_waypoints"] #(int,int) he zero-based indices of the two neighboring waypoints closest to the vehicle's current position of (x, y). The distance is measured by the Euclidean distance from the center of the vehicle.
    
    # init reward
    reward = 0
    
    
    ####### reward functions #################
    ## award being close to the center
    def award_close_to_center(reward, track_width, distance_from_center):
        # Calculate 3 markers that are at varying distances away from the center line
        marker_1 = 0.1 * track_width
        marker_2 = 0.25 * track_width
        marker_3 = 0.5 * track_width
        
        # Give higher reward if the car is closer to center line and vice versa
        if distance_from_center <= marker_1:
            reward += 1.0
        elif distance_from_center <= marker_2:
            reward += 0.5
        elif distance_from_center <= marker_3:
            reward += 0.1
        else:
            reward += 1e-3  # likely crashed/ close to off track
        return reward
        
    ## punish being off track
    def reward_on_the_same_side_as_waypoint(reward,waypoints,closest_waypoints,heading, distance_from_center ,reward_multiplier=1):
        
    
        # Calculate the direction of the center line based on the closest waypoints
        next_point = waypoints[closest_waypoints[1]]
        prev_point = waypoints[closest_waypoints[0]]
    
        # Calculate the direction in radius, arctan2(dy, dx), the result is (-pi, pi) in radians
        track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0]) 
        
        # Convert to degree
        track_direction = math.degrees(track_direction)
        if track_direction < 0:
            converted_track_direction = 360 + track_direction
        else:
            converted_track_direction = track_direction
        print('track_direction is {converted_track_direction}'.format(converted_track_direction=converted_track_direction))
        if heading < 0:
            converted_direction = 360 + heading
        else:
            converted_direction = heading
        print('converted_direction is {converted_direction}'.format(converted_direction=converted_direction))    
        direction_diff = converted_track_direction - converted_direction
        if direction_diff > 300:
            direction_diff = converted_direction + converted_direction -360
        reward_for_dir_diff = distance_from_center*direction_diff
        reward += reward_for_dir_diff * reward_multiplier
        return reward
    def punish_on_different_side_as_waypoint(reward,waypoints,closest_waypoints,heading, distance_from_center ,is_left_of_center,reward_multiplier=1):
        
    
        # Calculate the direction of the center line based on the closest waypoints
        next_point = waypoints[closest_waypoints[1]]
        prev_point = waypoints[closest_waypoints[0]]
    
        # Calculate the direction in radius, arctan2(dy, dx), the result is (-pi, pi) in radians
        track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0]) 
        
        # Convert to degree
        track_direction = math.degrees(track_direction)
        if track_direction < 0:
            converted_track_direction = 360 + track_direction
        else:
            converted_track_direction = track_direction
        print('track_direction is {converted_track_direction}'.format(converted_track_direction=converted_track_direction))
        if heading < 0:
            converted_direction = 360 + heading
        else:
            converted_direction = heading
        print('converted_direction is {converted_direction}'.format(converted_direction=converted_direction))    
        direction_diff = converted_track_direction - converted_direction ## negative means right
        if direction_diff > 300:
            direction_diff = converted_direction + converted_direction -360
        if direction_diff > 0:
            if is_left_of_center:
                reward = 1
            else:
                reward = -1
        else:
            if is_left_of_center:
                reward = -1
            else:
                reward = 1
        return reward
    
    def reward_speed(reward,speed):
        return reward + speed
    def remove_reward_if_off_track(reward, track_width, distance_from_center):
        # Calculate 3 markers that are at varying distances away from the center line
        marker_3 = 0.5 * track_width
        if distance_from_center > marker_3:
            reward = -100  # likely crashed/ close to off track
        return reward
    
    reward = award_close_to_center(reward, track_width, distance_from_center)
#     print(reward)
    reward = reward_on_the_same_side_as_waypoint(reward,waypoints,closest_waypoints,heading, distance_from_center ,reward_multiplier=1)
#     print(reward)
    if len(waypoints) - closest_waypoints[1] >= 2:
        reward = reward_on_the_same_side_as_waypoint(reward,waypoints,(closest_waypoints[0]+1,closest_waypoints[1]+1),heading, distance_from_center ,reward_multiplier=0.2)
#     print(reward)
    if len(waypoints) - closest_waypoints[1] >= 2:
        reward = reward_on_the_same_side_as_waypoint(reward,waypoints,(closest_waypoints[0]+2,closest_waypoints[1]+2),heading, distance_from_center ,reward_multiplier=-.2)
#     print(reward)
    reward = reward_speed(reward,speed)
    reward = punish_on_different_side_as_waypoint(reward,waypoints,closest_waypoints,heading, distance_from_center ,is_left_of_center,reward_multiplier=1)
    reward = remove_reward_if_off_track(reward, track_width, distance_from_center)
    
    return float(reward)

In [11]:
params={}
params["all_wheels_on_track"]=1
params["x"]=10
params["y"]=20
params["is_left_of_center"]=1
params["heading"]=40
params["progress"]=80
params["steps"]=20
params["speed"]=8
params['track_width']=2
params['distance_from_center']=0.8
params['steering_angle']=15 # - is right and + is left
params["waypoints"]=[[1,2],[2,3],[3,4],[4,5],[6,5.5],[10,6],[11,8]] # list of all waypoints (float,float) of the next milestones
params["closest_waypoints"]=[2,3]
params["is_left_of_center"]=True

In [3]:
reward_function(params)

NameError: name 'params' is not defined

In [4]:
def reward_function2(params):
    '''
    only speed and progress
    '''
    ## import libraries
    import math
    ## Read input parameters
    all_wheels_on_track = params["all_wheels_on_track"]
    x = params["x"]
    y = params["y"]
    is_left_of_center = params["is_left_of_center"]
    heading = params["heading"]
    progress = params["progress"]
    steps = params["steps"]
    speed = params["speed"]
    track_width = params['track_width']
    distance_from_center = params['distance_from_center']
    steering = params['steering_angle'] # - is right and + is left
    waypoints = params["waypoints"] # list of all waypoints (float,float) of the next milestones
    closest_waypoints = params["closest_waypoints"] #(int,int) he zero-based indices of the two neighboring waypoints closest to the vehicle's current position of (x, y). The distance is measured by the Euclidean distance from the center of the vehicle.
    
    # init reward
    reward = 0
    
    
    ####### reward functions #################
    ## award being close to the center
    def award_close_to_center(reward, track_width, distance_from_center):
        # Calculate 3 markers that are at varying distances away from the center line
        marker_1 = 0.1 * track_width
        marker_2 = 0.25 * track_width
        marker_3 = 0.5 * track_width
        
        # Give higher reward if the car is closer to center line and vice versa
        if distance_from_center <= marker_1:
            reward += 1.0
        elif distance_from_center <= marker_2:
            reward += 0.5
        elif distance_from_center <= marker_3:
            reward += 0.1
        else:
            reward += 1e-3  # likely crashed/ close to off track
        return reward
        
    ## punish being off track
    def reward_on_the_same_side_as_waypoint(reward,waypoints,closest_waypoints,heading, distance_from_center ,reward_multiplier=1):
        
    
        # Calculate the direction of the center line based on the closest waypoints
        next_point = waypoints[max(closest_waypoints[0],closest_waypoints[1])]
        prev_point = waypoints[min(closest_waypoints[0],closest_waypoints[1])]
    
        # Calculate the direction in radius, arctan2(dy, dx), the result is (-pi, pi) in radians
        track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0]) 
        
        # Convert to degree
        track_direction = math.degrees(track_direction)
        if track_direction < 0:
            converted_track_direction = 360 + track_direction
        else:
            converted_track_direction = track_direction
        print('track_direction is {converted_track_direction}'.format(converted_track_direction=converted_track_direction))
        if heading < 0:
            converted_direction = 360 + heading
        else:
            converted_direction = heading
        print('converted_direction is {converted_direction}'.format(converted_direction=converted_direction))    
        direction_diff = converted_track_direction - converted_direction
        if direction_diff > 300:
            direction_diff = 360 - converted_track_direction - converted_direction
        print(direction_diff)
        reward_for_dir_diff = distance_from_center*direction_diff
        reward += reward_for_dir_diff * reward_multiplier
        return reward
    
    def reward_speed(reward,speed):
        return reward + speed
    def remove_reward_if_off_track(reward, track_width, distance_from_center):
        # Calculate 3 markers that are at varying distances away from the center line
        marker_3 = 0.5 * track_width
        if distance_from_center > marker_3:
            reward = -100  # likely crashed/ close to off track
        return reward
    
    reward = award_close_to_center(reward, track_width, distance_from_center)
#     print(reward)
    reward = reward_on_the_same_side_as_waypoint(reward,waypoints,closest_waypoints,heading, distance_from_center ,reward_multiplier=1)
#     print(reward)
    if len(waypoints) - closest_waypoints[1] >= 2:
        reward = reward_on_the_same_side_as_waypoint(reward,waypoints,(closest_waypoints[0]+1,closest_waypoints[1]+1),heading, distance_from_center ,reward_multiplier=0.2)
#     print(reward)
    if len(waypoints) - closest_waypoints[1] >= 2:
        reward = reward_on_the_same_side_as_waypoint(reward,waypoints,(closest_waypoints[0]+2,closest_waypoints[1]+2),heading, distance_from_center ,reward_multiplier=-.2)
#     print(reward)
    reward = reward_speed(reward,speed)
    reward = remove_reward_if_off_track(reward, track_width, distance_from_center)
    
    return float(reward)

In [96]:
reward_function2(params)

track_direction is 45.0
converted_direction is 40
track_direction is 14.036243467926479
converted_direction is 40
track_direction is 7.125016348901798
converted_direction is 40


13.20579633904395

In [114]:
### adhere to side
def reward_function(params):
    '''
    only speed and progress
    '''
    ## import libraries
    import math
    ## Read input parameters
    all_wheels_on_track = params["all_wheels_on_track"]
    x = params["x"]
    y = params["y"]
    is_left_of_center = params["is_left_of_center"]
    heading = params["heading"]
    progress = params["progress"]
    steps = params["steps"]
    speed = params["speed"]
    track_width = params['track_width']
    distance_from_center = params['distance_from_center']
    steering = params['steering_angle'] # - is right and + is left
    waypoints = params["waypoints"] # list of all waypoints (float,float) of the next milestones
    closest_waypoints = params["closest_waypoints"] #(int,int) he zero-based indices of the two neighboring waypoints closest to the vehicle's current position of (x, y). The distance is measured by the Euclidean distance from the center of the vehicle.
    
    # init reward
    reward = 0
    
    
    ####### reward functions #################
    ## award being close to the center
    def award_close_to_side(reward, track_width, distance_from_center):
        # Calculate 3 markers that are at varying distances away from the center line
        marker_1 = 0.1 * track_width
        marker_2 = 0.25 * track_width
        marker_3 = 0.5 * track_width
        
        # Give higher reward if the car is closer to center line and vice versa
        if distance_from_center <= marker_1:
            reward += 0.1
        elif distance_from_center <= marker_2:
            reward += 0.5
        elif distance_from_center <= marker_3:
            reward += 2
        else:
            reward += 1e-3  # likely crashed/ close to off track
        return reward
        
    ## punish being off track
    def reward_on_the_same_side_as_waypoint(reward,waypoints,closest_waypoints,heading, distance_from_center ,reward_multiplier=1):


        # Calculate the direction of the center line based on the closest waypoints
        next_point = waypoints[closest_waypoints[1]]
        prev_point = waypoints[closest_waypoints[0]]

        # Calculate the direction in radius, arctan2(dy, dx), the result is (-pi, pi) in radians
        track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0]) 
        # Convert to degree
        track_direction = math.degrees(track_direction)

        # Calculate the difference between the track direction and the heading direction of the car
        direction_diff = abs(track_direction - heading)
        if direction_diff > 180:
            direction_diff = 360 - direction_diff

        # Penalize the reward if the difference is too large
        DIRECTION_THRESHOLD = 10.0
        if direction_diff > DIRECTION_THRESHOLD:
            reward *= 0.5

        return reward
    def reward_speed(reward,speed):
        return reward + speed
    def remove_reward_if_off_track(reward, track_width, distance_from_center):
        # Calculate 3 markers that are at varying distances away from the center line
        marker_3 = 0.5 * track_width
        if distance_from_center > marker_3:
            reward = -100  # likely crashed/ close to off track
        return reward
    
    reward = award_close_to_side(reward, track_width, distance_from_center)
    print(reward)
    reward = reward*reward_speed(reward,speed)
    print(reward)
    reward = reward_on_the_same_side_as_waypoint(reward,waypoints,closest_waypoints,heading, distance_from_center ,reward_multiplier=1)
    print(reward)
    reward = remove_reward_if_off_track(reward, track_width, distance_from_center)
    
    
    return float(reward)

In [115]:
reward_function(params)

2
20
20


20.0

In [48]:
### adhere to side with progress
def reward_function(params):
    '''
    only speed and progress
    '''
    ## import libraries
    import math
    ## Read input parameters
    all_wheels_on_track = params["all_wheels_on_track"]
    x = params["x"]
    y = params["y"]
    is_left_of_center = params["is_left_of_center"]
    heading = params["heading"]
    progress = params["progress"]
    steps = params["steps"]
    speed = params["speed"]
    track_width = params['track_width']
    distance_from_center = params['distance_from_center']
    steering = params['steering_angle'] # - is right and + is left
    waypoints = params["waypoints"] # list of all waypoints (float,float) of the next milestones
    closest_waypoints = params["closest_waypoints"] #(int,int) he zero-based indices of the two neighboring waypoints closest to the vehicle's current position of (x, y). The distance is measured by the Euclidean distance from the center of the vehicle.
    
    # init reward
    reward = 0
    
    
    ####### reward functions #################
    ## award being close to the center
    def award_close_to_side(reward, track_width, distance_from_center,is_left_of_center):
        # Calculate 3 markers that are at varying distances away from the center line
        marker_1 = 0.1 * track_width
        marker_2 = 0.25 * track_width
        marker_3 = 0.35 * track_width
        
        # Give higher reward if the car is closer to center line and vice versa
        if distance_from_center <= marker_1:
            if is_left_of_center:
                reward += 4
            else:
                reward += 4
        elif distance_from_center <= marker_2:
            if is_left_of_center:
                reward += 8
            else:
                reward += 2
        elif distance_from_center <= marker_3:
            if is_left_of_center:
                reward += 6
            else:
                reward += 3
        else:
            reward += -5  # likely crashed/ close to off track
        return reward
        
    ## punish being off track
    def reward_on_the_same_side_as_waypoint(reward,waypoints,closest_waypoints,heading, distance_from_center ,reward_multiplier=1):


        # Calculate the direction of the center line based on the closest waypoints
        next_point = waypoints[closest_waypoints[1]]
        prev_point = waypoints[closest_waypoints[0]]

        # Calculate the direction in radius, arctan2(dy, dx), the result is (-pi, pi) in radians
        track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0]) 
        # Convert to degree
        track_direction = math.degrees(track_direction)

        # Calculate the difference between the track direction and the heading direction of the car
        direction_diff = abs(track_direction - heading)
        if direction_diff > 180:
            direction_diff = 360 - direction_diff ### negative = right

        # Penalize the reward if the difference is too large
        DIRECTION_THRESHOLD = 10.0
        if direction_diff > DIRECTION_THRESHOLD:
            reward = -6
        elif direction_diff > 5:
            reward = -3
        return reward
    def reward_if_same_side_of_turn(reward,waypoints,closest_waypoints,heading, distance_from_center ,is_left_of_center,reward_multiplier=1):
        # Calculate the direction of the center line based on the closest waypoints
        next_point = waypoints[closest_waypoints[1]]
        prev_point = waypoints[closest_waypoints[0]]

        # Calculate the direction in radius, arctan2(dy, dx), the result is (-pi, pi) in radians
        track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0]) 
        # Convert to degree
        track_direction = math.degrees(track_direction)

        # Calculate the difference between the track direction and the heading direction of the car
        direction_diff = abs(track_direction - heading)
        if direction_diff > 180:
            direction_diff = 360 - direction_diff ### negative = right

        if direction_diff > 0:
            if is_left_of_center:
                reward = 1
            else:
                reward = -1
        else:
            if is_left_of_center:
                reward = -1
            else:
                reward = 1
        return reward
    def reward_steering_same_side_of_turn(reward,waypoints,closest_waypoints,heading, distance_from_center ,is_left_of_center,reward_multiplier=1):
        # Calculate the direction of the center line based on the closest waypoints
        next_point = waypoints[closest_waypoints[1]]
        prev_point = waypoints[closest_waypoints[0]]

        # Calculate the direction in radius, arctan2(dy, dx), the result is (-pi, pi) in radians
        track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0]) 
        # Convert to degree
        track_direction = math.degrees(track_direction)

        # Calculate the difference between the track direction and the heading direction of the car
        direction_diff = abs(track_direction - heading)
        if direction_diff > 180:
            direction_diff = 360 - direction_diff ### negative = right

        if direction_diff > 0:
            if steering > 0:
                reward = 1
            else:
                reward = -1
        else:
            if steering > 0:
                reward = -1
            else:
                reward = 1
        return reward
    
    def reward_speed(reward,speed):
        return reward + speed * 0.4
    def remove_reward_if_off_track(reward, track_width, distance_from_center):
        # Calculate 3 markers that are at varying distances away from the center line
        marker_3 = 0.5 * track_width
        if distance_from_center > marker_3:
            reward = -50  # likely crashed/ close to off track
        return reward
    
    def punish_steering(reward,steering):
        if steering > 10:
            reward = -1
        if steering > 20:
            reward = -2
        return reward
    
    
    reward = award_close_to_side(reward, track_width, distance_from_center,is_left_of_center)
    print(reward)
#     reward = reward_speed(reward,speed)
#     print(reward)
    reward = reward + punish_steering(reward,steering)
    print(reward)
    reward = reward + 2*reward_on_the_same_side_as_waypoint(reward,waypoints,closest_waypoints,heading, distance_from_center ,reward_multiplier=1)
    print(reward)
    if closest_waypoints[0]+1 < len(waypoints) and closest_waypoints[1]+1 < len(waypoints):
        reward = reward + reward_on_the_same_side_as_waypoint(reward,waypoints,(closest_waypoints[0]+1,closest_waypoints[1]+1),heading, distance_from_center ,reward_multiplier=1)
    print(reward)
    reward = reward + 2*reward_if_same_side_of_turn(reward,waypoints,closest_waypoints,heading, distance_from_center ,is_left_of_center,reward_multiplier=1)
    print(reward)
    if closest_waypoints[0]+1 < len(waypoints) and closest_waypoints[1]+1 < len(waypoints):
        reward = reward + 2*reward_if_same_side_of_turn(reward,waypoints,(closest_waypoints[0]+1,closest_waypoints[1]+1),heading, distance_from_center ,is_left_of_center,reward_multiplier=1)
    print(reward)
    if closest_waypoints[0]+2 < len(waypoints) and closest_waypoints[1]+2 < len(waypoints):
        reward = reward + 1*reward_if_same_side_of_turn(reward,waypoints,(closest_waypoints[0]+2,closest_waypoints[1]+2),heading, distance_from_center ,is_left_of_center,reward_multiplier=1)
    print(reward)
#     reward = reward + 2*reward_steering_same_side_of_turn(reward,waypoints,closest_waypoints,heading, distance_from_center ,is_left_of_center,reward_multiplier=1)
#     print(reward)
#     reward += is_left_of_center * 2 ## reward being left
    reward = remove_reward_if_off_track(reward, track_width, distance_from_center)
    reward = reward+progress/5
    
    
    return float(reward)

In [49]:
reward_function(params)

-5
-6
-18
-24
-22
-20
-19


-3.0