In [5]:
# Names of the input keys available to reward_function. Spellings match the
# keys the function actually reads (e.g. 'distance_from_center', not '...centre').
params = ['x', 'y', 'heading', 'waypoints', 'closest_waypoints', 'progress', 'steps',
          'track_width', 'distance_from_center', 'is_left_of_center', 'all_wheels_on_track',
          'speed', 'steering_angle']

Cases:
1) All wheels on track and Stay inside the two borders
2) Progress
3) Faster time performance bonus
4) Adjust to acceptable angle direction
5) Speed adjustment
6) Prevent zigzag




In [6]:
import math

# Waypoint *indices* (positions in params['waypoints']) that make up each
# section of the custom racing line. Built once at import time; frozensets
# give O(1) membership tests.
_LEFT_PART = frozenset(range(62, 88))
_CENTRE_PART = frozenset((61, 88))
_RIGHT_PART = frozenset(range(0, 61)) | frozenset(range(89, 118))
_SLOW_PART = (frozenset(range(17, 47))
              | frozenset(range(71, 94))
              | frozenset(range(102, 113)))
_FAST_PART = (frozenset(range(0, 17))
              | frozenset(range(47, 71))
              | frozenset(range(94, 102))
              | frozenset(range(113, 118)))


def reward_function(params):
    """Score one simulation step of the agent.

    The reward is the sum of several independent terms:

    1. +30 if all wheels are on track and the agent is at least 0.05 inside
       the track border, otherwise -50.
    2. ``progress / 10 * 3`` once progress reaches 75 or more.
    3. -30 if the heading deviates more than 45 degrees from the direction
       of the segment between the two closest waypoints.
    4. +10 / -10 for being on / off the intended side of the track for the
       upcoming waypoint (custom racing line).
    5. Speed bonus or penalty depending on whether the upcoming waypoint is
       in a fast or a slow section.
    6. -10 for steering more than 10 degrees in a fast section (anti-zigzag).
    7. A lap-time bonus when progress is exactly 100.

    Args:
        params: Mapping with the keys 'heading', 'waypoints',
            'closest_waypoints', 'progress', 'steps', 'track_width',
            'distance_from_center', 'is_left_of_center',
            'all_wheels_on_track', 'speed' and 'steering_angle'.
            (Presumably the AWS DeepRacer input dictionary -- confirm
            against the training environment.)

    Returns:
        The total reward for this step, as a float.
    """
    reward = 0

    # Read input parameters
    heading = params['heading']
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']
    progress = params['progress']
    steps = params['steps']
    track_width = params['track_width']
    distance_from_center = params['distance_from_center']
    is_left_of_center = params['is_left_of_center']
    all_wheels_on_track = params['all_wheels_on_track']
    speed = params['speed']
    steering_angle = abs(params['steering_angle'])

    # 1) Reward staying inside the two borders, with a 0.05 safety margin.
    if all_wheels_on_track and (0.5 * track_width - distance_from_center) >= 0.05:
        reward += 30
    else:
        reward -= 50

    # 2) Reward progress of 75% or higher.
    if progress >= 75:
        reward += (progress / 10) * 3

    # 3) Penalize a heading that deviates too far from the track direction.
    next_index = closest_waypoints[1]
    prev_point = waypoints[closest_waypoints[0]]
    next_point = waypoints[next_index]

    track_direction = math.degrees(
        math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
    )

    # Fold the difference into [0, 180] so that e.g. 350 degrees counts as 10.
    direction_diff = abs(track_direction - heading)
    if direction_diff > 180:
        direction_diff = 360 - direction_diff

    DIRECTION_THRESHOLD = 45.0
    if direction_diff > DIRECTION_THRESHOLD:
        reward -= 30

    # 4) Running path control: reward following the custom racing line.
    # Membership is tested with the waypoint *index*; testing the waypoint's
    # coordinate pair against index sets would never match.
    centre_variance = distance_from_center / track_width

    if next_index in _LEFT_PART and is_left_of_center:
        reward += 10
    elif next_index in _RIGHT_PART and not is_left_of_center:
        reward += 10
    elif next_index in _CENTRE_PART and centre_variance < 0.3:
        reward += 10
    else:
        reward -= 10

    # 5) Speed control.
    HIGH_SPEED = 3.0
    MODERATE_SPEED = 2.0
    LOW_SPEED = 1.0
    in_fast_part = next_index in _FAST_PART

    if in_fast_part:
        # ">=" rather than "==": exact float equality would skip the bonus
        # for any speed above HIGH_SPEED.
        if speed >= HIGH_SPEED:
            reward += 10
        elif speed >= MODERATE_SPEED:
            reward += 5
        else:
            reward -= 10

    if next_index in _SLOW_PART:
        if LOW_SPEED < speed <= MODERATE_SPEED:
            reward += 10
        elif 0 < speed <= LOW_SPEED:
            reward += 5
        elif speed == 0:
            reward -= 10

    # 6) Penalize heavy steering in fast sections to prevent zigzag.
    STEERING_ANGLE_THRESHOLD = 10.0
    if in_fast_part and steering_angle > STEERING_ANGLE_THRESHOLD:
        reward -= 10

    # 7) Reward finishing the lap faster than the benchmark time.
    BENCHMARK_TIME = 12
    TARGET_TIME = 10
    # NOTE(review): 16.6 is presumably the number of steps per second -- confirm.
    running_time = round(steps / 16.6, 1)

    # running_time > 0 guards against division by zero when steps is 0.
    if progress == 100 and running_time > 0:
        if running_time <= TARGET_TIME:
            reward += (1 / running_time) * 200
        elif running_time <= BENCHMARK_TIME:
            reward += (1 / running_time) * 100

    return float(reward)




