# 4.5 基于Agent的自主屋内搜索

本节的任务，就是赋予无人机目标定位能力

让无人机能够自主搜索飞行

利用目标识别和目标定位能力，获得每个目标相对于无人机的距离和角度，例如：

[('yellow duck', 15.375, -21.425346762452097), ('flower', 14.7421875, -42.059604476553346), ('coca cola', 14.703125, -12.83171588735092), ('mirror', 13.203125, 6.831911236078974)]

具体的能力函数如下：

In [5]:
from typing import List,Tuple

def ob_objects(obj_name_list:List[str])-> List[Tuple[str, float, float]]:
    """
    Locate the requested objects in the drone's current camera view.

    Runs object detection for the given names, then reads the planar and
    perspective depth images from the simulator and converts the centre of
    each detected bounding box into a distance and a signed angle.

    Args:
        obj_name_list: Names of the objects to look for.

    Returns:
        List: One (object name, distance to the drone, angle in degrees) tuple
        per detection. The angle is negative when the object lies in the left
        half of the image and positive otherwise. The list is empty when
        nothing is detected.
    """

    # Step 1: object detection.
    prompt = ".".join(obj_name_list)
    # obj_id_list: [obj1, obj2, ...]; obj_locs: [[xmin, ymin, xmax, ymax], ...]
    obj_id_list, obj_locs = detect(prompt)

    # Step 2: fetch the depth data. Only the two depth maps are needed; the
    # left/right test below uses the depth map width, so no scene image has
    # to be downloaded and decoded.
    responses = client.simGetImages([
        # Uncompressed float image: each pixel is the distance to the image plane.
        airsim.ImageRequest(0, airsim.ImageType.DepthPlanar, pixels_as_float=True),
        # Uncompressed float image: each pixel is the distance to the camera.
        airsim.ImageRequest(0, airsim.ImageType.DepthPerspective, pixels_as_float=True),
    ])
    planar_resp, perspective_resp = responses[0], responses[1]

    # Reshape every depth map with its *own* height and width.
    img_depth_planar = np.array(planar_resp.image_data_float).reshape(
        planar_resp.height, planar_resp.width
    )
    img_depth_perspective = np.array(perspective_resp.image_data_float).reshape(
        perspective_resp.height, perspective_resp.width
    )
    img_height, img_width = img_depth_planar.shape

    # Step 3: build one result entry per detection, measured at the bbox centre.
    final_obj_list = []
    for obj_name, bbox in zip(obj_id_list, obj_locs):
        # Clamp the centre so a box touching the border cannot index out of range.
        center_x = min(max(int((bbox[0] + bbox[2]) / 2), 0), img_width - 1)
        center_y = min(max(int((bbox[1] + bbox[3]) / 2), 0), img_height - 1)

        depth_distance = float(img_depth_planar[center_y, center_x])  # distance to the image plane
        camera_distance = float(img_depth_perspective[center_y, center_x])  # distance to the camera

        # cos(angle) = planar distance / camera distance. Clamp the ratio so
        # floating-point noise cannot push it outside acos's domain, and
        # avoid dividing by a zero distance.
        # NOTE: this is the angle off the optical axis; it is used here as an
        # approximation of the yaw offset.
        if camera_distance > 0:
            ratio = min(1.0, max(-1.0, depth_distance / camera_distance))
            angle_degree = math.degrees(math.acos(ratio))
        else:
            angle_degree = 0.0

        # Sign convention: objects in the left half of the image get a
        # negative angle (turn left), objects in the right half a positive one.
        if center_x < img_width / 2:
            angle_degree = -angle_degree

        final_obj_list.append((obj_name, camera_distance, angle_degree))

    return final_obj_list

为了方便无人机根据目标的距离和角度飞向目标，增加了两个辅助函数：

In [None]:
def turn(angle: float)->str:
    """
    Rotate the drone by the given angle relative to its current yaw.

    Args:
        angle: Angle the drone should rotate by, in degrees.

    Returns:
        str: A status message reporting success.
    """
    # New heading = current yaw plus the requested offset.
    target_yaw = get_yaw() + angle
    set_yaw(target_yaw)
    return "成功"

def move(distance: float)->str:
    """
    Move the drone forward by the given distance along its current heading.

    Args:
        distance: Distance to move forward, in metres.

    Returns:
        str: A status message reporting success.
    """
    pose = get_drone_position()
    # The fourth entry of the pose is the yaw in degrees; convert it to radians.
    heading = math.radians(pose[3])
    # Project the travel distance onto the x/y axes and keep the height unchanged.
    # The fourth target entry is passed as 0 (presumably a yaw value; confirm
    # against fly_to).
    target = [
        pose[0] + distance * math.cos(heading),
        pose[1] + distance * math.sin(heading),
        pose[2],
        0,
    ]
    fly_to(target)
    return "成功"

## 开始实验

In [1]:
# !pip install smolagents[litellm]
import os

from smolagents import CodeAgent, LiteLLMModel

# The API key is read from the VOLCENGINE_API_KEY environment variable so that
# no credential is stored in the notebook. Export your own key before running
# this cell; a missing variable raises KeyError here instead of failing later.
model = LiteLLMModel(
    model_id="volcengine/doubao-1-5-pro-32k-250115", # This model is a bit weak for agentic behaviours though
    api_base="https://ark.cn-beijing.volces.com/api/v3", # replace with 127.0.0.1:11434 or remote open-ai compatible server if necessary
    api_key=os.environ["VOLCENGINE_API_KEY"], # use your own key, supplied via the environment
    flatten_messages_as_text=True, # leaving this out can cause errors on multi-step runs
)

In [2]:
from airsim_smol_wrapper import *

In [9]:
# Fly into the room first: take off, set the yaw to 90, then step forward five times.
takeoff()
set_yaw(90)
for _step in range(5):
    forward()

Connected!
Client Ver:1 (Min Req: 1), Server Ver:1 (Min Req: 1)



In [10]:
# First agent: it is given only the turn, move and watch tools.
agent = CodeAgent(
    tools=[turn, move, watch],
    model=model,
)

In [6]:
# Natural-language task for the agent (roughly: "I'd like some fruit; check
# whether there is any left in the house").
command = """
我想吃点水果，看看屋里面还有吗？
"""

result = agent.run(command)

In [11]:
# Second agent: same tools as before plus ob_objects for object localisation.
agent = CodeAgent(
    tools=[turn, move, ob_objects, watch],
    model=model,
)

In [12]:
# Natural-language task for the agent (roughly: "fly to about 1 metre in front
# of the TV and check whether it is switched on").
command = """
请飞到电视机前面1米左右，看看电视开了没有
"""

result = agent.run(command)

In [13]:
# Call reset() once the experiment is finished (provided by the wildcard import
# from airsim_smol_wrapper; presumably restores the simulator's initial state — confirm).
reset()