Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions firmware/main/apps/app_avatar/app_avatar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <smooth_lvgl.hpp>
#include <stackchan/stackchan.h>
#include <apps/common/common.h>
#include <hal/board/hal_bridge.h>
#include <string_view>
#include <cstdint>
#include <memory>
Expand Down Expand Up @@ -89,6 +90,13 @@ void AppAvatar::onOpen()
avatar->getPanel()->onClick().connect([&]() { _screen_clicked_flag = true; });
GetStackChan().attachAvatar(std::move(avatar));

auto gaze_modifier = std::make_unique<ConversationGazeModifier>(ConversationGazeModifier::Mode::Listening);
gaze_modifier->setTargetProvider([](float& target_x, float& target_y) {
auto* camera = hal_bridge::board_get_camera();
return camera && camera->DetectVisualTarget(target_x, target_y);
});
_conversation_gaze_modifier_id = GetStackChan().addModifier(std::move(gaze_modifier));

/* ------------------------------- BLE events ------------------------------- */
GetHAL().onBleAvatarData.connect([&](const char* data) {
std::lock_guard<std::mutex> lock(_mutex);
Expand Down Expand Up @@ -271,6 +279,11 @@ void AppAvatar::onClose()
GetStackChan().resetAvatar();
_video_window.reset();

if (_conversation_gaze_modifier_id >= 0) {
GetStackChan().removeModifier(_conversation_gaze_modifier_id);
_conversation_gaze_modifier_id = -1;
}

GetHAL().onBleAvatarData.clear();
GetHAL().onBleMotionData.clear();

Expand Down
3 changes: 2 additions & 1 deletion firmware/main/apps/app_avatar/app_avatar.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ class AppAvatar : public mooncake::AppAbility {
std::unique_ptr<view::VideoWindow> _video_window;

bool _screen_clicked_flag = false;
int _dance_modifier_id = -1;
int _dance_modifier_id = -1;
int _conversation_gaze_modifier_id = -1;

void check_auto_angle_sync_mode();
};
251 changes: 251 additions & 0 deletions firmware/main/hal/board/stackchan_camera.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
#include <unistd.h>
#include <errno.h>
#include <esp_heap_caps.h>
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "esp_imgfx_color_convert.h"
Expand Down Expand Up @@ -58,6 +60,147 @@

#define TAG "StackChanCamera"

namespace {

// Coarse sampling grid for visual-target tracking: one probe pixel per cell.
constexpr int kTrackingGridWidth = 16;
constexpr int kTrackingGridHeight = 12;

/// One probed pixel. Depending on the source pixel format either the YUV or
/// the RGB channels (or both) are populated; the has_* flags record which
/// color spaces hold valid data. Defaults are black with neutral chroma.
struct VisualSample {
    uint8_t y = 0;     // luma
    uint8_t cb = 128;  // blue-difference chroma (128 = neutral)
    uint8_t cr = 128;  // red-difference chroma (128 = neutral)
    uint8_t r = 0;
    uint8_t g = 0;
    uint8_t b = 0;
    bool has_yuv = false;
    bool has_rgb = false;
};

/// Clamps an int into the [0, 255] range of an 8-bit color channel.
uint8_t clamp_u8(int value)
{
    return static_cast<uint8_t>(std::clamp(value, 0, 255));
}

/// Converts sample.y/cb/cr to RGB in place (integer approximation of the
/// BT.601 limited-range matrix) and marks the RGB channels valid.
void yuv_to_rgb(VisualSample& sample)
{
    const int c = static_cast<int>(sample.y) - 16;
    const int d = static_cast<int>(sample.cb) - 128;
    const int e = static_cast<int>(sample.cr) - 128;
    sample.r = clamp_u8((298 * c + 409 * e + 128) >> 8);
    sample.g = clamp_u8((298 * c - 100 * d - 208 * e + 128) >> 8);
    sample.b = clamp_u8((298 * c + 516 * d + 128) >> 8);
    sample.has_rgb = true;
}

/// Derives luma from sample.r/g/b using BT.601 weights (77/150/29 out of 256).
void rgb_to_luma(VisualSample& sample)
{
    sample.y = static_cast<uint8_t>((77 * sample.r + 150 * sample.g + 29 * sample.b) >> 8);
}

/// Reads the pixel at (x, y) from a raw capture buffer into `sample`.
///
/// @param data   start of the capture buffer (null is rejected)
/// @param len    number of valid bytes in the buffer (bounds-checked per format)
/// @param format V4L2 pixel format of the buffer
/// @param width  image width in pixels, must be > 0
/// @param height image height in pixels, must be > 0
/// @param x      pixel column, must lie inside the image
/// @param y      pixel row, must lie inside the image
/// @param sample output sample; populated only on success
/// @return true if the pixel was decoded, false on bad arguments,
///         unsupported format, or an out-of-bounds buffer access.
bool read_visual_sample(const uint8_t* data, size_t len, v4l2_pix_fmt_t format, int width, int height, int x, int y,
                        VisualSample& sample)
{
    if (!data || width <= 0 || height <= 0 || x < 0 || y < 0 || x >= width || y >= height) {
        return false;
    }

    switch (format) {
        // NOTE(review): V4L2_PIX_FMT_YUV422P is nominally a *planar* layout,
        // but it is decoded here with the packed YUYV path — confirm the
        // esp_video driver actually delivers packed Y0/Cb/Y1/Cr for it.
        case V4L2_PIX_FMT_YUV422P:
        case V4L2_PIX_FMT_YUYV: {
            // Two horizontally adjacent pixels share one Cb/Cr pair.
            const size_t pair_offset = (static_cast<size_t>(y) * width + (x & ~1)) * 2;
            if (pair_offset + 3 >= len) {
                return false;
            }
            sample.y = data[pair_offset + ((x & 1) ? 2 : 0)];
            sample.cb = data[pair_offset + 1];
            sample.cr = data[pair_offset + 3];
            sample.has_yuv = true;
            yuv_to_rgb(sample);
            return true;
        }
        case V4L2_PIX_FMT_RGB565:
        case V4L2_PIX_FMT_RGB565X: {
            const size_t offset = (static_cast<size_t>(y) * width + x) * 2;
            if (offset + 1 >= len) {
                return false;
            }
            uint16_t pixel = static_cast<uint16_t>(data[offset]) | (static_cast<uint16_t>(data[offset + 1]) << 8);
            if (format == V4L2_PIX_FMT_RGB565X) {
                // RGB565X is the byte-swapped variant of RGB565.
                pixel = __builtin_bswap16(pixel);
            }
            // Expand 5/6/5-bit channels to 8 bits (low bits left at zero).
            sample.r = static_cast<uint8_t>(((pixel >> 11) & 0x1F) << 3);
            sample.g = static_cast<uint8_t>(((pixel >> 5) & 0x3F) << 2);
            sample.b = static_cast<uint8_t>((pixel & 0x1F) << 3);
            sample.has_rgb = true;
            rgb_to_luma(sample);
            return true;
        }
        case V4L2_PIX_FMT_RGB24: {
            const size_t offset = (static_cast<size_t>(y) * width + x) * 3;
            if (offset + 2 >= len) {
                return false;
            }
            sample.r = data[offset];
            sample.g = data[offset + 1];
            sample.b = data[offset + 2];
            sample.has_rgb = true;
            rgb_to_luma(sample);
            return true;
        }
        case V4L2_PIX_FMT_GREY:
        case V4L2_PIX_FMT_YUV420: {
            // Only the (leading) luma plane is sampled; chroma is ignored.
            const size_t offset = static_cast<size_t>(y) * width + x;
            if (offset >= len) {
                return false;
            }
            sample.y = data[offset];
            return true;
        }
        default:
            return false;
    }
}

/// Heuristic skin-tone score for one sample: 16 for a YCbCr-space match,
/// 12 for an RGB-space match, 0 otherwise. The YCbCr test is preferred
/// (tighter); thresholds are conventional skin-detection ranges.
int skin_score(const VisualSample& sample)
{
    if (sample.has_yuv && sample.y > 45 && sample.cb >= 70 && sample.cb <= 140 && sample.cr >= 128 &&
        sample.cr <= 190 && sample.cr > sample.cb + 5) {
        return 16;
    }

    if (sample.has_rgb) {
        const int max_rgb =
            std::max({static_cast<int>(sample.r), static_cast<int>(sample.g), static_cast<int>(sample.b)});
        const int min_rgb =
            std::min({static_cast<int>(sample.r), static_cast<int>(sample.g), static_cast<int>(sample.b)});
        if (sample.r > 70 && sample.g > 35 && sample.b > 20 && sample.r > sample.g && sample.r > sample.b &&
            max_rgb - min_rgb > 15 && std::abs(static_cast<int>(sample.r) - static_cast<int>(sample.g)) > 8) {
            return 12;
        }
    }

    return 0;
}

/// Clamps a float into the normalized coordinate range [-1, 1].
float clamp_unit(float value)
{
    return std::clamp(value, -1.0f, 1.0f);
}

}  // namespace

#if defined(CONFIG_CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER) || defined(CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP)
#warning \
"CAMERA_SENSOR_SWAP_PIXEL_BYTE_ORDER or CONFIG_XIAOZHI_ENABLE_CAMERA_ENDIANNESS_SWAP is enabled, which may cause image corruption in YUV422 format!"
Expand Down Expand Up @@ -398,6 +541,8 @@ bool StackChanCamera::Capture()
encoder_thread_.join();
}

std::lock_guard<std::mutex> lock(capture_mutex_);

if (!streaming_on_ || video_fd_ < 0) {
return false;
}
Expand Down Expand Up @@ -857,6 +1002,8 @@ bool StackChanCamera::StreamCaptures()
encoder_thread_.join();
}

std::lock_guard<std::mutex> lock(capture_mutex_);

if (!streaming_on_ || video_fd_ < 0) {
return false;
}
Expand Down Expand Up @@ -967,6 +1114,110 @@ bool StackChanCamera::StreamCaptures()
return true;
}

/// Samples the most recent camera frame on a coarse grid and estimates where
/// a visually interesting target (skin-toned and/or moving region) lies.
///
/// On success, target_x/target_y receive the normalized target position in
/// [-1, 1]; the raw image y axis is negated before being reported. The
/// optional `confidence` receives a 0-100 score derived from the total
/// detection weight; it is reset to 0 on entry.
///
/// Serialized against Capture()/StreamCaptures() via capture_mutex_.
/// NOTE(review): VIDIOC_DQBUF is issued while the mutex is held and may block
/// until a frame is ready — confirm the driver's dequeue/timeout behavior.
///
/// @return true if a target was detected in this frame, false otherwise.
bool StackChanCamera::DetectVisualTarget(float& target_x, float& target_y, int* confidence)
{
    if (confidence) {
        *confidence = 0;
    }

    std::lock_guard<std::mutex> lock(capture_mutex_);

    if (!streaming_on_ || video_fd_ < 0) {
        return false;
    }

    // Dequeue the next filled capture buffer. It must be re-queued below no
    // matter how the analysis turns out — hence the do { } while (false)
    // construct so every early exit still reaches the VIDIOC_QBUF call.
    struct v4l2_buffer buf = {};
    buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    buf.memory = V4L2_MEMORY_MMAP;
    if (ioctl(video_fd_, VIDIOC_DQBUF, &buf) != 0) {
        ESP_LOGD(TAG, "VIDIOC_DQBUF failed during visual tracking: errno=%d(%s)", errno, strerror(errno));
        return false;
    }

    bool detected = false;
    do {
        // Sanity-check the dequeued buffer before touching its memory.
        if (buf.index >= mmap_buffers_.size() || !mmap_buffers_[buf.index].start || buf.bytesused == 0) {
            break;
        }

        // The raw buffer holds the sensor-oriented image; when rotation is
        // enabled, frame_ carries post-rotation dimensions, so address the
        // raw pixels with the sensor dimensions instead.
        int width = frame_.width;
        int height = frame_.height;
#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
        width = sensor_width_;
        height = sensor_height_;
#endif // CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
        if (width <= 0 || height <= 0) {
            break;
        }

        const auto* data = static_cast<const uint8_t*>(mmap_buffers_[buf.index].start);
        // Weighted-centroid accumulators over all grid cells whose combined
        // skin/motion weight passes the per-cell threshold below.
        uint32_t weighted_x = 0;
        uint32_t weighted_y = 0;
        uint32_t total_weight = 0;
        uint16_t hit_count = 0;

        for (int gy = 0; gy < kTrackingGridHeight; ++gy) {
            // Probe the center pixel of each grid cell.
            const int y = (height * (gy * 2 + 1)) / (kTrackingGridHeight * 2);
            for (int gx = 0; gx < kTrackingGridWidth; ++gx) {
                const int x = (width * (gx * 2 + 1)) / (kTrackingGridWidth * 2);

                VisualSample sample;
                if (!read_visual_sample(data, buf.bytesused, sensor_format_, width, height, x, y, sample)) {
                    continue;
                }

                const int grid_index = gy * kTrackingGridWidth + gx;
                int motion_score = 0;
                if (tracking_luma_initialized_) {
                    // Luma delta against the running per-cell average counts
                    // as motion once it exceeds a small noise threshold.
                    const int diff = std::abs(static_cast<int>(sample.y) - static_cast<int>(tracking_luma_[grid_index]));
                    if (diff > 16) {
                        motion_score = std::min(diff / 3, 20);
                    }
                    // Exponential moving average (3/4 old, 1/4 new) so slow
                    // lighting drift does not keep registering as motion.
                    tracking_luma_[grid_index] =
                        static_cast<uint8_t>((static_cast<int>(tracking_luma_[grid_index]) * 3 + sample.y) / 4);
                } else {
                    // First pass only seeds the baseline; no motion score yet.
                    tracking_luma_[grid_index] = sample.y;
                }

                // Skin evidence dominates (x3); strong motion alone can pass.
                const int weight = skin_score(sample) * 3 + motion_score;
                if (weight < 12) {
                    continue;
                }

                weighted_x += static_cast<uint32_t>(x * weight);
                weighted_y += static_cast<uint32_t>(y * weight);
                total_weight += weight;
                hit_count++;
            }
        }
        tracking_luma_initialized_ = true;

        // Require a minimum number of supporting cells and total weight so a
        // single noisy cell cannot trigger a detection.
        if (hit_count < 3 || total_weight < 60) {
            break;
        }

        // Weighted centroid in pixel coordinates, then normalized to [-1, 1]
        // around the image center.
        const float center_x = static_cast<float>(weighted_x) / static_cast<float>(total_weight);
        const float center_y = static_cast<float>(weighted_y) / static_cast<float>(total_weight);
        const float norm_x = (center_x - (static_cast<float>(width) - 1.0f) * 0.5f) /
                             ((static_cast<float>(width) - 1.0f) * 0.5f);
        const float norm_y = (center_y - (static_cast<float>(height) - 1.0f) * 0.5f) /
                             ((static_cast<float>(height) - 1.0f) * 0.5f);

        // Over-drive the normalized offsets (then clamp) so moderately
        // off-center targets already yield full deflection; y is negated.
        target_x = clamp_unit(norm_x * 2.40f);
        target_y = clamp_unit(-norm_y * 1.80f);
        if (confidence) {
            *confidence = std::min<int>(100, total_weight / 8);
        }
        detected = true;
    } while (false);

    // Always hand the buffer back to the driver so the stream keeps flowing.
    if (ioctl(video_fd_, VIDIOC_QBUF, &buf) != 0) {
        ESP_LOGE(TAG, "VIDIOC_QBUF failed after visual tracking");
    }

    return detected;
}

bool StackChanCamera::SetHMirror(bool enabled)
{
if (video_fd_ < 0) return false;
Expand Down
5 changes: 5 additions & 0 deletions firmware/main/hal/board/stackchan_camera.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <lvgl.h>
#include <thread>
#include <memory>
#include <mutex>
#include <vector>

#include <freertos/FreeRTOS.h>
Expand All @@ -29,6 +30,9 @@ class StackChanCamera : public Camera {
v4l2_pix_fmt_t format = 0;
} frame_;
v4l2_pix_fmt_t sensor_format_ = 0;
std::mutex capture_mutex_;
uint8_t tracking_luma_[16 * 12] = {};
bool tracking_luma_initialized_ = false;
#ifdef CONFIG_XIAOZHI_ENABLE_ROTATE_CAMERA_IMAGE
uint16_t sensor_width_ = 0;
uint16_t sensor_height_ = 0;
Expand All @@ -51,6 +55,7 @@ class StackChanCamera : public Camera {
virtual void SetExplainUrl(const std::string& url, const std::string& token);
virtual bool Capture() override;
bool StreamCaptures();
bool DetectVisualTarget(float& target_x, float& target_y, int* confidence = nullptr);

// 翻转控制函数
virtual bool SetHMirror(bool enabled) override;
Expand Down
Loading