Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
2452f8b
[update] add vad model.update sherpa-onnx head file.
Abandon-ht Dec 17, 2024
dd3279d
[update] add depth_anything model.
Abandon-ht Dec 17, 2024
795e74f
[fix] image format.
Abandon-ht Dec 19, 2024
6833b93
[update] update depth_anything.
Abandon-ht Dec 19, 2024
801d692
[update] depth_anything supports uvc input & base64 jpg output.
Abandon-ht Dec 19, 2024
095343a
[update] perf kws & melotts & vad model setup.
Abandon-ht Dec 20, 2024
71ba7e4
[update] perf yolo seg & pose result.update vlm supports internvl2.5.…
Abandon-ht Dec 30, 2024
324de59
[update] add point number function parameter to YOLO-Pose
Abandon-ht Dec 31, 2024
089bfaf
[fix] Increase tokenizer init timeout.
Abandon-ht Jan 6, 2025
125038e
[add] add mode_internvl2.5, mode_yolo11n-hand-pose.
Abandon-ht Jan 6, 2025
3d843bf
[fix] camera frame error
dianjixz Jan 7, 2025
e8d3b36
[update] ax-samples && [update] version
dianjixz Jan 7, 2025
db8d051
[update] llm-sys version
dianjixz Jan 7, 2025
faf5506
[update] add: depth_anything model, qwen2.5-1.5B model, yolo11n-hand-…
Abandon-ht Jan 9, 2025
41aab10
update static_lib version
Abandon-ht Jan 11, 2025
d942981
update whisper
Abandon-ht Jan 11, 2025
d91c62c
update whisper runner
Abandon-ht Jan 11, 2025
acf245e
[update] llm_sys add sys_push and sys_pull code
dianjixz Jan 11, 2025
e13ab7a
Merge branch 'dev' of github.com:m5stack/StackFlow into dev
dianjixz Jan 11, 2025
b678240
upload qwen2.5-1.5B model json
Abandon-ht Jan 11, 2025
0b387ca
[update] llm_pack.py ThreadPool
dianjixz Jan 11, 2025
c2156b2
upload whisper-tiny.json
Abandon-ht Jan 11, 2025
169a595
[fix] fix whisper bug
Abandon-ht Jan 13, 2025
e8e4cc9
[update] update vad & whisper
Abandon-ht Jan 13, 2025
c8ed5ec
[update] update static_lib version v0.0.6 to v0.0.7. update main_whis…
Abandon-ht Jan 14, 2025
0ed9518
[update] update whisper
Abandon-ht Jan 14, 2025
ee276a3
[update] update llm_pack.py add llm_vad & llm_whisper.
Abandon-ht Jan 14, 2025
8a4e7e0
[update] update vad & whisper, vad support kws, whisper suport kws.
Abandon-ht Jan 15, 2025
29ad0ee
[fix] static_lib version auto update
dianjixz Jan 15, 2025
28d65f4
[update] llm supports input from whisper. perf whisper endpoint method.
Abandon-ht Jan 15, 2025
a830d44
[clean]
dianjixz Jan 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.vscode/settings.json
projects/core135_llm_product_test_ui
projects/imx678_test
projects/imx678_test
projects/test_*
5 changes: 2 additions & 3 deletions ext_components/ax-samples/SConstruct
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,13 @@ if "CONFIG_AX_SAMPLES_ENABLED" in os.environ:
LINK_SEARCH_PATH = []

INCLUDE += [
os.path.join(env["GIT_REPO_LISTS"]["ax-samples"]["path"], "examples/base"),
os.path.join(env["GIT_REPO_LISTS"]["ax-samples"]["path"], "examples/utilities"),
os.path.join(env["GIT_REPO_LISTS"]["ax-samples"]["path"], "examples"),
]
if "CONFIG_AX_620E_MSP_ENABLED" in os.environ:
INCLUDE += [
os.path.join(
env["GIT_REPO_LISTS"]["ax-samples"]["path"],
"examples/ax620e/middleware",
"examples/ax620e",
)
]

Expand Down
2 changes: 1 addition & 1 deletion projects/llm_framework/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ send :
"action":"setup",
"object":"yolo.setup",
"data":{
"model":"yolo11n_anquanmao",
"model":"yolo11n",
"response_format":"yolo.yolobox",
"input":"camera.1000",
"enoutput":true
Expand Down
4 changes: 3 additions & 1 deletion projects/llm_framework/SConstruct
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import shutil
os.environ['SDK_PATH'] = os.path.normpath(str(Path(os.getcwd())/'..'/'..'/'SDK'))
os.environ['EXT_COMPONENTS_PATH'] = os.path.normpath(str(Path(os.getcwd())/'..'/'..'/'ext_components'))

version = 'v0.0.5'
version = 'v0.0.7'
static_lib = 'static_lib'
update = False

Expand All @@ -26,4 +26,6 @@ if update:
exec(f.read())
down_url = "https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/linux/llm/static_lib_{}.tar.gz".format(version)
down_path = check_wget_down(down_url, "static_lib_{}.tar.gz".format(version))
if os.path.exists(static_lib):
shutil.rmtree(static_lib)
shutil.move(down_path, static_lib)
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// sherpa-onnx/csrc/fast-clustering-config.h
//
// Copyright (c) 2024 Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_FAST_CLUSTERING_CONFIG_H_
#define SHERPA_ONNX_CSRC_FAST_CLUSTERING_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct FastClusteringConfig {
  // Requested number of clusters. When this is greater than 0, `threshold`
  // is ignored.
  //
  // Setting it is strongly recommended whenever the number of clusters is
  // known in advance.
  int32_t num_clusters{-1};

  // Distance threshold.
  //
  // A smaller value yields more clusters; a larger value yields fewer.
  float threshold{0.5f};

  FastClusteringConfig() = default;

  // Builds a config from explicit values for both fields.
  FastClusteringConfig(int32_t num_clusters_in, float threshold_in)
      : num_clusters{num_clusters_in}, threshold{threshold_in} {}

  // The three members below are only declared here; their definitions are
  // not part of this header.
  std::string ToString() const;

  void Register(ParseOptions *po);
  bool Validate() const;
};

} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_FAST_CLUSTERING_CONFIG_H_
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// sherpa-onnx/csrc/fast-clustering.h
//
// Copyright (c) 2024 Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_FAST_CLUSTERING_H_
#define SHERPA_ONNX_CSRC_FAST_CLUSTERING_H_

#include <memory>
#include <vector>

#include "sherpa-onnx/csrc/fast-clustering-config.h"

namespace sherpa_onnx {

// Assigns a cluster label to every row of a feature matrix, configured by a
// FastClusteringConfig. The implementation lives in the private, forward-
// declared Impl class held through impl_ (pimpl), so this header exposes
// only the public interface.
class FastClustering {
public:
// Creates a clusterer from the given configuration.
explicit FastClustering(const FastClusteringConfig &config);
// Declared here and defined out of line: Impl is an incomplete type in this
// header, so std::unique_ptr<Impl> cannot be destroyed from here.
~FastClustering();

/**
* @param features Pointer to a 2-D feature matrix in row major. Each row
* is a feature frame. It is changed in-place. We will
* convert each feature frame to a normalized vector.
* That is, the L2-norm of each vector will be equal to 1.
* It uses cosine dissimilarity,
* which is 1 - (cosine similarity)
* @param num_rows Number of feature frames
* @param num_cols The feature dimension.
*
* @return Return a vector of size num_rows. ans[i] contains the label
* for the i-th feature frame, i.e., the i-th row of the feature
* matrix.
*/
std::vector<int32_t> Cluster(float *features, int32_t num_rows,
int32_t num_cols) const;

private:
// Opaque implementation type; defined outside this header.
class Impl;
std::unique_ptr<Impl> impl_;
};

} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_FAST_CLUSTERING_H_
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ struct FeatureExtractorConfig {
float frame_length_ms = 25.0f; // in milliseconds.
bool is_librosa = false;
bool remove_dc_offset = true; // Subtract mean of wave before FFT.
float preemph_coeff = 0.97f; // Preemphasis coefficient.
std::string window_type = "povey"; // e.g. Hamming window

// For models from NeMo
Expand Down
18 changes: 18 additions & 0 deletions projects/llm_framework/include/sherpa/sherpa-onnx/csrc/fst-utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// sherpa-onnx/csrc/fst-utils.h
//
// Copyright (c) 2024 Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_FST_UTILS_H_
#define SHERPA_ONNX_CSRC_FST_UTILS_H_

#include <string>

#include "fst/fstlib.h"

namespace sherpa_onnx {

// Declaration only; the definition is not part of this header.
// Takes a file name and returns a pointer to an fst::Fst<fst::StdArc>.
// NOTE(review): presumably this loads the graph stored in `filename` and the
// caller owns (and must delete) the returned raw pointer -- neither the
// failure behaviour nor the ownership is visible here; confirm against the
// definition.
fst::Fst<fst::StdArc> *ReadGraph(const std::string &filename);

}  // namespace sherpa_onnx

#endif // SHERPA_ONNX_CSRC_FST_UTILS_H_
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,13 @@ struct Hypothesis {
// LM log prob if any.
double lm_log_prob = 0;

// the nn lm score for next token given the current ys
// the nn lm score for next token given the current ys,
// when using shallow fusion
CopyableOrtValue nn_lm_scores;

// cur scored tokens by RNN LM, when rescoring
int32_t cur_scored_pos = 0;

// the nn lm states
std::vector<CopyableOrtValue> nn_lm_states;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,25 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl {
public:
explicit KeywordSpotterTransducerImpl(const KeywordSpotterConfig &config)
: config_(config),
model_(OnlineTransducerModel::Create(config.model_config)),
sym_(config.model_config.tokens) {
model_(OnlineTransducerModel::Create(config.model_config)) {
if (!config.model_config.tokens_buf.empty()) {
sym_ = SymbolTable(config.model_config.tokens_buf, false);
} else {
/// Assumes that tokens_buf and tokens are never both empty.
sym_ = SymbolTable(config.model_config.tokens, true);
}

if (sym_.Contains("<unk>")) {
unk_id_ = sym_["<unk>"];
}

model_->SetFeatureDim(config.feat_config.feature_dim);

InitKeywords();
if (config.keywords_buf.empty()) {
InitKeywords();
} else {
InitKeywordsFromBufStr();
}

decoder_ = std::make_unique<TransducerKeywordDecoder>(
model_.get(), config_.max_active_paths, config_.num_trailing_blanks,
Expand Down Expand Up @@ -305,6 +315,12 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl {
}
#endif

// Initializes the keywords from the in-memory buffer config_.keywords_buf
// by wrapping it in a string stream and handing that stream to
// InitKeywords(). The buffer is expected to hold the same content that a
// keywords file would.
void InitKeywordsFromBufStr() {
  std::istringstream keywords_stream(config_.keywords_buf);
  InitKeywords(keywords_stream);
}

void InitOnlineStream(OnlineStream *stream) const {
auto r = decoder_->GetEmptyResult();
SHERPA_ONNX_CHECK_EQ(r.hyps.Size(), 1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ struct KeywordSpotterConfig {

std::string keywords_file;

/// If keywords_buf is non-empty, the keywords are loaded from this buffer
/// instead of from "keywords_file".
std::string keywords_buf;

KeywordSpotterConfig() = default;

KeywordSpotterConfig(const FeatureExtractorConfig &feat_config,
Expand Down
15 changes: 5 additions & 10 deletions projects/llm_framework/include/sherpa/sherpa-onnx/csrc/lexicon.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,13 @@
#define SHERPA_ONNX_CSRC_LEXICON_H_

#include <cstdint>
#include <istream>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "sherpa-onnx/csrc/offline-tts-frontend.h"

namespace sherpa_onnx {
Expand All @@ -30,11 +26,10 @@ class Lexicon : public OfflineTtsFrontend {
const std::string &punctuations, const std::string &language,
bool debug = false);

#if __ANDROID_API__ >= 9
Lexicon(AAssetManager *mgr, const std::string &lexicon,
const std::string &tokens, const std::string &punctuations,
const std::string &language, bool debug = false);
#endif
template <typename Manager>
Lexicon(Manager *mgr, const std::string &lexicon, const std::string &tokens,
const std::string &punctuations, const std::string &language,
bool debug = false);

std::vector<TokenIDs> ConvertTextToTokenIds(
const std::string &text, const std::string &voice = "") const override;
Expand Down
Loading