In [2]:
import sounddevice as sd
import numpy as np
from IPython.display import Audio

ModuleNotFoundError: No module named 'sounddevice'

In [None]:
devices = sd.query_devices()
default_input_device_idx = sd.default.device[0]
print(f'Use default device: {devices[default_input_device_idx]["name"]}')
sample_rate = 16000
samples_per_read = int(0.1 * sample_rate)
default_output_device_idx = sd.default.device[1]
print(f'Use default device: {devices[default_output_device_idx]["name"]}')

Use default device: 麦克风阵列 (Realtek(R) Audio)
Use default device: 扬声器 (Realtek(R) Audio)


In [None]:
with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
    pcm = np.array([], dtype=np.float32)
    for i in range(50):
        samples, _ = s.read(samples_per_read)
        samples = samples.reshape(-1)
        pcm = np.concatenate((pcm, samples), axis=0)
Audio(pcm, rate=sample_rate)

In [None]:
sd.play(pcm, samplerate=sample_rate)

In [None]:
import sherpa_onnx
from sherpa_onnx import OfflineRecognizer
from typing import Union
import librosa

class ASR:
    def __init__(self):
        self._recognizer = OfflineRecognizer()
        raise NotImplementedError

    def transcribe(self, audio: Union[str, np.ndarray], sample_rate=16000) -> str:
        if isinstance(audio, str):
            audio, _ = librosa.load(audio, sr=sample_rate)
        s = self._recognizer.create_stream()
        s.accept_waveform(sample_rate, audio)
        self._recognizer.decode_stream(s)
        return s.result.text


class Whisper(ASR):
    def __init__(self, encoder_path: str, decoder_path: str, tokens_path: str, num_threads: int = 8, provider: str = 'cpu'):
        self._recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
            encoder=encoder_path,
            decoder=decoder_path,
            tokens=tokens_path,
            num_threads=num_threads,
            provider=provider,
        )


class Paraformer(ASR):
    def __init__(self, model_path: str, tokens_path: str, num_threads: int = 8, provider: str = 'cpu'):
        self._recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
            paraformer=model_path,
            tokens=tokens_path,
            num_threads=num_threads,
            provider=provider,
        )

In [None]:
asr = Paraformer(
    model_path='model/ASR/sherpa-onnx-paraformer-zh-small-2024-03-09/model.int8.onnx',
    tokens_path='model/ASR/sherpa-onnx-paraformer-zh-small-2024-03-09/tokens.txt',
    # provider='cuda',
)
asr.transcribe(r'model\ASR\sherpa-onnx-paraformer-zh-small-2024-03-09\test_wavs\0.wav', sample_rate=sample_rate)

'对我做了介绍啊那么我想说的是呢大家如果对我的研究感兴趣呢嗯'

In [None]:
from sherpa_onnx import VadModelConfig, SileroVadModelConfig, VoiceActivityDetector
config = VadModelConfig(
    SileroVadModelConfig(
        model='model/VAD/silero_vad.onnx',
        min_silence_duration=0.25,
    ),
    sample_rate=sample_rate
)
window_size = config.silero_vad.window_size
vad = VoiceActivityDetector(config, buffer_size_in_seconds=100)
samples_per_read = int(0.1 * sample_rate)

In [None]:
buffer = []
with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
    while True:
        samples, _ = s.read(samples_per_read)  # a blocking read
        samples = samples.reshape(-1)

        buffer = np.concatenate([buffer, samples])
        while len(buffer) > window_size:
            vad.accept_waveform(buffer[:window_size])
            buffer = buffer[window_size:]

        while not vad.empty():
            text = asr.transcribe(vad.front.samples, sample_rate=sample_rate)

            vad.pop()
            if len(text):
                print(text)

对是的
然后你现在没法证明你的八十块钱是合理对吧
嗯
这个话我
就是可以证明的只是我当当下现在证明不了嗯
这部分
思维就是说做做这种问题不是说从一条思维链走下来之后就结束了
你你你整个销售是一套体系不是说我自己觉得我挣个百分之挣个三十块钱就我自己觉得合适这这种吗
你把别的条路都走出来了
因为本来就那些东西就是需要有的


KeyboardInterrupt: 

In [1]:
from sentence_transformers import SentenceTransformer

In [126]:
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

In [11]:
sentences = [
    "The weather is lovely today.",
    "It's so sunny outside!",
    "He drove to the stadium.",
]
embeddings = model.encode(sentences)
print(embeddings.shape)

(3, 768)


In [12]:
similarities = model.similarity(embeddings, embeddings)
print(similarities)

tensor([[1.0000, 0.5067, 0.1559],
        [0.5067, 1.0000, 0.1257],
        [0.1559, 0.1257, 1.0000]])


In [127]:
dataset = [
    # 前进 (20)
    {"text": "前进三米", "label": "前进"},
    {"text": "请向前移动", "label": "前进"},
    {"text": "Go ahead", "label": "前进"},
    {"text": "直行please", "label": "前进"},
    {"text": "forward前进", "label": "前进"},
    {"text": "继续向前", "label": "前进"},
    {"text": "往前开", "label": "前进"},
    {"text": "Move forward", "label": "前进"},
    {"text": "保持直行", "label": "前进"},
    {"text": "proceed前进", "label": "前进"},
    {"text": "油门加速", "label": "前进"},
    {"text": "drive直行", "label": "前进"},
    {"text": "直线行驶", "label": "前进"},
    {"text": "加速前进", "label": "前进"},
    {"text": "持续向前", "label": "前进"},
    {"text": "go straight", "label": "前进"},
    {"text": "向前推进", "label": "前进"},
    {"text": "全速前进", "label": "前进"},
    {"text": "前进行驶", "label": "前进"},
    {"text": "前进now", "label": "前进"},

    # 后退 (20)
    {"text": "倒车操作", "label": "后退"},
    {"text": "请后退", "label": "后退"},
    {"text": "back后退", "label": "后退"},
    {"text": "reverse倒车", "label": "后退"},
    {"text": "向后移动", "label": "后退"},
    {"text": "退后两米", "label": "后退"},
    {"text": "倒车please", "label": "后退"},
    {"text": "Move back", "label": "后退"},
    {"text": "后退三秒", "label": "后退"},
    {"text": "retreat后退", "label": "后退"},
    {"text": "倒车模式", "label": "后退"},
    {"text": "退后now", "label": "后退"},
    {"text": "向后行驶", "label": "后退"},
    {"text": "倒车动作", "label": "后退"},
    {"text": "backing后退", "label": "后退"},
    {"text": "缓慢倒车", "label": "后退"},
    {"text": "退后操作", "label": "后退"},
    {"text": "倒车进行", "label": "后退"},
    {"text": "reverse模式", "label": "后退"},
    {"text": "后撤动作", "label": "后退"},

    # 左转 (20)
    {"text": "左转please", "label": "左转"},
    {"text": "向左转弯", "label": "左转"},
    {"text": "turn left", "label": "左转"},
    {"text": "左转操作", "label": "左转"},
    {"text": "左打方向", "label": "左转"},
    {"text": "左转now", "label": "左转"},
    {"text": "left转", "label": "左转"},
    {"text": "向左行驶", "label": "左转"},
    {"text": "左转动作", "label": "左转"},
    {"text": "左向转弯", "label": "左转"},
    {"text": "左转进行", "label": "左转"},
    {"text": "转向左边", "label": "左转"},
    {"text": "turn_left", "label": "左转"},
    {"text": "左弯道", "label": "左转"},
    {"text": "左转执行", "label": "左转"},
    {"text": "left转弯", "label": "左转"},
    {"text": "左方转弯", "label": "左转"},
    {"text": "左转please", "label": "左转"},
    {"text": "向左转向", "label": "左转"},
    {"text": "左转操作中", "label": "左转"},

    # 右转 (20)
    {"text": "右转操作", "label": "右转"},
    {"text": "turn right", "label": "右转"},
    {"text": "向右转弯", "label": "右转"},
    {"text": "右转now", "label": "右转"},
    {"text": "right转", "label": "右转"},
    {"text": "向右行驶", "label": "右转"},
    {"text": "右打方向", "label": "右转"},
    {"text": "右转please", "label": "右转"},
    {"text": "右转动作", "label": "右转"},
    {"text": "右向转弯", "label": "右转"},
    {"text": "转向右边", "label": "右转"},
    {"text": "turn_right", "label": "右转"},
    {"text": "右弯道", "label": "右转"},
    {"text": "右转执行", "label": "右转"},
    {"text": "right转弯", "label": "右转"},
    {"text": "右方转弯", "label": "右转"},
    {"text": "向右转向", "label": "右转"},
    {"text": "右转进行中", "label": "右转"},
    {"text": "右转模式", "label": "右转"},
    {"text": "右转操作中", "label": "右转"},

    # 无操作 (20)
    {"text": "stop停止", "label": "无操作"},
    {"text": "保持不动", "label": "无操作"},
    {"text": "no action", "label": "无操作"},
    {"text": "取消操作", "label": "无操作"},
    {"text": "pause暂停", "label": "无操作"},
    {"text": "维持原状", "label": "无操作"},
    {"text": "hold保持", "label": "无操作"},
    {"text": "停止stop", "label": "无操作"},
    {"text": "不做动作", "label": "无操作"},
    {"text": "freeze冻结", "label": "无操作"},
    {"text": "取消cancel", "label": "无操作"},
    {"text": "维持现状", "label": "无操作"},
    {"text": "不执行", "label": "无操作"},
    {"text": "保持静止", "label": "无操作"},
    {"text": "ignore忽略", "label": "无操作"},
    {"text": "暂停pause", "label": "无操作"},
    {"text": "停止操作", "label": "无操作"},
    {"text": "no operation", "label": "无操作"},
    {"text": "终止指令", "label": "无操作"},
    {"text": "保持当前", "label": "无操作"},
    # 前进（含否定混淆）
    {"text": "前进不要停止", "label": "前进"},
    {"text": "继续前进no stop", "label": "前进"},
    {"text": "go forward不要停", "label": "前进"},
    {"text": "向前推进禁止暂停", "label": "前进"},
    {"text": "保持前进non-stop", "label": "前进"},
    {"text": "前进ignore取消", "label": "前进"},
    {"text": "forward持续", "label": "前进"},
    {"text": "proceed继续", "label": "前进"},
    {"text": "油门保持", "label": "前进"},
    {"text": "drive前进不停止", "label": "前进"},
    {"text": "直线行驶勿停", "label": "前进"},
    {"text": "accelerate加速", "label": "前进"},
    {"text": "持续向前推进", "label": "前进"},
    {"text": "go不要停", "label": "前进"},
    {"text": "全速推进", "label": "前进"},
    {"text": "前进持续now", "label": "前进"},
    {"text": "move持续", "label": "前进"},
    {"text": "keep going", "label": "前进"},
    {"text": "维持前进", "label": "前进"},
    {"text": "不要停继续走", "label": "前进"},

    # 后退（含反向指令）
    {"text": "后退的反方向", "label": "前进"},  # 特殊混淆案例
    {"text": "倒车不要停", "label": "后退"},
    {"text": "reverse持续", "label": "后退"},
    {"text": "向后移动non-stop", "label": "后退"},
    {"text": "保持后退", "label": "后退"},
    {"text": "退后ignore取消", "label": "后退"},
    {"text": "倒车请继续", "label": "后退"},
    {"text": "retreat不要停", "label": "后退"},
    {"text": "back持续", "label": "后退"},
    {"text": "后撤动作保持", "label": "后退"},
    {"text": "倒车进行中", "label": "后退"},
    {"text": "退后now", "label": "后退"},
    {"text": "持续reverse", "label": "后退"},
    {"text": "保持倒车", "label": "后退"},
    {"text": "后退勿停", "label": "后退"},
    {"text": "倒车不停止", "label": "后退"},
    {"text": "backing持续", "label": "后退"},
    {"text": "退后动作继续", "label": "后退"},
    {"text": "reverse保持", "label": "后退"},
    {"text": "后撤进行", "label": "后退"},

    # 左转（含反向逻辑）
    {"text": "左转的反方向", "label": "右转"},  # 关键混淆案例
    {"text": "不要右转", "label": "无操作"},     # 反向否定
    {"text": "turn left不要停", "label": "左转"},
    {"text": "左转持续", "label": "左转"},
    {"text": "向左保持转向", "label": "左转"},
    {"text": "左转non-stop", "label": "左转"},
    {"text": "left持续", "label": "左转"},
    {"text": "禁止右转", "label": "左转"},     # 反向否定
    {"text": "左打方向不停止", "label": "左转"},
    {"text": "turn_left继续", "label": "左转"},
    {"text": "左转进行中", "label": "左转"},
    {"text": "保持左转", "label": "左转"},
    {"text": "左转勿停", "label": "左转"},
    {"text": "left保持", "label": "左转"},
    {"text": "左转动作持续", "label": "左转"},
    {"text": "转向左继续", "label": "左转"},
    {"text": "左弯道保持", "label": "左转"},
    {"text": "左转执行中", "label": "左转"},
    {"text": "左方转向持续", "label": "左转"},
    {"text": "不要向右", "label": "左转"},     # 反向否定

    # 右转（含否定混淆）
    {"text": "右转的反方向", "label": "左转"},  # 关键混淆案例
    {"text": "不要左转", "label": "无操作"},     # 反向否定
    {"text": "turn right不要停", "label": "右转"},
    {"text": "右转持续", "label": "右转"},
    {"text": "向右保持转向", "label": "右转"},
    {"text": "禁止左转", "label": "右转"},     # 反向否定
    {"text": "right持续", "label": "右转"},
    {"text": "右打方向不停止", "label": "右转"},
    {"text": "turn_right继续", "label": "右转"},
    {"text": "右转进行中", "label": "右转"},
    {"text": "保持右转", "label": "右转"},
    {"text": "右转勿停", "label": "右转"},
    {"text": "right保持", "label": "右转"},
    {"text": "右转动作持续", "label": "右转"},
    {"text": "转向右继续", "label": "右转"},
    {"text": "右弯道保持", "label": "右转"},
    {"text": "右转执行中", "label": "右转"},
    {"text": "右方转向持续", "label": "右转"},
    {"text": "不要向左", "label": "右转"},     # 反向否定
    {"text": "右转non-stop", "label": "右转"},

    # 无操作（强化否定）
    {"text": "不要前进", "label": "无操作"},    # 关键混淆案例
    {"text": "停止stop前进", "label": "无操作"},
    {"text": "cancel左转", "label": "无操作"},
    {"text": "禁止右转操作", "label": "无操作"}, # 特殊否定
    {"text": "no后退", "label": "无操作"},
    {"text": "hold不要动", "label": "无操作"},
    {"text": "ignore前进", "label": "无操作"},
    {"text": "停止所有turn", "label": "无操作"},
    {"text": "freeze保持", "label": "无操作"},
    {"text": "不要任何action", "label": "无操作"},
    {"text": "取消move", "label": "无操作"},
    {"text": "no operation保持", "label": "无操作"},
    {"text": "禁止proceed", "label": "无操作"},
    {"text": "停止drive", "label": "无操作"},
    {"text": "pause所有动作", "label": "无操作"},
    {"text": "不要accelerate", "label": "无操作"},
    {"text": "取消reverse", "label": "无操作"},
    {"text": "ignore转向", "label": "无操作"},
    {"text": "保持no移动", "label": "无操作"},
    {"text": "停止一切action", "label": "无操作"}
]



In [163]:
dataset = []
with open('dataset.csv') as fp:
    for line in fp:
        l = line.strip().split(' | ')
        text = l[0]
        label = l[1]
        dataset.append({"text": text, "label": label})


import random
random.seed(42)
random.shuffle(dataset)

train_dataset = dataset[:int(len(dataset) * 0.7)]
test_dataset = dataset[int(len(dataset) * 0.3):]

X_train = []
Y_train = []
for data in train_dataset:
    text = data["text"]
    label = data["label"]
    embedding = model.encode(text)
    X_train.append(embedding)
    Y_train.append(label)

X_test = []
Y_test = []
for data in test_dataset:
    text = data["text"]
    label = data["label"]
    embedding = model.encode(text)
    X_test.append(embedding)
    Y_test.append(label)

import numpy as np
X = np.concatenate([X_train, X_test], axis=0)
Y = np.concatenate([Y_train, Y_test], axis=0)

In [None]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(max_iter=200000, hidden_layer_sizes=(256, 128, 64))
mlp.fit(X_train, Y_train)

correct_count = 0
for x, y in zip(X, Y):
    pred = mlp.predict([x])
    if pred[0] == y:
        correct_count += 1
print(f"Training Accuracy: {correct_count / len(X_train) * 100:.2f}%")

correct_count = 0
for x, y in zip(X_test, Y_test):
    pred = mlp.predict([x])
    if pred[0] == y:
        correct_count += 1
print(f"Testing Accuracy: {correct_count / len(X_test) * 100:.2f}%")

Testing Accuracy: 97.70%


In [173]:
s = '往后走点'
print(mlp.predict_proba([model.encode(s)])[0])
print(mlp.predict([model.encode(s)])[0])

[6.96494430e-02 8.41587340e-07 9.30346247e-01 2.56431708e-06
 9.04396936e-07]
后退


In [145]:
mlp.predict_proba([model.encode('别左转了，也别前进，后退一点')])

array([[0.03922766, 0.00432879, 0.47356503, 0.13389787, 0.34898064]])

In [136]:
mlp.predict([model.encode('别左转了，也别前进，后退一点')])[0]

np.str_('后退')

['model/MLP/command_classifier.pkl']

In [16]:
test_dataset = [
]

T = []
X_test = []
Y_test = []
for data in test_dataset:
    text = data["text"]
    label = data["label"]
    embedding = model.encode(text)
    T.append(text)
    X_test.append(embedding)
    Y_test.append(label)

correct_count = 0
for x, y in zip(X_test, Y_test):
    pred = mlp.predict([x])
    if pred[0] == y:
        correct_count += 1
print(f"Test Accuracy: {correct_count / len(X_test) * 100:.2f}%")

Test Accuracy: 63.89%


In [43]:
from sklearn.linear_model import RidgeClassifier

ridge = RidgeClassifier()

ridge.fit(X, Y)
correct_count = 0
for text, x, y in zip(T, X_test, Y_test):
    pred = mlp.predict([x])
    # print(f'文本：{text}, 预测：{pred[0]}, 实际：{y}')
    if pred[0] == y:
        correct_count += 1
print(f"Test Accuracy: {correct_count / len(X_test) * 100:.2f}%")

Test Accuracy: 60.00%
