In [1]:
import json
import uuid
import gzip
import asyncio
import websockets
import numpy as np
import sounddevice as sd
import nest_asyncio

# 配置参数
appid = "4166554764"    # 项目的 appid
token = "ggmUTHHMXio-nJlKMkRvqEgkcWyfDK0K"    # 项目的 token
cluster = "volcengine_streaming_common"  # 请求的集群

# 协议常量
PROTOCOL_VERSION = 0b0001
DEFAULT_HEADER_SIZE = 0b0001

PROTOCOL_VERSION_BITS = 4
HEADER_BITS = 4
MESSAGE_TYPE_BITS = 4
MESSAGE_TYPE_SPECIFIC_FLAGS_BITS = 4
MESSAGE_SERIALIZATION_BITS = 4
MESSAGE_COMPRESSION_BITS = 4
RESERVED_BITS = 8

# Message Type:
CLIENT_FULL_REQUEST = 0b0001
CLIENT_AUDIO_ONLY_REQUEST = 0b0010
SERVER_FULL_RESPONSE = 0b1001
SERVER_ACK = 0b1011
SERVER_ERROR_RESPONSE = 0b1111

# Message Type Specific Flags
NO_SEQUENCE = 0b0000
POS_SEQUENCE = 0b0001
NEG_SEQUENCE = 0b0010
NEG_SEQUENCE_1 = 0b0011

# Message Serialization
NO_SERIALIZATION = 0b0000
JSON = 0b0001
THRIFT = 0b0011
CUSTOM_TYPE = 0b1111

# Message Compression
NO_COMPRESSION = 0b0000
GZIP = 0b0001
CUSTOM_COMPRESSION = 0b1111

def generate_header(
    version=PROTOCOL_VERSION,
    message_type=CLIENT_FULL_REQUEST,
    message_type_specific_flags=NO_SEQUENCE,
    serial_method=JSON,
    compression_type=GZIP,
    reserved_data=0x00,
    extension_header=bytes()
):
    header = bytearray()
    header_size = int(len(extension_header) / 4) + 1
    header.append((version << 4) | header_size)
    header.append((message_type << 4) | message_type_specific_flags)
    header.append((serial_method << 4) | compression_type)
    header.append(reserved_data)
    header.extend(extension_header)
    return header

def generate_full_default_header():
    return generate_header()

def generate_audio_default_header():
    return generate_header(
        message_type=CLIENT_AUDIO_ONLY_REQUEST
    )

def parse_response(res):
    """解析响应"""
    try:
        protocol_version = res[0] >> 4
        header_size = res[0] & 0x0f
        message_type = res[1] >> 4
        message_type_specific_flags = res[1] & 0x0f
        serialization_method = res[2] >> 4
        message_compression = res[2] & 0x0f
        reserved = res[3]
        header_extensions = res[4:header_size * 4]
        payload = res[header_size * 4:]
        result = {}
        payload_msg = None
        payload_size = 0
        
        if message_type == SERVER_FULL_RESPONSE:
            payload_size = int.from_bytes(payload[:4], "big", signed=True)
            payload_msg = payload[4:]
        elif message_type == SERVER_ACK:
            seq = int.from_bytes(payload[:4], "big", signed=True)
            result['seq'] = seq
            if len(payload) >= 8:
                payload_size = int.from_bytes(payload[4:8], "big", signed=False)
                payload_msg = payload[8:]
        elif message_type == SERVER_ERROR_RESPONSE:
            code = int.from_bytes(payload[:4], "big", signed=False)
            result['code'] = code
            payload_size = int.from_bytes(payload[4:8], "big", signed=False)
            payload_msg = payload[8:]
            
        if payload_msg is None:
            return result
            
        if message_compression == GZIP:
            payload_msg = gzip.decompress(payload_msg)
            
        if serialization_method == JSON:
            payload_msg = json.loads(str(payload_msg, "utf-8"))
        elif serialization_method != NO_SERIALIZATION:
            payload_msg = str(payload_msg, "utf-8")
            
        result['payload_msg'] = payload_msg
        result['payload_size'] = payload_size
        return result
    except Exception as e:
        return {"error": f"Failed to parse response: {str(e)}"}

class AsrWsClient:
    def __init__(self, appid, token, cluster):
        self.appid = appid
        self.token = token
        self.cluster = cluster
        self.ws_url = "wss://openspeech.bytedance.com/api/v2/asr"
        self.success_code = 1000
        self.uid = "streaming_asr_demo"
        self.workflow = "audio_in,resample,partition,vad,fe,decode,itn,nlu_punctuate"
        self.show_language = False
        self.show_utterances = True
        self.result_type = "single"
        self.format = "raw"
        self.rate = 16000
        self.language = "zh-CN"
        self.bits = 16
        self.channel = 1
        self.codec = "raw"
        self.auth_method = "token"

    def construct_request(self, reqid):
        return {
            'app': {
                'appid': self.appid,
                'cluster': self.cluster,
                'token': self.token,
            },
            'user': {
                'uid': self.uid
            },
            'request': {
                'reqid': reqid,
                'nbest': 1,
                'workflow': self.workflow,
                'show_language': self.show_language,
                'show_utterances': self.show_utterances,
                'result_type': self.result_type,
                'sequence': 1
            },
            'audio': {
                'format': self.format,
                'rate': self.rate,
                'language': self.language,
                'bits': self.bits,
                'channel': self.channel,
                'codec': self.codec
            }
        }

    def token_auth(self):
        return {'Authorization': f'Bearer; {self.token}'}

    async def process_microphone(self):
        """实时麦克风录音并识别"""
        reqid = str(uuid.uuid4())
        request_params = self.construct_request(reqid)
        
        # 构造初始请求
        payload_bytes = str.encode(json.dumps(request_params))
        payload_bytes = gzip.compress(payload_bytes)
        full_request = bytearray(generate_full_default_header())
        full_request.extend(len(payload_bytes).to_bytes(4, 'big'))
        full_request.extend(payload_bytes)

        print("建立WebSocket连接...")
        async with websockets.connect(
            self.ws_url, 
            extra_headers=self.token_auth(), 
            max_size=1000000000
        ) as ws:
            # 发送初始请求
            await ws.send(full_request)
            response = await ws.recv()
            result = parse_response(response)
            print(f"初始化响应: {result}")
            
            if 'payload_msg' in result and result['payload_msg']['code'] == self.success_code:
                print("初始化成功")
                print("录音任务已启动")
                chunk_size = 9600  # 每次读取的采样点数
                
                with sd.InputStream(
                    channels=1, 
                    samplerate=16000,
                    dtype=np.int16,
                    blocksize=chunk_size,
                    callback=None
                ) as stream:
                    print("开始录音...")
                    try:
                        while True:
                            # 读取音频数据
                            audio_data, overflowed = stream.read(chunk_size)
                            if overflowed:
                                print("警告：音频缓冲区溢出")
                                
                            # 转换为字节
                            audio_bytes = audio_data.tobytes()
                            
                            # 压缩音频数据
                            compressed_audio = gzip.compress(audio_bytes)
                            
                            # 构造音频数据请求
                            audio_request = bytearray(generate_audio_default_header())
                            audio_request.extend(len(compressed_audio).to_bytes(4, 'big'))
                            audio_request.extend(compressed_audio)
                            
                            # 发送音频数据
                            await ws.send(audio_request)
                            
                            # 接收识别结果
                            response = await ws.recv()
                            result = parse_response(response)
                            
                            # 处理识别结果
                            if 'payload_msg' in result and 'result' in result['payload_msg']:
                                utterances = result['payload_msg']['result'][0].get('utterances', [])
                                for utterance in utterances:
                                    if not utterance['definite']:
                                        print(f"\r[识别中...] {utterance['text']}", end='', flush=True)
                                    else:
                                        print(f"\n[最终结果] {utterance['text']}")
                    except KeyboardInterrupt:
                        # 发送最后一个音频包
                        last_request = bytearray(generate_last_audio_default_header())
                        last_request.extend(len(compressed_audio).to_bytes(4, 'big'))
                        last_request.extend(compressed_audio)
                        await ws.send(last_request)
                        print("\n录音已停止")
                    except Exception as e:
                        print(f"录音过程发生错误: {e}")
            else:
                print(f"初始化失败: {result['payload_msg'].get('message')}")

# 在notebook中运行异步代码的辅助函数
import nest_asyncio
nest_asyncio.apply()

# 创建客户端实例并运行
client = AsrWsClient(appid, token, cluster)
try:
    loop = asyncio.get_event_loop()
    loop.run_until_complete(client.process_microphone())
except KeyboardInterrupt:
    print("\n程序已被用户中断")
except Exception as e:
    print(f"\n程序异常: {e}")
finally:
    print("程序已退出")


建立WebSocket连接...
初始化响应: {'payload_msg': {'addition': {'duration': '0', 'logid': '2024102815461634AE6D600FE3D86C2D88'}, 'code': 1000, 'message': 'Success', 'reqid': '29b0dd87-ee32-4f86-aad8-4c03205cf61e', 'sequence': 1}, 'payload_size': 157}
初始化成功
录音任务已启动
开始录音...
[识别中...] 文轩儿做作业文轩儿你每天在这搞来搞去的
[最终结果] 文轩做作业，文轩你每天在这搞来搞去的？
[识别中...] 
[最终结果] 哎。
[识别中...] 闻一下
[最终结果] 闻一下儿，

程序已被用户中断
程序已退出


In [None]:
from unihiker import GUI
import time

# 创建GUI实例
gui = GUI()

# 创建文本对象，居中显示
text = gui.draw_text(
    x=120,  # 屏幕中心x坐标
    y=160,  # 屏幕中心y坐标
    text="Hello",
    font_size=20,
    origin='center'  # 居中对齐
)

# 循环更新文字
while True:
    # 显示 "Hello"
    text.config(text="Hello")
    time.sleep(2)
    
    # 显示 "World"
    text.config(text="World")
    time.sleep(2)
    
    # 如果在Mac系统上运行，需要手动更新GUI
    if gui.master.winfo_exists():  # 检查窗口是否还存在
        gui.update()

GUI is cleared because of reinit
程序开始运行
主程序开始运行
连接错误: sent 1007 (invalid data); no close frame received
连接错误: sent 1007 (invalid data); no close frame received
连接错误: sent 1007 (invalid data); no close frame received
连接错误: sent 1007 (invalid data); no close frame received
连接错误: sent 1007 (invalid data); no close frame received
连接错误: sent 1007 (invalid data); no close frame received
连接错误: sent 1007 (invalid data); no close frame received
连接错误: sent 1007 (invalid data); no close frame received
连接错误: sent 1007 (invalid data); no close frame received
连接错误: sent 1007 (invalid data); no close frame received
连接错误: sent 1007 (invalid data); no close frame received
连接错误: sent 1007 (invalid data); no close frame received
连接错误: sent 1007 (invalid data); no close frame received
连接错误: sent 1007 (invalid data); no close frame received


In [None]:
星空板文字跟新DEMO

In [8]:
from unihiker import GUI
import time

# 初始化GUI
gui = GUI()

# 创建文本对象
title = gui.draw_text(
    x=120,  # 屏幕中心x坐标
    y=60,   # 上方位置
    text="语音识别测试",
    font_size=24,
    origin='center'  # 居中对齐
)

status = gui.draw_text(
    x=120,  # 屏幕中心x坐标
    y=160,  # 屏幕中心y坐标
    text="测试中...",
    font_size=20,
    origin='center'  # 居中对齐
)

# 测试文本更新
count = 0
while True:
    try:
        count += 1
        status.config(text=f"测试计数: {count}")
        print(f"更新计数: {count}")
        time.sleep(1)
    except KeyboardInterrupt:
        break
    except Exception as e:
        print(f"错误: {e}")
        break

GUI is cleared because of reinit
更新计数: 1
更新计数: 2
更新计数: 3
更新计数: 4
更新计数: 5
更新计数: 6
更新计数: 7
更新计数: 8
更新计数: 9
更新计数: 10
更新计数: 11
更新计数: 12
更新计数: 13
更新计数: 14
更新计数: 15
更新计数: 16
更新计数: 17
更新计数: 18
更新计数: 19
更新计数: 20
更新计数: 21
更新计数: 22
更新计数: 23
更新计数: 24
更新计数: 25
更新计数: 26
更新计数: 27
更新计数: 28
更新计数: 29
更新计数: 30
更新计数: 31
更新计数: 32
更新计数: 33
更新计数: 34
更新计数: 35
更新计数: 36
更新计数: 37
更新计数: 38
更新计数: 39
更新计数: 40
更新计数: 41
更新计数: 42
更新计数: 43
更新计数: 44
更新计数: 45
更新计数: 46
更新计数: 47
更新计数: 48
更新计数: 49
更新计数: 50
更新计数: 51
更新计数: 52
更新计数: 53
更新计数: 54
更新计数: 55
更新计数: 56
更新计数: 57
更新计数: 58
更新计数: 59
更新计数: 60
更新计数: 61
更新计数: 62
更新计数: 63
更新计数: 64
更新计数: 65
更新计数: 66
更新计数: 67
更新计数: 68
更新计数: 69
更新计数: 70
更新计数: 71
更新计数: 72
更新计数: 73
更新计数: 74
更新计数: 75
更新计数: 76
更新计数: 77
更新计数: 78
更新计数: 79
更新计数: 80
更新计数: 81
更新计数: 82
更新计数: 83
更新计数: 84
更新计数: 85
更新计数: 86
更新计数: 87
更新计数: 88
更新计数: 89
更新计数: 90
更新计数: 91
更新计数: 92
更新计数: 93
更新计数: 94
更新计数: 95
更新计数: 96
更新计数: 97
更新计数: 98
更新计数: 99
更新计数: 100
更新计数: 101
更新计数: 102
更新计数: 103
更新计数: 104
更新计数: 105
更新计数: 106
更新计数: 107
更新计数:

In [None]:
能在屏幕正常显示识别文字，不能换行

In [9]:
import json
import uuid
import gzip
import asyncio
import websockets
import numpy as np
import sounddevice as sd
import nest_asyncio
from unihiker import GUI
import time

# 配置参数
appid = "4166554764"    # 项目的 appid
token = "ggmUTHHMXio-nJlKMkRvqEgkcWyfDK0K"    # 项目的 token
cluster = "volcengine_streaming_common"  # 请求的集群

# 协议常量
PROTOCOL_VERSION = 0b0001
DEFAULT_HEADER_SIZE = 0b0001

PROTOCOL_VERSION_BITS = 4
HEADER_BITS = 4
MESSAGE_TYPE_BITS = 4
MESSAGE_TYPE_SPECIFIC_FLAGS_BITS = 4
MESSAGE_SERIALIZATION_BITS = 4
MESSAGE_COMPRESSION_BITS = 4
RESERVED_BITS = 8

# Message Type:
CLIENT_FULL_REQUEST = 0b0001
CLIENT_AUDIO_ONLY_REQUEST = 0b0010
SERVER_FULL_RESPONSE = 0b1001
SERVER_ACK = 0b1011
SERVER_ERROR_RESPONSE = 0b1111

# Message Type Specific Flags
NO_SEQUENCE = 0b0000
POS_SEQUENCE = 0b0001
NEG_SEQUENCE = 0b0010
NEG_SEQUENCE_1 = 0b0011

# Message Serialization
NO_SERIALIZATION = 0b0000
JSON = 0b0001
THRIFT = 0b0011
CUSTOM_TYPE = 0b1111

# Message Compression
NO_COMPRESSION = 0b0000
GZIP = 0b0001
CUSTOM_COMPRESSION = 0b1111




# 初始化GUI
gui = GUI()

# 创建文本对象
title = gui.draw_text(
    x=120,  # 屏幕中心x坐标
    y=60,   # 上方位置
    text="语音识别测试",
    font_size=24,
    origin='center'  # 居中对齐
)

status = gui.draw_text(
    x=120,  # 屏幕中心x坐标
    y=160,  # 屏幕中心y坐标
    text="测试中...",
    font_size=20,
    origin='center'  # 居中对齐
)

def generate_header(
    version=PROTOCOL_VERSION,
    message_type=CLIENT_FULL_REQUEST,
    message_type_specific_flags=NO_SEQUENCE,
    serial_method=JSON,
    compression_type=GZIP,
    reserved_data=0x00,
    extension_header=bytes()
):
    header = bytearray()
    header_size = int(len(extension_header) / 4) + 1
    header.append((version << 4) | header_size)
    header.append((message_type << 4) | message_type_specific_flags)
    header.append((serial_method << 4) | compression_type)
    header.append(reserved_data)
    header.extend(extension_header)
    return header

def generate_full_default_header():
    return generate_header()

def generate_audio_default_header():
    return generate_header(
        message_type=CLIENT_AUDIO_ONLY_REQUEST
    )

def parse_response(res):
    """解析响应"""
    try:
        protocol_version = res[0] >> 4
        header_size = res[0] & 0x0f
        message_type = res[1] >> 4
        message_type_specific_flags = res[1] & 0x0f
        serialization_method = res[2] >> 4
        message_compression = res[2] & 0x0f
        reserved = res[3]
        header_extensions = res[4:header_size * 4]
        payload = res[header_size * 4:]
        result = {}
        payload_msg = None
        payload_size = 0
        
        if message_type == SERVER_FULL_RESPONSE:
            payload_size = int.from_bytes(payload[:4], "big", signed=True)
            payload_msg = payload[4:]
        elif message_type == SERVER_ACK:
            seq = int.from_bytes(payload[:4], "big", signed=True)
            result['seq'] = seq
            if len(payload) >= 8:
                payload_size = int.from_bytes(payload[4:8], "big", signed=False)
                payload_msg = payload[8:]
        elif message_type == SERVER_ERROR_RESPONSE:
            code = int.from_bytes(payload[:4], "big", signed=False)
            result['code'] = code
            payload_size = int.from_bytes(payload[4:8], "big", signed=False)
            payload_msg = payload[8:]
            
        if payload_msg is None:
            return result
            
        if message_compression == GZIP:
            payload_msg = gzip.decompress(payload_msg)
            
        if serialization_method == JSON:
            payload_msg = json.loads(str(payload_msg, "utf-8"))
        elif serialization_method != NO_SERIALIZATION:
            payload_msg = str(payload_msg, "utf-8")
            
        result['payload_msg'] = payload_msg
        result['payload_size'] = payload_size
        return result
    except Exception as e:
        return {"error": f"Failed to parse response: {str(e)}"}

class AsrWsClient:
    def __init__(self, appid, token, cluster):
        self.appid = appid
        self.token = token
        self.cluster = cluster
        self.ws_url = "wss://openspeech.bytedance.com/api/v2/asr"
        self.success_code = 1000
        self.uid = "streaming_asr_demo"
        self.workflow = "audio_in,resample,partition,vad,fe,decode,itn,nlu_punctuate"
        self.show_language = False
        self.show_utterances = True
        self.result_type = "single"
        self.format = "raw"
        self.rate = 16000
        self.language = "zh-CN"
        self.bits = 16
        self.channel = 1
        self.codec = "raw"
        self.auth_method = "token"

    def construct_request(self, reqid):
        return {
            'app': {
                'appid': self.appid,
                'cluster': self.cluster,
                'token': self.token,
            },
            'user': {
                'uid': self.uid
            },
            'request': {
                'reqid': reqid,
                'nbest': 1,
                'workflow': self.workflow,
                'show_language': self.show_language,
                'show_utterances': self.show_utterances,
                'result_type': self.result_type,
                'sequence': 1
            },
            'audio': {
                'format': self.format,
                'rate': self.rate,
                'language': self.language,
                'bits': self.bits,
                'channel': self.channel,
                'codec': self.codec
            }
        }

    def token_auth(self):
        return {'Authorization': f'Bearer; {self.token}'}

    async def process_microphone(self):
        """实时麦克风录音并识别"""
        reqid = str(uuid.uuid4())
        request_params = self.construct_request(reqid)
        
        # 构造初始请求
        payload_bytes = str.encode(json.dumps(request_params))
        payload_bytes = gzip.compress(payload_bytes)
        full_request = bytearray(generate_full_default_header())
        full_request.extend(len(payload_bytes).to_bytes(4, 'big'))
        full_request.extend(payload_bytes)

        print("建立WebSocket连接...")
        async with websockets.connect(
            self.ws_url, 
            extra_headers=self.token_auth(), 
            max_size=1000000000
        ) as ws:
            # 发送初始请求
            await ws.send(full_request)
            response = await ws.recv()
            result = parse_response(response)
            print(f"初始化响应: {result}")
            
            if 'payload_msg' in result and result['payload_msg']['code'] == self.success_code:
                print("初始化成功")
                print("录音任务已启动")
                chunk_size = 9600  # 每次读取的采样点数
                
                with sd.InputStream(
                    channels=1, 
                    samplerate=16000,
                    dtype=np.int16,
                    blocksize=chunk_size,
                    callback=None
                ) as stream:
                    print("开始录音...")
                    try:
                        while True:
                            # 读取音频数据
                            audio_data, overflowed = stream.read(chunk_size)
                            if overflowed:
                                print("警告：音频缓冲区溢出")
                                
                            # 转换为字节
                            audio_bytes = audio_data.tobytes()
                            
                            # 压缩音频数据
                            compressed_audio = gzip.compress(audio_bytes)
                            
                            # 构造音频数据请求
                            audio_request = bytearray(generate_audio_default_header())
                            audio_request.extend(len(compressed_audio).to_bytes(4, 'big'))
                            audio_request.extend(compressed_audio)
                            
                            # 发送音频数据
                            await ws.send(audio_request)
                            
                            # 接收识别结果
                            response = await ws.recv()
                            result = parse_response(response)
                            
                            # 处理识别结果
                            if 'payload_msg' in result and 'result' in result['payload_msg']:
                                utterances = result['payload_msg']['result'][0].get('utterances', [])
                                for utterance in utterances:
                                    if not utterance['definite']:
                                        print(f"\r[识别中...] {utterance['text']}", end='', flush=True)
                                    else:
                                        print(f"\n[最终结果] {utterance['text']}")
                                        status.config(text=f"测试计数: {utterance['text']}")
                    except KeyboardInterrupt:
                        # 发送最后一个音频包
                        last_request = bytearray(generate_last_audio_default_header())
                        last_request.extend(len(compressed_audio).to_bytes(4, 'big'))
                        last_request.extend(compressed_audio)
                        await ws.send(last_request)
                        print("\n录音已停止")
                    except Exception as e:
                        print(f"录音过程发生错误: {e}")
            else:
                print(f"初始化失败: {result['payload_msg'].get('message')}")

# 在notebook中运行异步代码的辅助函数
import nest_asyncio
nest_asyncio.apply()

# 创建客户端实例并运行
client = AsrWsClient(appid, token, cluster)
try:
    loop = asyncio.get_event_loop()
    loop.run_until_complete(client.process_microphone())
except KeyboardInterrupt:
    print("\n程序已被用户中断")
except Exception as e:
    print(f"\n程序异常: {e}")
finally:
    print("程序已退出")


GUI is cleared because of reinit
建立WebSocket连接...
初始化响应: {'payload_msg': {'addition': {'duration': '0', 'logid': '20241028133209D01E9DF5E142E06A090C'}, 'code': 1000, 'message': 'Success', 'reqid': '450ebaf9-b679-44cc-a9d8-4b3e0af28cda', 'sequence': 1}, 'payload_size': 159}
初始化成功
录音任务已启动
开始录音...
[识别中...] 老婆
[最终结果] 老婆？
[识别中...] 对于姥姥家我只知道上述的一点
[最终结果] 对于姥姥家，我只知道上述的一点。

程序异常: name 'generate_last_audio_default_header' is not defined
程序已退出


In [3]:
import json
import uuid
import gzip
import asyncio
import websockets
import numpy as np
import sounddevice as sd
import nest_asyncio
from unihiker import GUI
import time

# 配置参数
appid = "4166554764"    # 项目的 appid
token = "ggmUTHHMXio-nJlKMkRvqEgkcWyfDK0K"    # 项目的 token
cluster = "volcengine_streaming_common"  # 请求的集群

# 协议常量
PROTOCOL_VERSION = 0b0001
DEFAULT_HEADER_SIZE = 0b0001

PROTOCOL_VERSION_BITS = 4
HEADER_BITS = 4
MESSAGE_TYPE_BITS = 4
MESSAGE_TYPE_SPECIFIC_FLAGS_BITS = 4
MESSAGE_SERIALIZATION_BITS = 4
MESSAGE_COMPRESSION_BITS = 4
RESERVED_BITS = 8

# Message Type:
CLIENT_FULL_REQUEST = 0b0001
CLIENT_AUDIO_ONLY_REQUEST = 0b0010
SERVER_FULL_RESPONSE = 0b1001
SERVER_ACK = 0b1011
SERVER_ERROR_RESPONSE = 0b1111

# Message Type Specific Flags
NO_SEQUENCE = 0b0000
POS_SEQUENCE = 0b0001
NEG_SEQUENCE = 0b0010
NEG_SEQUENCE_1 = 0b0011

# Message Serialization
NO_SERIALIZATION = 0b0000
JSON = 0b0001
THRIFT = 0b0011
CUSTOM_TYPE = 0b1111

# Message Compression
NO_COMPRESSION = 0b0000
GZIP = 0b0001
CUSTOM_COMPRESSION = 0b1111




# 初始化GUI
gui = GUI()

# 创建标题
title = gui.draw_text(
    x=120,
    y=30,
    text="语音识别系统",
    font_size=24,
    origin='center'
)

# 创建文本显示区域
text_box = gui.add_text_box(
    x=10,
    y=70,
    w=220,  # 宽度
    h=200,  # 高度
    text=""  # 初始为空
)

# 创建状态显示
status = gui.draw_text(
    x=120,
    y=280,
    text="准备中...",
    font_size=16,
    origin='center'
)

# 用于存储完整文本
full_text = ""

def update_text_display(new_text):
    """更新文本显示，累积新内容"""
    global full_text
    
    # 添加新文本
    if new_text:
        full_text = full_text + new_text + "\n"
        
        # 更新显示
        text_box.config(text=full_text)

def generate_header(
    version=PROTOCOL_VERSION,
    message_type=CLIENT_FULL_REQUEST,
    message_type_specific_flags=NO_SEQUENCE,
    serial_method=JSON,
    compression_type=GZIP,
    reserved_data=0x00,
    extension_header=bytes()
):
    header = bytearray()
    header_size = int(len(extension_header) / 4) + 1
    header.append((version << 4) | header_size)
    header.append((message_type << 4) | message_type_specific_flags)
    header.append((serial_method << 4) | compression_type)
    header.append(reserved_data)
    header.extend(extension_header)
    return header

def generate_full_default_header():
    return generate_header()

def generate_audio_default_header():
    return generate_header(
        message_type=CLIENT_AUDIO_ONLY_REQUEST
    )

def parse_response(res):
    """解析响应"""
    try:
        protocol_version = res[0] >> 4
        header_size = res[0] & 0x0f
        message_type = res[1] >> 4
        message_type_specific_flags = res[1] & 0x0f
        serialization_method = res[2] >> 4
        message_compression = res[2] & 0x0f
        reserved = res[3]
        header_extensions = res[4:header_size * 4]
        payload = res[header_size * 4:]
        result = {}
        payload_msg = None
        payload_size = 0
        
        if message_type == SERVER_FULL_RESPONSE:
            payload_size = int.from_bytes(payload[:4], "big", signed=True)
            payload_msg = payload[4:]
        elif message_type == SERVER_ACK:
            seq = int.from_bytes(payload[:4], "big", signed=True)
            result['seq'] = seq
            if len(payload) >= 8:
                payload_size = int.from_bytes(payload[4:8], "big", signed=False)
                payload_msg = payload[8:]
        elif message_type == SERVER_ERROR_RESPONSE:
            code = int.from_bytes(payload[:4], "big", signed=False)
            result['code'] = code
            payload_size = int.from_bytes(payload[4:8], "big", signed=False)
            payload_msg = payload[8:]
            
        if payload_msg is None:
            return result
            
        if message_compression == GZIP:
            payload_msg = gzip.decompress(payload_msg)
            
        if serialization_method == JSON:
            payload_msg = json.loads(str(payload_msg, "utf-8"))
        elif serialization_method != NO_SERIALIZATION:
            payload_msg = str(payload_msg, "utf-8")
            
        result['payload_msg'] = payload_msg
        result['payload_size'] = payload_size
        return result
    except Exception as e:
        return {"error": f"Failed to parse response: {str(e)}"}

class AsrWsClient:
    def __init__(self, appid, token, cluster):
        self.appid = appid
        self.token = token
        self.cluster = cluster
        self.ws_url = "wss://openspeech.bytedance.com/api/v2/asr"
        self.success_code = 1000
        self.uid = "streaming_asr_demo"
        self.workflow = "audio_in,resample,partition,vad,fe,decode,itn,nlu_punctuate"
        self.show_language = False
        self.show_utterances = True
        self.result_type = "single"
        self.format = "raw"
        self.rate = 16000
        self.language = "zh-CN"
        self.bits = 16
        self.channel = 1
        self.codec = "raw"
        self.auth_method = "token"

    def construct_request(self, reqid):
        return {
            'app': {
                'appid': self.appid,
                'cluster': self.cluster,
                'token': self.token,
            },
            'user': {
                'uid': self.uid
            },
            'request': {
                'reqid': reqid,
                'nbest': 1,
                'workflow': self.workflow,
                'show_language': self.show_language,
                'show_utterances': self.show_utterances,
                'result_type': self.result_type,
                'sequence': 1
            },
            'audio': {
                'format': self.format,
                'rate': self.rate,
                'language': self.language,
                'bits': self.bits,
                'channel': self.channel,
                'codec': self.codec
            }
        }

    def token_auth(self):
        return {'Authorization': f'Bearer; {self.token}'}

    async def process_microphone(self):
        """实时麦克风录音并识别"""
        reqid = str(uuid.uuid4())
        request_params = self.construct_request(reqid)
        
        # 构造初始请求
        payload_bytes = str.encode(json.dumps(request_params))
        payload_bytes = gzip.compress(payload_bytes)
        full_request = bytearray(generate_full_default_header())
        full_request.extend(len(payload_bytes).to_bytes(4, 'big'))
        full_request.extend(payload_bytes)

        print("建立WebSocket连接...")
        async with websockets.connect(
            self.ws_url, 
            extra_headers=self.token_auth(), 
            max_size=1000000000
        ) as ws:
            # 发送初始请求
            await ws.send(full_request)
            response = await ws.recv()
            result = parse_response(response)
            print(f"初始化响应: {result}")
            
            if 'payload_msg' in result and result['payload_msg']['code'] == self.success_code:
                print("初始化成功")
                print("录音任务已启动")
                chunk_size = 9600  # 每次读取的采样点数
                
                with sd.InputStream(
                    channels=1, 
                    samplerate=16000,
                    dtype=np.int16,
                    blocksize=chunk_size,
                    callback=None
                ) as stream:
                    print("开始录音...")
                    try:
                        while True:
                            # 读取音频数据
                            audio_data, overflowed = stream.read(chunk_size)
                            if overflowed:
                                print("警告：音频缓冲区溢出")
                                
                            # 转换为字节
                            audio_bytes = audio_data.tobytes()
                            
                            # 压缩音频数据
                            compressed_audio = gzip.compress(audio_bytes)
                            
                            # 构造音频数据请求
                            audio_request = bytearray(generate_audio_default_header())
                            audio_request.extend(len(compressed_audio).to_bytes(4, 'big'))
                            audio_request.extend(compressed_audio)
                            
                            # 发送音频数据
                            await ws.send(audio_request)
                            
                            # 接收识别结果
                            response = await ws.recv()
                            result = parse_response(response)
                            
                            # 处理识别结果
                            if 'payload_msg' in result and 'result' in result['payload_msg']:
                                utterances = result['payload_msg']['result'][0].get('utterances', [])
                                for utterance in utterances:
                                    if not utterance['definite']:
                                        print(f"\r[识别中...] {utterance['text']}", end='', flush=True)
                                    else:
                                        print(f"\n[最终结果] {utterance['text']}")
                                        update_text_display(utterance['text'])
                    except KeyboardInterrupt:
                        # 发送最后一个音频包
                        last_request = bytearray(generate_last_audio_default_header())
                        last_request.extend(len(compressed_audio).to_bytes(4, 'big'))
                        last_request.extend(compressed_audio)
                        await ws.send(last_request)
                        print("\n录音已停止")
                    except Exception as e:
                        print(f"录音过程发生错误: {e}")
            else:
                print(f"初始化失败: {result['payload_msg'].get('message')}")

# 在notebook中运行异步代码的辅助函数
import nest_asyncio
nest_asyncio.apply()

# 创建客户端实例并运行
client = AsrWsClient(appid, token, cluster)
try:
    loop = asyncio.get_event_loop()
    loop.run_until_complete(client.process_microphone())
except KeyboardInterrupt:
    print("\n程序已被用户中断")
except Exception as e:
    print(f"\n程序异常: {e}")
finally:
    print("程序已退出")


GUI is cleared because of reinit
建立WebSocket连接...
初始化响应: {'payload_msg': {'addition': {'duration': '0', 'logid': '2024102815475802D64850AD6CE3A1AB64'}, 'code': 1000, 'message': 'Success', 'reqid': '25374b1e-1ba3-494c-a8d4-253b6718d08f', 'sequence': 1}, 'payload_size': 159}
初始化成功
录音任务已启动
开始录音...
录音过程发生错误: sent 1011 (unexpected error) keepalive ping timeout; no close frame received
[识别中...] 不要出声文轩哎呦
[最终结果] 不要出声文轩哎呦！
[识别中...] 你选啊
[最终结果] 一圈啊。
[识别中...] 哎呀
[最终结果] 哎呀。
[识别中...] 又到天天在这里来的搞熟了的吗而且熬夜要从我从一年级教那个班的经常搞带他们什么晚托午托
[最终结果] 如果天天在这里来，都搞熟了的吗？而且我要从我从一年级教那个班了，经常搞带他们什么晚托午托。
[识别中...] 
[最终结果] 嗯。
[识别中...] 
[最终结果] 
[识别中...] 假若我有那么一箱子画
[最终结果] 假若我有那么一箱子画，
[识别中...] 老舍
[最终结果] 老舍。
[识别中...] 在各种艺术作品中我特别喜爱图画
[最终结果] 在各种艺术作品中，我特别喜爱图画。
[识别中...] 虽然爱画可是我不收藏画因为第一我不会鉴别古画的真假第二我没有购置名做的
[最终结果] 虽然爱画，可是我不收藏画，因为第一我不会鉴别古画的真假，第二我没有购置名做的彩礼。
[识别中...] 第三我并不爱那只拜色退的老东西不管他怎样古老怎样值钱
[最终结果] 第三，我并不爱那只败色退的老东西，不管他怎样古老，怎样值钱。
[识别中...] 
[最终结果] 哎呀。

程序异常: name 'generate_last_audio_default_header' is not defined
程序已退出


GUI is cleared because of reinit
建立WebSocket连接...
初始化响应: {'payload_msg': {'addition': {'duration': '0', 'logid': '202410281554592A408E52E2F13B60B12A'}, 'code': 1000, 'message': 'Success', 'reqid': 'ca7c7898-36c4-4e37-a093-5c84700b20ac', 'sequence': 1}, 'payload_size': 158}
初始化成功
录音任务已启动
开始录音...
[识别中...] 我爱十人的话因为色彩鲜明看见
[最终结果] 我爱识人的话，因为色彩鲜明看见。
[识别中...] 你走到明天来写二十件是不是还不够
[最终结果] 你等着，明天来写20遍是不是还不够？
[识别中...] 你管
[最终结果] 你管别人怎么。
[识别中...] 他们俩种了这个大老大去
[最终结果] 他们俩走了，这个大老大去。
[识别中...] 起来使我心中舒服而且不必为他们预备保险箱
[最终结果] 起来使我心中舒服，而且不必为他们预备保险箱。
[识别中...] 识人的话也有很贵的
[最终结果] 识人的话也有很贵的。
[识别中...] 我不会拿一本小说的稿费去
[最终结果] 我不会拿一本小说的稿费去换一张。
[识别中...] 的话看画时虽然心里舒服可是饿着肚子去看恐怕就不十分舒服了
[最终结果] 的话，看话时虽然心里舒服，可是饿着肚子去看，恐怕就不十分舒服了。
[识别中...] 我所有的话差不多都是朋友们送的
[最终结果] 我所有的话差不多都是朋友们送的。
[识别中...] 朋友们赠送的话除艺术价值之外还有友谊的价值
[最终结果] 朋友们赠送的画除艺术价值之外，还有有益的价值。
[识别中...] 及两个例子吧
[最终结果] 举两个例子吧。
[识别中...] 北平名画家闫伯龙
[最终结果] 北平名画家闫伯龙。
[识别中...] 五六公主
[最终结果] 是公主。
[识别中...] 
[最终结果] 
[识别中...] 
[最终结果] 
[识别中...] 你看啊
[最终结果] 你看啊！
[识别中...] 二十分
[最终结果] 20分。
[识别中...] 
[最终结果] 
[识别中...

NameError: name 'result_data' is not defined

In [5]:
import json
import uuid
import gzip
import asyncio
import websockets
import numpy as np
import sounddevice as sd
import nest_asyncio
from unihiker import GUI
import time

# 配置参数
appid = "4166554764"    # 项目的 appid
token = "ggmUTHHMXio-nJlKMkRvqEgkcWyfDK0K"    # 项目的 token
cluster = "volcengine_streaming_common"  # 请求的集群

# 协议常量
PROTOCOL_VERSION = 0b0001
DEFAULT_HEADER_SIZE = 0b0001

PROTOCOL_VERSION_BITS = 4
HEADER_BITS = 4
MESSAGE_TYPE_BITS = 4
MESSAGE_TYPE_SPECIFIC_FLAGS_BITS = 4
MESSAGE_SERIALIZATION_BITS = 4
MESSAGE_COMPRESSION_BITS = 4
RESERVED_BITS = 8

# Message Type:
CLIENT_FULL_REQUEST = 0b0001
CLIENT_AUDIO_ONLY_REQUEST = 0b0010
SERVER_FULL_RESPONSE = 0b1001
SERVER_ACK = 0b1011
SERVER_ERROR_RESPONSE = 0b1111

# Message Type Specific Flags
NO_SEQUENCE = 0b0000
POS_SEQUENCE = 0b0001
NEG_SEQUENCE = 0b0010
NEG_SEQUENCE_1 = 0b0011

# Message Serialization
NO_SERIALIZATION = 0b0000
JSON = 0b0001
THRIFT = 0b0011
CUSTOM_TYPE = 0b1111

# Message Compression
NO_COMPRESSION = 0b0000
GZIP = 0b0001
CUSTOM_COMPRESSION = 0b1111




# 初始化GUI
gui = GUI()

# 创建标题
title = gui.draw_text(
    x=120,
    y=30,
    text="语音识别系统",
    font_size=24,
    origin='center'
)

# 创建文本显示区域
text_box = gui.add_text_box(
    x=10,
    y=70,
    w=220,  # 宽度
    h=200,  # 高度
    text=""  # 初始为空
)

# 创建状态显示
status = gui.draw_text(
    x=120,
    y=280,
    text="准备中...",
    font_size=16,
    origin='center'
)

# 用于存储完整文本
full_text = ""

def update_text_display(new_text):
    """更新文本显示，累积新内容"""
    global full_text
    
    # 添加新文本
    if new_text:
        full_text = full_text + new_text + "\n"
        
        # 更新显示
        text_box.config(text=full_text)

def generate_header(
    version=PROTOCOL_VERSION,
    message_type=CLIENT_FULL_REQUEST,
    message_type_specific_flags=NO_SEQUENCE,
    serial_method=JSON,
    compression_type=GZIP,
    reserved_data=0x00,
    extension_header=bytes()
):
    header = bytearray()
    header_size = int(len(extension_header) / 4) + 1
    header.append((version << 4) | header_size)
    header.append((message_type << 4) | message_type_specific_flags)
    header.append((serial_method << 4) | compression_type)
    header.append(reserved_data)
    header.extend(extension_header)
    return header

def generate_full_default_header():
    return generate_header()

def generate_audio_default_header():
    return generate_header(
        message_type=CLIENT_AUDIO_ONLY_REQUEST
    )

def parse_response(res):
    """解析响应"""
    try:
        protocol_version = res[0] >> 4
        header_size = res[0] & 0x0f
        message_type = res[1] >> 4
        message_type_specific_flags = res[1] & 0x0f
        serialization_method = res[2] >> 4
        message_compression = res[2] & 0x0f
        reserved = res[3]
        header_extensions = res[4:header_size * 4]
        payload = res[header_size * 4:]
        result = {}
        payload_msg = None
        payload_size = 0
        
        if message_type == SERVER_FULL_RESPONSE:
            payload_size = int.from_bytes(payload[:4], "big", signed=True)
            payload_msg = payload[4:]
        elif message_type == SERVER_ACK:
            seq = int.from_bytes(payload[:4], "big", signed=True)
            result['seq'] = seq
            if len(payload) >= 8:
                payload_size = int.from_bytes(payload[4:8], "big", signed=False)
                payload_msg = payload[8:]
        elif message_type == SERVER_ERROR_RESPONSE:
            code = int.from_bytes(payload[:4], "big", signed=False)
            result['code'] = code
            payload_size = int.from_bytes(payload[4:8], "big", signed=False)
            payload_msg = payload[8:]
            
        if payload_msg is None:
            return result
            
        if message_compression == GZIP:
            payload_msg = gzip.decompress(payload_msg)
            
        if serialization_method == JSON:
            payload_msg = json.loads(str(payload_msg, "utf-8"))
        elif serialization_method != NO_SERIALIZATION:
            payload_msg = str(payload_msg, "utf-8")
            
        result['payload_msg'] = payload_msg
        result['payload_size'] = payload_size
        return result
    except Exception as e:
        return {"error": f"Failed to parse response: {str(e)}"}

class AsrWsClient:
    def __init__(self, appid, token, cluster):
        self.appid = appid
        self.token = token
        self.cluster = cluster
        self.ws_url = "wss://openspeech.bytedance.com/api/v2/asr"
        self.success_code = 1000
        self.uid = "streaming_asr_demo"
        self.workflow = "audio_in,resample,partition,vad,fe,decode,itn,nlu_punctuate"
        self.show_language = False
        self.show_utterances = True
        self.result_type = "single"
        self.format = "raw"
        self.rate = 16000
        self.language = "zh-CN"
        self.bits = 16
        self.channel = 1
        self.codec = "raw"
        self.auth_method = "token"

    def construct_request(self, reqid):
        return {
            'app': {
                'appid': self.appid,
                'cluster': self.cluster,
                'token': self.token,
            },
            'user': {
                'uid': self.uid
            },
            'request': {
                'reqid': reqid,
                'nbest': 1,
                'workflow': self.workflow,
                'show_language': self.show_language,
                'show_utterances': self.show_utterances,
                'result_type': self.result_type,
                'sequence': 1
            },
            'audio': {
                'format': self.format,
                'rate': self.rate,
                'language': self.language,
                'bits': self.bits,
                'channel': self.channel,
                'codec': self.codec
            }
        }

    def token_auth(self):
        return {'Authorization': f'Bearer; {self.token}'}

    async def process_microphone(self):
        """实时麦克风录音并识别"""
        reqid = str(uuid.uuid4())
        request_params = self.construct_request(reqid)
        
        # 构造初始请求
        payload_bytes = str.encode(json.dumps(request_params))
        payload_bytes = gzip.compress(payload_bytes)
        full_request = bytearray(generate_full_default_header())
        full_request.extend(len(payload_bytes).to_bytes(4, 'big'))
        full_request.extend(payload_bytes)

        print("建立WebSocket连接...")
        async with websockets.connect(
            self.ws_url, 
            extra_headers=self.token_auth(), 
            max_size=1000000000
        ) as ws:
            # 发送初始请求
            await ws.send(full_request)
            response = await ws.recv()
            result = parse_response(response)
            print(f"初始化响应: {result}")
            
            if 'payload_msg' in result and result['payload_msg']['code'] == self.success_code:
                print("初始化成功")
                print("录音任务已启动")
                chunk_size = 9600  # 每次读取的采样点数
                
                with sd.InputStream(
                    channels=1, 
                    samplerate=16000,
                    dtype=np.int16,
                    blocksize=chunk_size,
                    callback=None
                ) as stream:
                    print("开始录音...")
                    try:
                        while True:
                            # 读取音���
                            audio_data, overflowed = stream.read(chunk_size)
                            if overflowed:
                                print("警告：音频缓冲区溢出")
                                
                            # 转换为字节
                            audio_bytes = audio_data.tobytes()
                            
                            # 压缩音频数据
                            compressed_audio = gzip.compress(audio_bytes)
                            
                            # 构造音频数据请求
                            audio_request = bytearray(generate_audio_default_header())
                            audio_request.extend(len(compressed_audio).to_bytes(4, 'big'))
                            audio_request.extend(compressed_audio)
                            
                            # 发送音频数据
                            await ws.send(audio_request)
                            
                            # 接收识别结果
                            response = await ws.recv()
                            result = parse_response(response)
                            
                            # 处理识别结果
                            if 'payload_msg' in result and 'result' in result['payload_msg']:
                                utterances = result['payload_msg']['result'][0].get('utterances', [])
                                for utterance in utterances:
                                    if not utterance['definite']:
                                        print(f"\r[识别中...] {utterance['text']}", end='', flush=True)
                                    else:
                                        print(f"\n[最终结果] {utterance['text']}")
                                        update_text_display(utterance['text'])
                    except KeyboardInterrupt:
                        # 发送最后一个音频包
                        last_request = bytearray(generate_last_audio_default_header())
                        last_request.extend(len(compressed_audio).to_bytes(4, 'big'))
                        last_request.extend(compressed_audio)
                        await ws.send(last_request)
                        print("\n录音已停止")
                    except Exception as e:
                        print(f"录音过程发生错误: {e}")
            else:
                print(f"初始化失败: {result['payload_msg'].get('message')}")

# 在notebook中运行异步代码的辅助函数
import nest_asyncio
nest_asyncio.apply()

# 创建客户端实例并运行
client = AsrWsClient(appid, token, cluster)
try:
    loop = asyncio.get_event_loop()
    loop.run_until_complete(client.process_microphone())
except KeyboardInterrupt:
    print("\n程序已被用户中断")
except Exception as e:
    print(f"\n程序异常: {e}")
finally:
    print("程序已退出")

def update_text_with_scroll(new_text):
    """更新文本并滚动到底部"""
    try:
        # 获取当前内容并追加新文本
        current_text = text_box.text.get("1.0", "end-1c")  # end-1c 去掉最后的换行符
        updated_text = current_text + new_text + "\n"
        
        # 更新文本内容
        text_box.text.delete("1.0", "end")
        text_box.text.insert("1.0", updated_text)
        
        # 强制滚动到底部
        text_box.text.yview_moveto(1.0)
        
        # 更新GUI
        if hasattr(gui, 'update'):
            gui.update()
            
    except Exception as e:
        print(f"更新文本错误: {e}")

# 测试文本更新
count = 1
while True:
    try:
        # 添加新文本
        new_text = f"这是第 {count} 行测试文本"
        update_text_with_scroll(new_text)
        
        count += 1
        time.sleep(1)
        
    except KeyboardInterrupt:
        break

def update_recognition_text(new_text):
    """更新识别结果并滚动到底部"""
    current_text = text_box.text.get("1.0", "end")  # 获取当前内容
    text_box.config(text=current_text + new_text + "\n")  # 追加新内容并换行
    text_box.text.see("end")  # 滚动到底部

# 在识别结果处理部分使用：
if result_data['result'][0].get('is_final'):
    text = result_data['result'][0].get('text', '')
    print(f"识别结果: {text}")
    update_recognition_text(text)


GUI is cleared because of reinit
建立WebSocket连接...
初始化响应: {'payload_msg': {'addition': {'duration': '0', 'logid': '20241028160346C5F618AA579BA0A217C1'}, 'code': 1000, 'message': 'Success', 'reqid': 'e85fef8a-d664-4bc3-bb83-996e81f1a006', 'sequence': 1}, 'payload_size': 159}
初始化成功
录音任务已启动
开始录音...
[识别中...] 龙呀龙虾儿哎呀妈不错
[最终结果] 龙也龙虾儿，哎呀，那儿不错。
[识别中...] 文轩同志要好好学习哎呀是我幼年时的
[最终结果] 文轩同志要好好学习，哎呀，是我幼年时的同学。
[识别中...] 我很喜爱他的话但是他总不肯给我画
[最终结果] 我很喜爱他的话，但是他总不肯给我画。
[识别中...] 陛下婚期的时候我决定
[最终结果] 陛下，婚期的时候我决定把握。
[识别中...] 
[最终结果] 拖住时机！
[识别中...] 博龙我毫不客气地对他说
[最终结果] 博龙，我毫不客气地对他说。
[识别中...] 不要送礼我要你一张画不画不行哦
[最终结果] 不要送礼，我要你一张画，不画不行哦！
[识别中...] 他没有再推脱给我化了张目视图
[最终结果] 他没有在推脱，给我画了张目视图。
[识别中...] 途中的富人小儿肥猪与桐树都画的极好
[最终结果] 图中的富人小儿、肥猪与桐树都画得极好。
[识别中...] 可惜他把图章印到了
[最终结果] 可惜他把图章印到了。
[识别中...] 虽然图
[最终结果] 虽然图章叫。
[识别中...] 朝廷我还是很爱这张画
[最终结果] 朝廷，我还是很爱这张话。
[识别中...] 因为伯龙就是那么个一天到晚慌里慌张的人这个脚朝天的图章正好体现了他的性格
[最终结果] 因为伯龙就是那么个一天到晚慌里慌张的人，这个脚朝天的图章正好体现了他的性格。
[识别中...] 第二个例子是齐白石大师所做的一张基础图我早就想得到他的一
[最终结果] 第二个例子是齐白石大师所做的一张基础图，我早就想得到他的一张。
[识别中...] 但这位老人永远不给任何

NameError: name 'result_data' is not defined

In [None]:
测试屏幕TEXT_BOX控件滚动条始终保持在最下面的功能

In [6]:
from unihiker import GUI
import time
from tkinter import END  # 需要导入END常量

gui = GUI()

# 创建文本框
text_box = gui.add_text_box(
    x=120,
    y=160,
    w=200,
    h=200,
    origin='center',
    font_size=14
)

# 方式1：每次更新后手动滚动到底部
count = 1
while True:
    new_text = f"Starting...\n" + "\n".join([f"Log #{i}" for i in range(1, count + 1)])
    text_box.config(text=new_text)
    text_box.text.see(END)  # 滚动到底部
    
    count += 1
    time.sleep(1)
    
    if gui.master.winfo_exists():
        gui.update()

GUI is cleared because of reinit


KeyboardInterrupt: 

In [None]:
实现麦克风音频流式语音识别，unihiker屏幕滚动显示

In [None]:
pip install dashscope



In [1]:
import json
import uuid
import gzip
import asyncio
import websockets
import numpy as np
import sounddevice as sd
import nest_asyncio
from unihiker import GUI
import time
from tkinter import END

# 配置参数
appid = "4166554764"    # 项目的 appid
token = "ggmUTHHMXio-nJlKMkRvqEgkcWyfDK0K"    # 项目的 token
cluster = "volcengine_streaming_common"  # 请求的集群

# 协议常量
PROTOCOL_VERSION = 0b0001
DEFAULT_HEADER_SIZE = 0b0001

PROTOCOL_VERSION_BITS = 4
HEADER_BITS = 4
MESSAGE_TYPE_BITS = 4
MESSAGE_TYPE_SPECIFIC_FLAGS_BITS = 4
MESSAGE_SERIALIZATION_BITS = 4
MESSAGE_COMPRESSION_BITS = 4
RESERVED_BITS = 8

# Message Type:
CLIENT_FULL_REQUEST = 0b0001
CLIENT_AUDIO_ONLY_REQUEST = 0b0010
SERVER_FULL_RESPONSE = 0b1001
SERVER_ACK = 0b1011
SERVER_ERROR_RESPONSE = 0b1111

# Message Type Specific Flags
NO_SEQUENCE = 0b0000
POS_SEQUENCE = 0b0001
NEG_SEQUENCE = 0b0010
NEG_SEQUENCE_1 = 0b0011

# Message Serialization
NO_SERIALIZATION = 0b0000
JSON = 0b0001
THRIFT = 0b0011
CUSTOM_TYPE = 0b1111

# Message Compression
NO_COMPRESSION = 0b0000
GZIP = 0b0001
CUSTOM_COMPRESSION = 0b1111




# 初始化GUI
gui = GUI()

# 创建标题
title = gui.draw_text(
    x=120,
    y=30,
    text="语音识别系统",
    font_size=24,
    origin='center'
)

# 创建文本框
text_box = gui.add_text_box(
    x=120,      # 中心x坐标
    y=160,      # 中心y坐标
    w=220,      # 宽度
    h=200,      # 高度
    origin='center',  # 居中对齐
    font_size=14
)

# 存储所有识别文本
all_texts = []

def update_recognition_text(new_text):
    """更新识别结果并保持滚动条在底部"""
    try:
        # 添加新文本到列表
        all_texts.append(new_text)
        
        # 更新文本框内容
        full_text = "\n".join(all_texts)
        text_box.config(text=full_text)
        
        # 滚动到底部
        text_box.text.see(END)
        
        # 更新GUI
        if gui.master.winfo_exists():
            gui.update()
            
    except Exception as e:
        print(f"更新文本错误: {e}")

def generate_header(
    version=PROTOCOL_VERSION,
    message_type=CLIENT_FULL_REQUEST,
    message_type_specific_flags=NO_SEQUENCE,
    serial_method=JSON,
    compression_type=GZIP,
    reserved_data=0x00,
    extension_header=bytes()
):
    header = bytearray()
    header_size = int(len(extension_header) / 4) + 1
    header.append((version << 4) | header_size)
    header.append((message_type << 4) | message_type_specific_flags)
    header.append((serial_method << 4) | compression_type)
    header.append(reserved_data)
    header.extend(extension_header)
    return header

def generate_full_default_header():
    return generate_header()

def generate_audio_default_header():
    return generate_header(
        message_type=CLIENT_AUDIO_ONLY_REQUEST
    )

def parse_response(res):
    """解析响应"""
    try:
        protocol_version = res[0] >> 4
        header_size = res[0] & 0x0f
        message_type = res[1] >> 4
        message_type_specific_flags = res[1] & 0x0f
        serialization_method = res[2] >> 4
        message_compression = res[2] & 0x0f
        reserved = res[3]
        header_extensions = res[4:header_size * 4]
        payload = res[header_size * 4:]
        result = {}
        payload_msg = None
        payload_size = 0
        
        if message_type == SERVER_FULL_RESPONSE:
            payload_size = int.from_bytes(payload[:4], "big", signed=True)
            payload_msg = payload[4:]
        elif message_type == SERVER_ACK:
            seq = int.from_bytes(payload[:4], "big", signed=True)
            result['seq'] = seq
            if len(payload) >= 8:
                payload_size = int.from_bytes(payload[4:8], "big", signed=False)
                payload_msg = payload[8:]
        elif message_type == SERVER_ERROR_RESPONSE:
            code = int.from_bytes(payload[:4], "big", signed=False)
            result['code'] = code
            payload_size = int.from_bytes(payload[4:8], "big", signed=False)
            payload_msg = payload[8:]
            
        if payload_msg is None:
            return result
            
        if message_compression == GZIP:
            payload_msg = gzip.decompress(payload_msg)
            
        if serialization_method == JSON:
            payload_msg = json.loads(str(payload_msg, "utf-8"))
        elif serialization_method != NO_SERIALIZATION:
            payload_msg = str(payload_msg, "utf-8")
            
        result['payload_msg'] = payload_msg
        result['payload_size'] = payload_size
        return result
    except Exception as e:
        return {"error": f"Failed to parse response: {str(e)}"}

class AsrWsClient:
    def __init__(self, appid, token, cluster):
        self.appid = appid
        self.token = token
        self.cluster = cluster
        self.ws_url = "wss://openspeech.bytedance.com/api/v2/asr"
        self.success_code = 1000
        self.uid = "streaming_asr_demo"
        self.workflow = "audio_in,resample,partition,vad,fe,decode,itn,nlu_punctuate"
        self.show_language = False
        self.show_utterances = True
        self.result_type = "single"
        self.format = "raw"
        self.rate = 16000
        self.language = "zh-CN"
        self.bits = 16
        self.channel = 1
        self.codec = "raw"
        self.auth_method = "token"

    def construct_request(self, reqid):
        return {
            'app': {
                'appid': self.appid,
                'cluster': self.cluster,
                'token': self.token,
            },
            'user': {
                'uid': self.uid
            },
            'request': {
                'reqid': reqid,
                'nbest': 1,
                'workflow': self.workflow,
                'show_language': self.show_language,
                'show_utterances': self.show_utterances,
                'result_type': self.result_type,
                'sequence': 1
            },
            'audio': {
                'format': self.format,
                'rate': self.rate,
                'language': self.language,
                'bits': self.bits,
                'channel': self.channel,
                'codec': self.codec
            }
        }

    def token_auth(self):
        return {'Authorization': f'Bearer; {self.token}'}

    async def process_microphone(self):
        """实时麦克风录音并识别"""
        reqid = str(uuid.uuid4())
        request_params = self.construct_request(reqid)
        
        # 构造初始请求
        payload_bytes = str.encode(json.dumps(request_params))
        payload_bytes = gzip.compress(payload_bytes)
        full_request = bytearray(generate_full_default_header())
        full_request.extend(len(payload_bytes).to_bytes(4, 'big'))
        full_request.extend(payload_bytes)

        print("建立WebSocket连接...")
        async with websockets.connect(
            self.ws_url, 
            extra_headers=self.token_auth(), 
            max_size=1000000000
        ) as ws:
            # 发送初始请求
            await ws.send(full_request)
            response = await ws.recv()
            result = parse_response(response)
            print(f"初始化响应: {result}")
            
            if 'payload_msg' in result and result['payload_msg']['code'] == self.success_code:
                print("初始化成功")
                print("录音任务已启动")
                chunk_size = 9600  # 每次读取的采样点数
                
                with sd.InputStream(
                    channels=1, 
                    samplerate=16000,
                    dtype=np.int16,
                    blocksize=chunk_size,
                    callback=None
                ) as stream:
                    print("开始录音...")
                    try:
                        while True:
                            # 读取音
                            audio_data, overflowed = stream.read(chunk_size)
                            if overflowed:
                                print("警告：音频缓冲区溢出")
                                
                            # 转换为字节
                            audio_bytes = audio_data.tobytes()
                            
                            # 压缩音频数据
                            compressed_audio = gzip.compress(audio_bytes)
                            
                            # 构造音频数据请求
                            audio_request = bytearray(generate_audio_default_header())
                            audio_request.extend(len(compressed_audio).to_bytes(4, 'big'))
                            audio_request.extend(compressed_audio)
                            
                            # 发送音频数据
                            await ws.send(audio_request)
                            
                            # 接收识别结果
                            response = await ws.recv()
                            result = parse_response(response)
                            
                            # 处理识别结果
                            if 'payload_msg' in result and 'result' in result['payload_msg']:
                                utterances = result['payload_msg']['result'][0].get('utterances', [])
                                for utterance in utterances:
                                    if not utterance['definite']:
                                        print(f"\r[识别中...] {utterance['text']}", end='', flush=True)
                                    else:
                                        print(f"\n[最终结果] {utterance['text']}")
                                        update_recognition_text(utterance['text'])
                    except KeyboardInterrupt:
                        # 发送最后一个音频包
                        last_request = bytearray(generate_last_audio_default_header())
                        last_request.extend(len(compressed_audio).to_bytes(4, 'big'))
                        last_request.extend(compressed_audio)
                        await ws.send(last_request)
                        print("\n录音已停止")
                    except Exception as e:
                        print(f"录音过程发生错误: {e}")
            else:
                print(f"初始化失败: {result['payload_msg'].get('message')}")

# 在notebook中运行异步代码的辅助函数
import nest_asyncio
nest_asyncio.apply()

# 创建客户端实例并运行
client = AsrWsClient(appid, token, cluster)
try:
    loop = asyncio.get_event_loop()
    loop.run_until_complete(client.process_microphone())
except KeyboardInterrupt:
    print("\n程序已被用户中断")
except Exception as e:
    print(f"\n程序异常: {e}")
finally:
    print("程序已退出")

def update_text_with_scroll(new_text):
    """更新文本并滚动到底部"""
    try:
        # 获取当前内容并追加新文本
        current_text = text_box.text.get("1.0", "end-1c")  # end-1c 去掉最后的换行符
        updated_text = current_text + new_text + "\n"
        
        # 更新文本内容
        text_box.text.delete("1.0", "end")
        text_box.text.insert("1.0", updated_text)
        
        # 强制滚动到底部
        text_box.text.yview_moveto(1.0)
        
        # 更新GUI
        if hasattr(gui, 'update'):
            gui.update()
            
    except Exception as e:
        print(f"更新文本错误: {e}")

# 测试文本更新
count = 1
while True:
    try:
        # 添加新文本
        new_text = f"这是第 {count} 行测试文本"
        update_text_with_scroll(new_text)
        
        count += 1
        time.sleep(1)
        
    except KeyboardInterrupt:
        break

def update_recognition_text(new_text):
    """更新识别结果并滚动到底部"""
    current_text = text_box.text.get("1.0", "end")  # 获取当前内容
    text_box.config(text=current_text + new_text + "\n")  # 追加新内容并换行
    text_box.text.see("end")  # 滚动到底部

# 在识别结果处理部分使用：
if result_data['result'][0].get('is_final'):
    text = result_data['result'][0].get('text', '')
    print(f"识别结果: {text}")
    update_recognition_text(text)



建立WebSocket连接...
初始化响应: {'payload_msg': {'addition': {'duration': '0', 'logid': '202410301449293DBB14907DDE5EF14C4F'}, 'code': 1000, 'message': 'Success', 'reqid': '3fbc2504-535d-4524-b71e-2cc670f8c62d', 'sequence': 1}, 'payload_size': 159}
初始化成功
录音任务已启动
开始录音...

程序异常: name 'generate_last_audio_default_header' is not defined
程序已退出


NameError: name 'result_data' is not defined