In [None]:
本地文件上传音频缓存服务器测试

In [12]:
import requests
import os

def upload_audio(file_path, server_url, api_key):
    if not os.path.exists(file_path):
        print(f"错误：文件 {file_path} 不存在")
        return

    try:
        with open(file_path, 'rb') as audio_file:
            # 根据文件扩展名设置正确的 Content-Type
            file_extension = os.path.splitext(file_path)[1].lower()
            if file_extension == '.wav':
                content_type = 'audio/wav'
            elif file_extension == '.mp3':
                content_type = 'audio/mpeg'
            else:
                content_type = 'audio/mpeg'  # 默认使用 audio/mpeg
            
            headers = {
                'Content-Type': content_type,
                'X-API-Key': api_key  # 使用 X-API-Key 头部传递 API 密钥
            }
            response = requests.post(f"{server_url}/api/upload", data=audio_file, headers=headers)
        
        if response.status_code == 200:
            print("文件上传成功")
            print("服务器响应:", response.json())
        elif response.status_code == 401:
            print("上传失败：无效的 API 密钥")
        else:
            print(f"上传失败，状态码: {response.status_code}")
            print("服务器响应:", response.text)
    except requests.RequestException as e:
        print(f"上传过程中发生错误: {e}")

if __name__ == "__main__":
    server_url = "http://www.52ai.fun"
    # 使用当前工作目录
    current_dir = os.getcwd()
    audio_file_path = os.path.join(current_dir, "3s.wav")
    api_key = "arPmwKIS3vDRn8kLyesSQw6bZGZVkPbHDEAnH9avi7w"  # 替换为服务器生成的实际 API 密钥
    
    if not os.path.exists(audio_file_path):
        print(f"错误：文件 {audio_file_path} 不存在")
    else:
        upload_audio(audio_file_path, server_url, api_key)

文件上传成功
服务器响应: {'message': 'File uploaded successfully', 'file_name': 'd7ac9c08-146e-4b10-987f-e0bf0cce22a9.wav', 'file_url': 'http://www.52ai.com/voice/d7ac9c08-146e-4b10-987f-e0bf0cce22a9.wav'}


In [None]:
实现了本地录音，并且上传服务器

In [14]:
import os
import time
import requests
from unihiker import Audio

# 服务器API地址
SERVER_URL = "http://www.52ai.fun"  # 注意这里添加了 www
API_URL = f"{SERVER_URL}/api/upload"

# API密钥
API_KEY = "arPmwKIS3vDRn8kLyesSQw6bZGZVkPbHDEAnH9avi7w"

# 音频采集设置
SAMPLE_RATE = 16000
DURATION = 5  # 录音时长(秒)
CHANNELS = 1  # 单声道

def record_audio():
    print("开始录音...")
    audio = Audio()
    audio_file = "/tmp/recording.wav"
    
    try:
        audio.record(audio_file, DURATION)
        time.sleep(DURATION + 1)  # 等待录音完成，多等待1秒确保文件保存
        print(f"录音已保存为 {audio_file}")
        return audio_file
    except Exception as e:
        print(f"录音过程中发生错误: {e}")
        return None

def upload_audio(file_path):
    if not os.path.exists(file_path):
        print(f"错误：文件 {file_path} 不存在")
        return None

    try:
        with open(file_path, 'rb') as audio_file:
            headers = {
                'Content-Type': 'audio/wav',
                'X-API-Key': API_KEY
            }
            print(f"正在上传文件到 {API_URL}")
            print(f"使用的头部信息: {headers}")
            response = requests.post(API_URL, data=audio_file, headers=headers, timeout=30)
        
        print(f"服务器响应状态码: {response.status_code}")
        print(f"服务器响应头: {response.headers}")
        print(f"服务器响应内容: {response.text[:200]}...")  # 打印响应内容的前200个字符
        
        if response.status_code == 200:
            print("文件上传成功")
            try:
                result = response.json()
                print(f"解析后的JSON响应: {result}")
                if 'file_url' in result:
                    audio_url = result['file_url']
                    print(f"服务器返回的音频URL: {audio_url}")
                    return audio_url
                else:
                    print("警告：服务器响应中没有 file_url 字段")
            except ValueError:
                print("错误：无法解析服务器响应为JSON")
        elif response.status_code == 401:
            print("上传失败：无效的 API 密钥")
        else:
            print(f"上传失败，状态码: {response.status_code}")
            print("服务器响应:", response.text)
        return None
    except requests.RequestException as e:
        print(f"上传过程中发生错误: {e}")
        print(f"错误类型: {type(e).__name__}")
        if isinstance(e, requests.ConnectionError):
            print("连接错误。请检查网络连接和服务器地址。")
        elif isinstance(e, requests.Timeout):
            print("请求超时。服务器可能响应过慢或不可达。")
        return None

def check_network():
    try:
        response = requests.get(SERVER_URL, timeout=5)
        print(f"服务器连接测试成功。状态码: {response.status_code}")
    except requests.RequestException as e:
        print(f"服务器连接测试失败: {e}")

def main():
    check_network()
    
    while True:
        input("按回车键开始录音...")
        audio_file = record_audio()
        if audio_file:
            audio_url = upload_audio(audio_file)
            if audio_url:
                print(f"音频文件已成功上传，URL: {audio_url}")
            else:
                print("音频文件上传失败")
        else:
            print("录音失败")
        
        time.sleep(2)  # 等待2秒后准备下一次录音

if __name__ == "__main__":
    main()

服务器连接测试成功。状态码: 200
按回车键开始录音...
开始录音...
录音已保存为 /tmp/recording.wav
正在上传文件到 http://www.52ai.fun/api/upload
使用的头部信息: {'Content-Type': 'audio/wav', 'X-API-Key': 'arPmwKIS3vDRn8kLyesSQw6bZGZVkPbHDEAnH9avi7w'}
服务器响应状态码: 200
服务器响应头: {'Server': 'openresty', 'Date': 'Wed, 23 Oct 2024 02:15:10 GMT', 'Content-Type': 'application/json', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Access-Control-Allow-Origin': '*', 'Cache-Control': 'no-cache'}
服务器响应内容: {"message": "File uploaded successfully", "file_name": "5f42d3b3-95cf-4b96-8667-d316f6dcc54d.wav", "file_url": "http://www.52ai.com/voice/5f42d3b3-95cf-4b96-8667-d316f6dcc54d.wav"}...
文件上传成功
解析后的JSON响应: {'message': 'File uploaded successfully', 'file_name': '5f42d3b3-95cf-4b96-8667-d316f6dcc54d.wav', 'file_url': 'http://www.52ai.com/voice/5f42d3b3-95cf-4b96-8667-d316f6dcc54d.wav'}
服务器返回的音频URL: http://www.52ai.com/voice/5f42d3b3-95cf-4b96-8667-d316f6dcc54d.wav
音频文件已成功上传，URL: http://www.52ai.com/voice/5f42d3b3-95cf-4b96-8667-d316f6dcc54

KeyboardInterrupt: Interrupted by user

In [33]:
import os
import time
import json
import requests
from unihiker import Audio

# 服务器API地址
SERVER_URL = "http://www.52ai.fun"
API_URL = f"{SERVER_URL}/api/upload"

# API密钥
SERVER_API_KEY = "arPmwKIS3vDRn8kLyesSQw6bZGZVkPbHDEAnH9avi7w"
ALIYUN_API_KEY = "sk-7c04ee6f9432492bb344baa7a5c0162f"  # 替换为您的阿里云API Key

# 音频采集设置
SAMPLE_RATE = 16000
DURATION = 5  # 录音时长(秒)
CHANNELS = 1  # 单声道

def record_audio():
    print("开始录音...")
    audio = Audio()
    audio_file = "/tmp/recording.wav"
    
    try:
        audio.record(audio_file, DURATION)
        time.sleep(DURATION + 1)  # 等待录音完成，多等待1秒确保文件保存
        print(f"录音已保存为 {audio_file}")
        return audio_file
    except Exception as e:
        print(f"录音过程中发生错误: {e}")
        return None

def upload_audio(file_path):
    if not os.path.exists(file_path):
        print(f"错误：文件 {file_path} 不存在")
        return None

    try:
        with open(file_path, 'rb') as audio_file:
            headers = {
                'Content-Type': 'audio/wav',
                'X-API-Key': SERVER_API_KEY
            }
            print(f"正在上传文件到 {API_URL}")
            response = requests.post(API_URL, data=audio_file, headers=headers, timeout=30)
        
        if response.status_code == 200:
            print("文件上传成功")
            result = response.json()
            if 'file_url' in result:
                audio_url = result['file_url']
                print(f"服务器返回的音频URL: {audio_url}")
                return audio_url
            else:
                print("警告：服务器响应中没有 file_url 字段")
        else:
            print(f"上传失败，状态码: {response.status_code}")
        return None
    except requests.RequestException as e:
        print(f"上传过程中发生错误: {e}")
        return None

def recognize_speech(audio_url):
    url = "https://bailian.aliyuncs.com/v2/app/invoke/paraformer-v2"
    headers = {
        "Authorization": f"Bearer {ALIYUN_API_KEY}",
        "Content-Type": "application/json"
    }
    data = {
        "prompt": "请将以下音频转录为文字：",
        "top_p": 0.8,
        "top_k": 100,
        "seed": 1234,
        "input_parameters": {
            "audio_url": audio_url
        }
    }

    try:
        print("正在进行语音识别...")
        print(f"请求URL: {url}")
        print(f"请求头: {headers}")
        print(f"请求数据: {json.dumps(data, ensure_ascii=False, indent=2)}")
        
        response = requests.post(url, headers=headers, json=data)
        print(f"响应状态码: {response.status_code}")
        print(f"响应头: {response.headers}")
        print(f"响应内容: {response.text}")
        
        if response.status_code == 200:
            result = response.json()
            if 'data' in result:
                recognition_text = result['data']
                print(f"识别结果: {recognition_text}")
                return recognition_text
            else:
                print("警告：API响应中没有识别结果")
                print(f"完整的API响应: {json.dumps(result, ensure_ascii=False, indent=2)}")
        else:
            print(f"识别请求失败，状态码: {response.status_code}")
        return None
    except Exception as e:
        print(f"语音识别失败: {str(e)}")
        return None

def main():
    while True:
        input("按回车键开始录音...")
        
        audio_file = record_audio()
        if audio_file:
            audio_url = upload_audio(audio_file)
            if audio_url:
                recognition_result = recognize_speech(audio_url)
                if recognition_result:
                    print("识别完成")
                    print(f"完整识别结果: {recognition_result}")
                else:
                    print("语音识别失败")
            else:
                print("音频文件上传失败")
        else:
            print("录音失败")
        
        time.sleep(2)  # 等待2秒后准备下一次录音

if __name__ == "__main__":
    main()


按回车键开始录音...
开始录音...
录音已保存为 /tmp/recording.wav
正在上传文件到 http://www.52ai.fun/api/upload
文件上传成功
服务器返回的音频URL: http://www.52ai.fun/voice/b1a0251d-99e8-48be-a327-f2502bc1c93f.wav
正在进行语音识别...
请求URL: https://bailian.aliyuncs.com/v2/app/invoke/paraformer-v2
请求头: {'Authorization': 'Bearer sk-7c04ee6f9432492bb344baa7a5c0162f', 'Content-Type': 'application/json'}
请求数据: {
  "prompt": "请将以下音频转录为文字：",
  "top_p": 0.8,
  "top_k": 100,
  "seed": 1234,
  "input_parameters": {
    "audio_url": "http://www.52ai.fun/voice/b1a0251d-99e8-48be-a327-f2502bc1c93f.wav"
  }
}
响应状态码: 200
响应头: {'Date': 'Wed, 23 Oct 2024 03:46:03 GMT', 'Content-Type': 'application/json;charset=UTF-8', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Vary': 'Accept-Encoding, Origin, Access-Control-Request-Method, Access-Control-Request-Headers', 'X-Content-Type-Options': 'nosniff', 'Cache-Control': 'no-cache, no-store, max-age=0, must-revalidate', 'Pragma': 'no-cache', 'Expires': '0', 'X-Frame-Options': 'DENY', 'EagleEye-Tr

KeyboardInterrupt: Interrupted by user

In [25]:
pip install -U openai

Collecting openai
  Using cached openai-1.52.1-py3-none-any.whl (386 kB)
  Using cached openai-1.52.0-py3-none-any.whl (386 kB)
  Using cached openai-1.51.2-py3-none-any.whl (383 kB)
  Using cached openai-1.51.1-py3-none-any.whl (383 kB)
  Using cached openai-1.51.0-py3-none-any.whl (383 kB)
  Using cached openai-1.50.2-py3-none-any.whl (382 kB)
  Using cached openai-1.50.1-py3-none-any.whl (378 kB)
  Using cached openai-1.50.0-py3-none-any.whl (378 kB)
  Using cached openai-1.49.0-py3-none-any.whl (378 kB)
  Using cached openai-1.48.0-py3-none-any.whl (376 kB)
  Using cached openai-1.47.1-py3-none-any.whl (375 kB)
  Using cached openai-1.47.0-py3-none-any.whl (375 kB)
  Using cached openai-1.46.1-py3-none-any.whl (375 kB)
  Using cached openai-1.46.0-py3-none-any.whl (375 kB)
  Using cached openai-1.45.1-py3-none-any.whl (374 kB)
  Using cached openai-1.45.0-py3-none-any.whl (374 kB)
  Using cached openai-1.44.1-py3-none-any.whl (373 kB)
  Using cached openai-1.44.0-py3-none-any.whl (

In [30]:
from http import HTTPStatus
import dashscope
import json
import time
import requests

# 设置您的 DashScope API key
dashscope.api_key = 'sk-7c04ee6f9432492bb344baa7a5c0162f'

def submit_transcription_task(file_urls, language_hints=['zh', 'en']):
    task_response = dashscope.audio.asr.Transcription.async_call(
        model='paraformer-v2',
        file_urls=file_urls,
        language_hints=language_hints
    )
    return task_response.output.task_id

def fetch_transcription_result(task_id):
    return dashscope.audio.asr.Transcription.fetch(task=task_id)

def poll_transcription_task(task_id, max_attempts=30, interval=10):
    for attempt in range(max_attempts):
        response = fetch_transcription_result(task_id)
        status = response.output.task_status
        print(f"Attempt {attempt + 1}: Task status - {status}")
        if status in ['SUCCEEDED', 'FAILED']:
            return response
        time.sleep(interval)
    print("Task polling timed out")
    return None

def get_detailed_transcription(transcription_url):
    response = requests.get(transcription_url)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Failed to get detailed transcription. Status code: {response.status_code}")
        return None

def display_transcription_result(detailed_result):
    print("\nDetailed Transcription Result:")
    print(f"File URL: {detailed_result['file_url']}")
    print(f"Audio Format: {detailed_result['properties']['audio_format']}")
    print(f"Sampling Rate: {detailed_result['properties']['original_sampling_rate']} Hz")
    print(f"Duration: {detailed_result['properties']['original_duration_in_milliseconds']} ms")
    
    for transcript in detailed_result['transcripts']:
        print(f"\nChannel ID: {transcript['channel_id']}")
        print(f"Content Duration: {transcript['content_duration_in_milliseconds']} ms")
        print(f"Full Text: {transcript['text']}")
        
        print("\nSentences:")
        for sentence in transcript['sentences']:
            print(f"  {sentence['begin_time']} - {sentence['end_time']} ms: {sentence['text']}")
            
            print("  Words:")
            for word in sentence['words']:
                print(f"    {word['begin_time']} - {word['end_time']} ms: {word['text']}{word['punctuation']}")

def main():
    file_urls = [
        'https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_female2.wav',
        'https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_male2.wav'
    ]

    task_id = submit_transcription_task(file_urls)
    print(f"Task submitted. Task ID: {task_id}")

    final_response = poll_transcription_task(task_id)

    if final_response and final_response.output.task_status == 'SUCCEEDED':
        print("Transcription completed successfully.")
        
        for result in final_response.output.results:
            print(f"\nProcessing file: {result['file_url']}")
            detailed_result = get_detailed_transcription(result['transcription_url'])
            if detailed_result:
                display_transcription_result(detailed_result)
            else:
                print("Failed to get detailed transcription result.")
    else:
        print("Transcription failed or timed out.")

if __name__ == "__main__":
    main()

Task submitted. Task ID: 3acf258b-9f62-4950-bfed-35dc2a51593a
Attempt 1: Task status - RUNNING
Attempt 2: Task status - SUCCEEDED
Transcription completed successfully.

Processing file: https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_male2.wav

Detailed Transcription Result:
File URL: https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_male2.wav
Audio Format: pcm_s16le
Sampling Rate: 16000 Hz
Duration: 4726 ms

Channel ID: 0
Content Duration: 4570 ms
Full Text: Hello world, 这里是阿里巴巴语音实验室。

Sentences:
  140 - 4710 ms: Hello world, 这里是阿里巴巴语音实验室。
  Words:
    140 - 597 ms: Hello 
    597 - 1054 ms: world, 
    1054 - 1663 ms: 这里
    1663 - 2272 ms: 是阿
    2272 - 2881 ms: 里巴
    2881 - 3490 ms: 巴语
    3490 - 4099 ms: 音实
    4099 - 4710 ms: 验室。

Processing file: https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_female2.wav

Detailed Transcription Result:
File URL: https://dashscope.oss-cn-beiji

按回车键开始录音...
开始录音...
录音已保存为 /tmp/recording.wav
正在上传文件到 http://www.52ai.fun/api/upload
文件上传成功
服务器返回的音频URL: http://www.52ai.fun/voice/043bca17-8645-4759-ac01-854e2476d603.wav
正在进行语音识别...
请求URL: https://bailian.aliyuncs.com/v2/app/invoke/paraformer-v2
请求头: {'Authorization': 'Bearer sk-7c04ee6f9432492bb344baa7a5c0162f', 'Content-Type': 'application/json'}
请求数据: {
  "prompt": "请将以下音频转录为文字：",
  "top_p": 0.8,
  "top_k": 100,
  "seed": 1234,
  "input_parameters": {
    "audio_url": "http://www.52ai.fun/voice/043bca17-8645-4759-ac01-854e2476d603.wav"
  }
}
响应状态码: 200
响应头: {'Date': 'Wed, 23 Oct 2024 06:57:45 GMT', 'Content-Type': 'application/json;charset=UTF-8', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Vary': 'Accept-Encoding, Origin, Access-Control-Request-Method, Access-Control-Request-Headers', 'X-Content-Type-Options': 'nosniff', 'Cache-Control': 'no-cache, no-store, max-age=0, must-revalidate', 'Pragma': 'no-cache', 'Expires': '0', 'X-Frame-Options': 'DENY', 'EagleEye-Tr

KeyboardInterrupt: Interrupted by user

In [None]:
录音，上传服务器生成URL，语音模型调用URL。识别文字。调试成功。


In [43]:
import os
import time
import json
import requests
from unihiker import Audio
import dashscope
from dashscope.audio.asr import Transcription

# 服务器API地址
SERVER_URL = "http://www.52ai.fun"
API_URL = f"{SERVER_URL}/api/upload"

# API密钥
SERVER_API_KEY = "1F1vmARoSjXRTDvywh9XtbnR8vd74AfffF0t0jn3qhM"
dashscope.api_key = 'sk-7c04ee6f9432492bb344baa7a5c0162f'

# 音频采集设置
SAMPLE_RATE = 16000
DURATION = 20  # 录音时长(秒)，已更改为20秒
CHANNELS = 1  # 单声道

def record_audio():
    print("开始录音...")
    print(f"请说话，录音将持续 {DURATION} 秒...")
    audio = Audio()
    audio_file = "/tmp/recording.wav"
    
    try:
        audio.record(audio_file, DURATION)
        for i in range(DURATION, 0, -1):
            print(f"还剩 {i} 秒...")
            time.sleep(1)
        print("录音完成")
        time.sleep(1)  # 额外等待1秒确保文件保存
        print(f"录音已保存为 {audio_file}")
        return audio_file
    except Exception as e:
        print(f"录音过程中发生错误: {e}")
        return None

def upload_audio(file_path):
    if not os.path.exists(file_path):
        print(f"错误：文件 {file_path} 不存在")
        return None

    try:
        with open(file_path, 'rb') as audio_file:
            headers = {
                'Content-Type': 'audio/wav',
                'X-API-Key': SERVER_API_KEY
            }
            print(f"正在上传文件到 {API_URL}")
            response = requests.post(API_URL, data=audio_file, headers=headers)
        
        if response.status_code == 200:
            print("文件上传成功")
            result = response.json()
            print("服务器响应:", result)
            if 'file_url' in result:
                audio_url = result['file_url']
                print(f"服务器返回的音频URL: {audio_url}")
                return audio_url
            else:
                print("警告：服务器响应中没有 file_url 字段")
        elif response.status_code == 401:
            print("上传失败：无效的 API 密钥")
        else:
            print(f"上传失败，状态码: {response.status_code}")
            print("服务器响应:", response.text)
        return None
    except requests.RequestException as e:
        print(f"上传过程中发生错误: {e}")
        return None

def submit_transcription_task(file_url):
    task_response = Transcription.async_call(
        model='paraformer-v2',
        file_urls=[file_url],
        language_hints=['zh', 'en']
    )
    return task_response.output.task_id

def fetch_transcription_result(task_id):
    return Transcription.fetch(task=task_id)

def poll_transcription_task(task_id, max_attempts=30, interval=2):
    for attempt in range(max_attempts):
        response = fetch_transcription_result(task_id)
        status = response.output.task_status
        print(f"尝试 {attempt + 1}: 任务状态 - {status}")
        if status in ['SUCCEEDED', 'FAILED']:
            return response
        time.sleep(interval)
    print("任务轮询超时")
    return None

def display_transcription_result(detailed_result):
    print("\n详细转录结果:")
    print(f"文件 URL: {detailed_result['file_url']}")
    print(f"音频格式: {detailed_result['properties']['audio_format']}")
    print(f"采样率: {detailed_result['properties']['original_sampling_rate']} Hz")
    print(f"时长: {detailed_result['properties']['original_duration_in_milliseconds']} ms")
    
    for transcript in detailed_result['transcripts']:
        print(f"\n通道 ID: {transcript['channel_id']}")
        print(f"内容时长: {transcript['content_duration_in_milliseconds']} ms")
        print(f"完整文本: {transcript['text']}")
        
        print("\n句子:")
        for sentence in transcript['sentences']:
            print(f"  {sentence['begin_time']} - {sentence['end_time']} ms: {sentence['text']}")
            
            print("  词:")
            for word in sentence['words']:
                print(f"    {word['begin_time']} - {word['end_time']} ms: {word['text']}{word['punctuation']}")

def main():
    while True:
        input("按回车键开始录音...")
        
        audio_file = record_audio()
        if audio_file:
            audio_url = upload_audio(audio_file)
            if audio_url:
                print("正在进行语音识别...")
                task_id = submit_transcription_task(audio_url)
                print(f"任务已提交。任务 ID: {task_id}")

                final_response = poll_transcription_task(task_id)

                if final_response and final_response.output.task_status == 'SUCCEEDED':
                    print("转录成功完成。")
                    
                    for result in final_response.output.results:
                        print(f"\n处理文件: {result['file_url']}")
                        detailed_result = get_detailed_transcription(result['transcription_url'])
                        if detailed_result:
                            display_transcription_result(detailed_result)
                        else:
                            print("获取详细转录结果失败。")
                else:
                    print("转录失败或超时。")
            else:
                print("音频文件上传失败")
        else:
            print("录音失败")
        
        time.sleep(2)  # 等待2秒后准备下一次录音

if __name__ == "__main__":
    main()


按回车键开始录音...
开始录音...
请说话，录音将持续 20 秒...
还剩 20 秒...
还剩 19 秒...
还剩 18 秒...
还剩 17 秒...
还剩 16 秒...
还剩 15 秒...
还剩 14 秒...
还剩 13 秒...
还剩 12 秒...
还剩 11 秒...
还剩 10 秒...
还剩 9 秒...
还剩 8 秒...
还剩 7 秒...
还剩 6 秒...
还剩 5 秒...
还剩 4 秒...
还剩 3 秒...
还剩 2 秒...
还剩 1 秒...
录音完成
录音已保存为 /tmp/recording.wav
正在上传文件到 http://www.52ai.fun/api/upload
文件上传成功
服务器响应: {'message': 'File uploaded successfully', 'file_name': '0afdbcb3-af6d-4650-b525-5a9c07b8af4b.wav', 'file_url': 'http://www.52ai.fun/voice/0afdbcb3-af6d-4650-b525-5a9c07b8af4b.wav'}
服务器返回的音频URL: http://www.52ai.fun/voice/0afdbcb3-af6d-4650-b525-5a9c07b8af4b.wav
正在进行语音识别...
任务已提交。任务 ID: 2a04b6ef-1f0e-4d5b-adab-57f4eb9ae1a5
尝试 1: 任务状态 - RUNNING
尝试 2: 任务状态 - RUNNING
尝试 3: 任务状态 - RUNNING
尝试 4: 任务状态 - RUNNING
尝试 5: 任务状态 - SUCCEEDED
转录成功完成。

处理文件: http://www.52ai.fun/voice/0afdbcb3-af6d-4650-b525-5a9c07b8af4b.wav

详细转录结果:
文件 URL: http://www.52ai.fun/voice/0afdbcb3-af6d-4650-b525-5a9c07b8af4b.wav
音频格式: pcm_s16le
采样率: 16000 Hz
时长: 19968 ms

通道 ID: 0
内容时长: 15070 ms
完整文

KeyboardInterrupt: Interrupted by user


按 Enter 键开始录音，或输入 'q' 退出程序: 
开始录音...
再次按 Enter 键结束录音...


录音结束

录音过程中发生错误: 'Audio' object has no attribute 'stop'
录音失败或被取消

准备下一次录音...


KeyboardInterrupt: Interrupted by user

ImportError: cannot import name 'Button' from 'unihiker' (/usr/local/lib/python3.7/dist-packages/unihiker/__init__.py)

In [None]:
导入pinpong库，用来处理行空板按键事件

In [2]:
pip install pinpong

You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


GUI is cleared because of reinit


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_901/2495461554.py", line 168, in <module>
    time.sleep(0.1)  # 小延迟以减少 CPU 使用
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2077, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/ultratb.py", line 1101, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/ultratb.py",

TypeError: object of type 'NoneType' has no len()

In [1]:
pip install unihiker

You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [None]:
添加了屏幕的触控按钮，但是按钮会闪烁。

In [9]:
import os
import time
import json
import requests
from unihiker import GUI, Audio
import dashscope
from dashscope.audio.asr import Transcription
import threading

# 初始化 Audio
audio = Audio()

# 服务器API地址
SERVER_URL = "http://www.52ai.fun"
API_URL = f"{SERVER_URL}/api/upload"

# API密钥
SERVER_API_KEY = "1F1vmARoSjXRTDvywh9XtbnR8vd74AfffF0t0jn3qhM"
dashscope.api_key = 'sk-7c04ee6f9432492bb344baa7a5c0162f'

# 全局变量
is_recording = False
audio_file = "/tmp/recording.wav"
recording_start_time = 0
elapsed_time = 0
gui = None

def print_status(message):
    print(f"[状态] {message}")

def update_gui():
    global gui, is_recording, elapsed_time
    if is_recording:
        gui.add_button(x=120, y=110, w=100, h=30, text=f"录音中 {elapsed_time}s", origin='center', onclick=start_recording, name="start_button")
        gui.add_button(x=120, y=210, w=100, h=30, text="结束录音", origin='center', onclick=stop_recording, name="stop_button")
    else:
        gui.add_button(x=120, y=110, w=100, h=30, text="开始录音", origin='center', onclick=start_recording, name="start_button")
        if elapsed_time > 0:
            gui.add_button(x=120, y=210, w=100, h=30, text="录音识别中...", origin='center', onclick=stop_recording, name="stop_button")
        else:
            gui.add_button(x=120, y=210, w=100, h=30, text="", origin='center', onclick=lambda: None, name="stop_button")
    gui.add_button(x=120, y=310, w=100, h=30, text="退出", origin='center', onclick=lambda: exit(), name="exit_button")

def start_recording():
    global is_recording, recording_start_time, elapsed_time
    if not is_recording:
        print_status("开始录音按钮被点击")
        try:
            audio.start_record(audio_file)
            is_recording = True
            recording_start_time = time.time()
            elapsed_time = 0
            print_status("录音开始")
            update_gui()
        except Exception as e:
            print_status(f"开始录音时发生错误: {e}")

def stop_recording():
    global is_recording, elapsed_time
    if is_recording:
        print_status("结束录音按钮被点击")
        try:
            audio.stop_record()
            is_recording = False
            print_status("录音停止，开始处理音频")
            update_gui()
            threading.Thread(target=process_audio, daemon=True).start()
        except Exception as e:
            print_status(f"停止录音时发生错误: {e}")

def process_audio():
    global elapsed_time
    print_status(f"录音完成，文件保存为: {audio_file}")
    audio_url = upload_audio(audio_file)
    if audio_url:
        print_status("音频文件上传成功，开始语音识别...")
        task_id = submit_transcription_task(audio_url)
        print_status(f"任务已提交。任务 ID: {task_id}")

        final_response = poll_transcription_task(task_id)

        if final_response and final_response.output.task_status == 'SUCCEEDED':
            print_status("转录成功完成。")
            
            for result in final_response.output.results:
                print_status(f"处理文件: {result['file_url']}")
                detailed_result = get_detailed_transcription(result['transcription_url'])
                if detailed_result:
                    display_transcription_result(detailed_result)
                else:
                    print_status("获取详细转录结果失败。")
        else:
            print_status("转录失败或超时。")
    else:
        print_status("音频文件上传失败")
    
    elapsed_time = 0
    update_gui()
    print_status("音频处理完成")

def upload_audio(file_path):
    if not os.path.exists(file_path):
        print_status(f"错误：文件 {file_path} 不存在")
        return None

    try:
        with open(file_path, 'rb') as audio_file:
            headers = {
                'Content-Type': 'audio/wav',
                'X-API-Key': SERVER_API_KEY
            }
            print_status(f"正在上传文件到 {API_URL}")
            response = requests.post(API_URL, data=audio_file, headers=headers)
        
        if response.status_code == 200:
            print_status("文件上传成功")
            result = response.json()
            print_status(f"服务器响应: {result}")
            if 'file_url' in result:
                audio_url = result['file_url']
                print_status(f"服务器返回的音频URL: {audio_url}")
                return audio_url
            else:
                print_status("警告：服务器响应中没有 file_url 字段")
        elif response.status_code == 401:
            print_status("上传失败：无效的 API 密钥")
        else:
            print_status(f"上传失败，状态码: {response.status_code}")
            print_status(f"服务器响应: {response.text}")
        return None
    except requests.RequestException as e:
        print_status(f"上传过程中发生错误: {e}")
        return None

def submit_transcription_task(file_url):
    try:
        task_response = Transcription.async_call(
            model='paraformer-v2',
            file_urls=[file_url],
            language_hints=['zh', 'en']
        )
        print_status(f"转录任务提交成功，任务ID: {task_response.output.task_id}")
        return task_response.output.task_id
    except Exception as e:
        print_status(f"提交转录任务时发生错误: {e}")
        return None

def fetch_transcription_result(task_id):
    try:
        return Transcription.fetch(task=task_id)
    except Exception as e:
        print_status(f"获取转录结果时发生错误: {e}")
        return None

def poll_transcription_task(task_id, max_attempts=30, interval=2):
    for attempt in range(max_attempts):
        response = fetch_transcription_result(task_id)
        if response:
            status = response.output.task_status
            print_status(f"尝试 {attempt + 1}: 任务状态 - {status}")
            if status in ['SUCCEEDED', 'FAILED']:
                return response
        else:
            print_status(f"尝试 {attempt + 1}: 获取任务状态失败")
        time.sleep(interval)
    print_status("任务轮询超时")
    return None

def get_detailed_transcription(transcription_url):
    try:
        response = requests.get(transcription_url)
        if response.status_code == 200:
            print_status("成功获取详细转录结果")
            return response.json()
        else:
            print_status(f"获取详细转录失败。状态码: {response.status_code}")
            return None
    except Exception as e:
        print_status(f"获取详细转录时发生错误: {e}")
        return None

def display_transcription_result(detailed_result):
    print_status("\n详细转录结果:")
    print_status(f"文件 URL: {detailed_result['file_url']}")
    print_status(f"音频格式: {detailed_result['properties']['audio_format']}")
    print_status(f"采样率: {detailed_result['properties']['original_sampling_rate']} Hz")
    print_status(f"时长: {detailed_result['properties']['original_duration_in_milliseconds']} ms")
    
    for transcript in detailed_result['transcripts']:
        print_status(f"\n通道 ID: {transcript['channel_id']}")
        print_status(f"内容时长: {transcript['content_duration_in_milliseconds']} ms")
        print_status(f"完整文本: {transcript['text']}")
        
        print_status("\n句子:")
        for sentence in transcript['sentences']:
            print_status(f"  {sentence['begin_time']} - {sentence['end_time']} ms: {sentence['text']}")
            
            print_status("  词:")
            for word in sentence['words']:
                print_status(f"    {word['begin_time']} - {word['end_time']} ms: {word['text']}{word['punctuation']}")

def main():
    global gui, is_recording, elapsed_time
    gui = GUI()
    
    update_gui()
    print_status("程序初始化完成，等待用户操作")

    last_update_time = time.time()
    # 主循环
    while True:
        if is_recording:
            current_time = time.time()
            new_elapsed_time = int(current_time - recording_start_time)
            if new_elapsed_time != elapsed_time:
                elapsed_time = new_elapsed_time
                print_status(f"录音进行中，已录制 {elapsed_time} 秒")
                if current_time - last_update_time >= 1:  # 每秒更新一次 GUI
                    update_gui()
                    last_update_time = current_time
        time.sleep(0.1)  # 小延迟以减少 CPU 使用

if __name__ == "__main__":
    main()


GUI is cleared because of reinit
[状态] 程序初始化完成，等待用户操作
[状态] 开始录音按钮被点击
[状态] 录音开始
[状态] 录音进行中，已录制 1 秒
[状态] 录音进行中，已录制 2 秒
[状态] 录音进行中，已录制 3 秒
[状态] 录音进行中，已录制 4 秒
[状态] 录音进行中，已录制 5 秒
[状态] 录音进行中，已录制 6 秒
[状态] 录音进行中，已录制 7 秒
[状态] 录音进行中，已录制 8 秒
[状态] 录音进行中，已录制 9 秒
[状态] 录音进行中，已录制 10 秒
[状态] 录音进行中，已录制 11 秒
[状态] 录音进行中，已录制 12 秒
[状态] 录音进行中，已录制 13 秒
[状态] 录音进行中，已录制 14 秒
[状态] 录音进行中，已录制 15 秒
[状态] 录音进行中，已录制 16 秒
[状态] 录音进行中，已录制 17 秒
[状态] 录音进行中，已录制 18 秒
[状态] 录音进行中，已录制 19 秒
[状态] 录音进行中，已录制 20 秒
[状态] 录音进行中，已录制 21 秒
[状态] 录音进行中，已录制 22 秒
[状态] 录音进行中，已录制 23 秒
[状态] 录音进行中，已录制 24 秒
[状态] 录音进行中，已录制 25 秒
[状态] 录音进行中，已录制 26 秒
[状态] 录音进行中，已录制 27 秒
[状态] 录音进行中，已录制 28 秒
[状态] 录音进行中，已录制 29 秒
[状态] 录音进行中，已录制 30 秒
[状态] 录音进行中，已录制 31 秒
[状态] 录音进行中，已录制 32 秒
[状态] 录音进行中，已录制 33 秒
[状态] 录音进行中，已录制 34 秒
[状态] 录音进行中，已录制 35 秒
[状态] 录音进行中，已录制 36 秒
[状态] 录音进行中，已录制 37 秒
[状态] 录音进行中，已录制 38 秒
[状态] 录音进行中，已录制 39 秒
[状态] 录音进行中，已录制 40 秒
[状态] 录音进行中，已录制 41 秒
[状态] 录音进行中，已录制 42 秒
[状态] 录音进行中，已录制 43 秒
[状态] 录音进行中，已录制 44 秒
[状态] 录音进行中，已录制 45 秒
[状态] 录音进行中，已录制 46 秒
[状态] 结束录音按钮

KeyboardInterrupt: 

In [None]:
重新设计了按钮布局，有按钮不断刷新的问题。

In [11]:
import os
import time
import json
import requests
from unihiker import GUI, Audio
import dashscope
from dashscope.audio.asr import Transcription
import threading

# 初始化 Audio
audio = Audio()

# 服务器API地址
SERVER_URL = "http://www.52ai.fun"
API_URL = f"{SERVER_URL}/api/upload"

# API密钥
SERVER_API_KEY = "1F1vmARoSjXRTDvywh9XtbnR8vd74AfffF0t0jn3qhM"
dashscope.api_key = 'sk-7c04ee6f9432492bb344baa7a5c0162f'

# 全局变量
is_recording = False
audio_file = "/tmp/recording.wav"
recording_start_time = 0
elapsed_time = 0

def print_status(message):
    print(f"[状态] {message}")

def update_gui():
    global gui
    gui = GUI()  # 重新初始化 GUI
    if is_recording:
        gui.add_button(x=120, y=80, w=160, h=60, text=f"录音中 {elapsed_time}s", origin='center', onclick=start_recording, name="start_button", state="disabled")
        gui.add_button(x=120, y=160, w=160, h=60, text="结束录音", origin='center', onclick=stop_recording, name="stop_button")
    else:
        gui.add_button(x=120, y=80, w=160, h=60, text="开始录音", origin='center', onclick=start_recording, name="start_button")
        gui.add_button(x=120, y=160, w=160, h=60, text="结束录音", origin='center', onclick=stop_recording, name="stop_button", state="disabled")
    gui.add_button(x=120, y=240, w=160, h=60, text="退出", origin='center', onclick=lambda: exit(), name="exit_button")

def start_recording():
    global is_recording, recording_start_time, elapsed_time
    if not is_recording:
        print_status("开始录音按钮被点击")
        try:
            audio.start_record(audio_file)
            is_recording = True
            recording_start_time = time.time()
            elapsed_time = 0
            print_status("录音开始")
            update_gui()
        except Exception as e:
            print_status(f"开始录音时发生错误: {e}")

def stop_recording():
    global is_recording, elapsed_time
    if is_recording:
        print_status("结束录音按钮被点击")
        try:
            audio.stop_record()
            is_recording = False
            print_status("录音停止，开始处理音频")
            update_gui()
            threading.Thread(target=process_audio, daemon=True).start()
        except Exception as e:
            print_status(f"停止录音时发生错误: {e}")

def process_audio():
    global elapsed_time, gui
    print_status(f"录音完成，文件保存为: {audio_file}")
    gui.add_button(x=120, y=160, w=160, h=60, text="录音识别中...", origin='center', onclick=stop_recording, name="stop_button", state="disabled")
    
    audio_url = upload_audio(audio_file)
    if audio_url:
        print_status("音频文件上传成功，开始语音识别...")
        task_id = submit_transcription_task(audio_url)
        print_status(f"任务已提交。任务 ID: {task_id}")

        final_response = poll_transcription_task(task_id)

        if final_response and final_response.output.task_status == 'SUCCEEDED':
            print_status("转录成功完成。")
            
            for result in final_response.output.results:
                print_status(f"处理文件: {result['file_url']}")
                detailed_result = get_detailed_transcription(result['transcription_url'])
                if detailed_result:
                    display_transcription_result(detailed_result)
                else:
                    print_status("获取详细转录结果失败。")
        else:
            print_status("转录失败或超时。")
    else:
        print_status("音频文件上传失败")
    
    elapsed_time = 0
    update_gui()
    print_status("音频处理完成")

def upload_audio(file_path):
    if not os.path.exists(file_path):
        print_status(f"错误：文件 {file_path} 不存在")
        return None

    try:
        with open(file_path, 'rb') as audio_file:
            headers = {
                'Content-Type': 'audio/wav',
                'X-API-Key': SERVER_API_KEY
            }
            print_status(f"正在上传文件到 {API_URL}")
            response = requests.post(API_URL, data=audio_file, headers=headers)
        
        if response.status_code == 200:
            print_status("文件上传成功")
            result = response.json()
            print_status(f"服务器响应: {result}")
            if 'file_url' in result:
                audio_url = result['file_url']
                print_status(f"服务器返回的音频URL: {audio_url}")
                return audio_url
            else:
                print_status("警告：服务器响应中没有 file_url 字段")
        elif response.status_code == 401:
            print_status("上传失败：无效的 API 密钥")
        else:
            print_status(f"上传失败，状态码: {response.status_code}")
            print_status(f"服务器响应: {response.text}")
        return None
    except requests.RequestException as e:
        print_status(f"上传过程中发生错误: {e}")
        return None

def submit_transcription_task(file_url):
    try:
        task_response = Transcription.async_call(
            model='paraformer-v2',
            file_urls=[file_url],
            language_hints=['zh', 'en']
        )
        print_status(f"转录任务提交成功，任务ID: {task_response.output.task_id}")
        return task_response.output.task_id
    except Exception as e:
        print_status(f"提交转录任务时发生错误: {e}")
        return None

def fetch_transcription_result(task_id):
    try:
        return Transcription.fetch(task=task_id)
    except Exception as e:
        print_status(f"获取转录结果时发生错误: {e}")
        return None

def poll_transcription_task(task_id, max_attempts=30, interval=2):
    for attempt in range(max_attempts):
        response = fetch_transcription_result(task_id)
        if response:
            status = response.output.task_status
            print_status(f"尝试 {attempt + 1}: 任务状态 - {status}")
            if status in ['SUCCEEDED', 'FAILED']:
                return response
        else:
            print_status(f"尝试 {attempt + 1}: 获取任务状态失败")
        time.sleep(interval)
    print_status("任务轮询超时")
    return None

def get_detailed_transcription(transcription_url):
    try:
        response = requests.get(transcription_url)
        if response.status_code == 200:
            print_status("成功获取详细转录结果")
            return response.json()
        else:
            print_status(f"获取详细转录失败。状态码: {response.status_code}")
            return None
    except Exception as e:
        print_status(f"获取详细转录时发生错误: {e}")
        return None

def display_transcription_result(detailed_result):
    print_status("\n详细转录结果:")
    print_status(f"文件 URL: {detailed_result['file_url']}")
    print_status(f"音频格式: {detailed_result['properties']['audio_format']}")
    print_status(f"采样率: {detailed_result['properties']['original_sampling_rate']} Hz")
    print_status(f"时长: {detailed_result['properties']['original_duration_in_milliseconds']} ms")
    
    for transcript in detailed_result['transcripts']:
        print_status(f"\n通道 ID: {transcript['channel_id']}")
        print_status(f"内容时长: {transcript['content_duration_in_milliseconds']} ms")
        print_status(f"完整文本: {transcript['text']}")
        
        print_status("\n句子:")
        for sentence in transcript['sentences']:
            print_status(f"  {sentence['begin_time']} - {sentence['end_time']} ms: {sentence['text']}")
            
            print_status("  词:")
            for word in sentence['words']:
                print_status(f"    {word['begin_time']} - {word['end_time']} ms: {word['text']}{word['punctuation']}")

def main():
    global elapsed_time, gui
    gui = GUI()
    
    update_gui()
    print_status("程序初始化完成，等待用户操作")

    # 主循环
    while True:
        if is_recording:
            current_time = time.time()
            new_elapsed_time = int(current_time - recording_start_time)
            if new_elapsed_time != elapsed_time:
                elapsed_time = new_elapsed_time
                print_status(f"录音进行中，已录制 {elapsed_time} 秒")
                update_gui()
        time.sleep(0.1)  # 小延迟以减少 CPU 使用

if __name__ == "__main__":
    main()


GUI is cleared because of reinit
GUI is cleared because of reinit
[状态] 程序初始化完成，等待用户操作
[状态] 开始录音按钮被点击
[状态] 录音开始
GUI is cleared because of reinit
[状态] 录音进行中，已录制 1 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 2 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 3 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 4 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 5 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 6 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 7 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 8 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 9 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 10 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 11 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 12 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 13 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 14 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 15 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 16 秒
GUI is cleared because of reinit
[状态] 录音进行中，已录制 17

KeyboardInterrupt: 

GUI is cleared because of reinit
GUI is cleared because of reinit


AttributeError: 'GUI' object has no attribute 'add_label'

In [None]:
import os
import time
import json
import requests
from unihiker import GUI, Audio
import dashscope
from dashscope.audio.asr import Transcription
import threading

# 初始化 Audio 和 GUI
audio = Audio()
gui = GUI()

# 服务器API地址
SERVER_URL = "http://www.52ai.fun"
API_URL = f"{SERVER_URL}/api/upload"

# API密钥
SERVER_API_KEY = "1F1vmARoSjXRTDvywh9XtbnR8vd74AfffF0t0jn3qhM"
dashscope.api_key = 'sk-7c04ee6f9432492bb344baa7a5c0162f'

# 全局变量
is_recording = False
audio_file = "/tmp/recording.wav"
recording_start_time = 0
elapsed_time = 0
time_text = None

def print_status(message):
    print(f"[状态] {message}")

def update_gui():
    global gui, time_text
    gui.clear()  # 清除之前的所有元素
    time_text = gui.draw_text(x=120, y=40, text="", origin='center')
    if is_recording:
        gui.add_button(x=120, y=100, w=160, h=60, text="录音中", origin='center', onclick=start_recording, name="start_button", state="disabled")
        gui.add_button(x=120, y=180, w=160, h=60, text="结束录音", origin='center', onclick=stop_recording, name="stop_button")
    else:
        gui.add_button(x=120, y=100, w=160, h=60, text="开始录音", origin='center', onclick=start_recording, name="start_button")
        gui.add_button(x=120, y=180, w=160, h=60, text="结束录音", origin='center', onclick=stop_recording, name="stop_button", state="disabled")
    gui.add_button(x=120, y=260, w=160, h=60, text="退出", origin='center', onclick=lambda: exit(), name="exit_button")

def update_time_text():
    global time_text
    if time_text:
        time_text.text = f"录音时间: {elapsed_time}秒"

def start_recording():
    global is_recording, recording_start_time, elapsed_time
    if not is_recording:
        print_status("开始录音按钮被点击")
        try:
            audio.start_record(audio_file)
            is_recording = True
            recording_start_time = time.time()
            elapsed_time = 0
            print_status("录音开始")
            update_gui()
        except Exception as e:
            print_status(f"开始录音时发生错误: {e}")

def stop_recording():
    global is_recording, elapsed_time
    if is_recording:
        print_status("结束录音按钮被点击")
        try:
            audio.stop_record()
            is_recording = False
            print_status("录音停止，开始处理音频")
            update_gui()
            threading.Thread(target=process_audio, daemon=True).start()
        except Exception as e:
            print_status(f"停止录音时发生错误: {e}")

def process_audio():
    global elapsed_time, gui
    print_status(f"录音完成，文件保存为: {audio_file}")
    gui.add_button(x=120, y=180, w=160, h=60, text="录音识别中...", origin='center', onclick=lambda: None, name="stop_button")
    
    audio_url = upload_audio(audio_file)
    if audio_url:
        print_status("音频文件上传成功，开始语音识别...")
        task_id = submit_transcription_task(audio_url)
        print_status(f"任务已提交。任务 ID: {task_id}")

        final_response = poll_transcription_task(task_id)

        if final_response and final_response.output.task_status == 'SUCCEEDED':
            print_status("转录成功完成。")
            
            for result in final_response.output.results:
                print_status(f"处理文件: {result['file_url']}")
                detailed_result = get_detailed_transcription(result['transcription_url'])
                if detailed_result:
                    display_transcription_result(detailed_result)
                else:
                    print_status("获取详细转录结果失败。")
        else:
            print_status("转录失败或超时。")
    else:
        print_status("音频文件上传失败")
    
    elapsed_time = 0
    update_gui()
    print_status("音频处理完成")

def upload_audio(file_path):
    if not os.path.exists(file_path):
        print_status(f"错误：文件 {file_path} 不存在")
        return None

    try:
        with open(file_path, 'rb') as audio_file:
            headers = {
                'Content-Type': 'audio/wav',
                'X-API-Key': SERVER_API_KEY
            }
            print_status(f"正在上传文件到 {API_URL}")
            response = requests.post(API_URL, data=audio_file, headers=headers)
        
        if response.status_code == 200:
            print_status("文件上传成功")
            result = response.json()
            print_status(f"服务器响应: {result}")
            if 'file_url' in result:
                audio_url = result['file_url']
                print_status(f"服务器返回的音频URL: {audio_url}")
                return audio_url
            else:
                print_status("警告：服务器响应中没有 file_url 字段")
        elif response.status_code == 401:
            print_status("上传失败：无效的 API 密钥")
        else:
            print_status(f"上传失败，状态码: {response.status_code}")
            print_status(f"服务器响应: {response.text}")
        return None
    except requests.RequestException as e:
        print_status(f"上传过程中发生错误: {e}")
        return None

def submit_transcription_task(file_url):
    try:
        task_response = Transcription.async_call(
            model='paraformer-v2',
            file_urls=[file_url],
            language_hints=['zh', 'en']
        )
        print_status(f"转录任务提交成功，任务ID: {task_response.output.task_id}")
        return task_response.output.task_id
    except Exception as e:
        print_status(f"提交转录任务时发生错误: {e}")
        return None

def fetch_transcription_result(task_id):
    try:
        return Transcription.fetch(task=task_id)
    except Exception as e:
        print_status(f"获取转录结果时发生错误: {e}")
        return None

def poll_transcription_task(task_id, max_attempts=30, interval=2):
    for attempt in range(max_attempts):
        response = fetch_transcription_result(task_id)
        if response:
            status = response.output.task_status
            print_status(f"尝试 {attempt + 1}: 任务状态 - {status}")
            if status in ['SUCCEEDED', 'FAILED']:
                return response
        else:
            print_status(f"尝试 {attempt + 1}: 获取任务状态失败")
        time.sleep(interval)
    print_status("任务轮询超时")
    return None

def get_detailed_transcription(transcription_url):
    try:
        response = requests.get(transcription_url)
        if response.status_code == 200:
            print_status("成功获取详细转录结果")
            return response.json()
        else:
            print_status(f"获取详细转录失败。状态码: {response.status_code}")
            return None
    except Exception as e:
        print_status(f"获取详细转录时发生错误: {e}")
        return None

def display_transcription_result(detailed_result):
    print_status("\n详细转录结果:")
    print_status(f"文件 URL: {detailed_result['file_url']}")
    print_status(f"音频格式: {detailed_result['properties']['audio_format']}")
    print_status(f"采样率: {detailed_result['properties']['original_sampling_rate']} Hz")
    print_status(f"时长: {detailed_result['properties']['original_duration_in_milliseconds']} ms")
    
    for transcript in detailed_result['transcripts']:
        print_status(f"\n通道 ID: {transcript['channel_id']}")
        print_status(f"内容时长: {transcript['content_duration_in_milliseconds']} ms")
        print_status(f"完整文本: {transcript['text']}")
        
        print_status("\n句子:")
        for sentence in transcript['sentences']:
            print_status(f"  {sentence['begin_time']} - {sentence['end_time']} ms: {sentence['text']}")
            
            print_status("  词:")
            for word in sentence['words']:
                print_status(f"    {word['begin_time']} - {word['end_time']} ms: {word['text']}{word['punctuation']}")

def main():
    global elapsed_time
    
    update_gui()
    print_status("程序初始化完成，等待用户操作")

    # 主循环
    while True:
        if is_recording:
            current_time = time.time()
            new_elapsed_time = int(current_time - recording_start_time)
            if new_elapsed_time != elapsed_time:
                elapsed_time = new_elapsed_time
                print_status(f"录音进行中，已录制 {elapsed_time} 秒")
                update_time_text()
        time.sleep(0.1)  # 小延迟以减少 CPU 使用

if __name__ == "__main__":
    main()


GUI is cleared because of reinit
[状态] 程序初始化完成，等待用户操作
[状态] 开始录音按钮被点击
[状态] 录音开始
[状态] 录音进行中，已录制 1 秒
[状态] 录音进行中，已录制 2 秒
[状态] 录音进行中，已录制 3 秒
[状态] 录音进行中，已录制 4 秒
[状态] 录音进行中，已录制 5 秒
[状态] 录音进行中，已录制 6 秒
[状态] 录音进行中，已录制 7 秒
[状态] 录音进行中，已录制 8 秒
[状态] 录音进行中，已录制 9 秒
[状态] 录音进行中，已录制 10 秒
[状态] 录音进行中，已录制 11 秒
[状态] 录音进行中，已录制 12 秒
[状态] 录音进行中，已录制 13 秒
[状态] 录音进行中，已录制 14 秒
[状态] 录音进行中，已录制 15 秒
[状态] 录音进行中，已录制 16 秒
[状态] 录音进行中，已录制 17 秒
[状态] 录音进行中，已录制 18 秒
[状态] 录音进行中，已录制 19 秒
[状态] 录音进行中，已录制 20 秒
[状态] 录音进行中，已录制 21 秒
[状态] 录音进行中，已录制 22 秒
[状态] 录音进行中，已录制 23 秒
[状态] 录音进行中，已录制 24 秒
[状态] 录音进行中，已录制 25 秒
[状态] 录音进行中，已录制 26 秒
[状态] 录音进行中，已录制 27 秒
[状态] 录音进行中，已录制 28 秒
[状态] 录音进行中，已录制 29 秒
[状态] 录音进行中，已录制 30 秒
[状态] 录音进行中，已录制 31 秒
[状态] 录音进行中，已录制 32 秒
[状态] 录音进行中，已录制 33 秒
[状态] 录音进行中，已录制 34 秒
[状态] 录音进行中，已录制 35 秒
[状态] 录音进行中，已录制 36 秒
[状态] 录音进行中，已录制 37 秒
[状态] 录音进行中，已录制 38 秒
[状态] 录音进行中，已录制 39 秒
[状态] 录音进行中，已录制 40 秒
[状态] 录音进行中，已录制 41 秒
[状态] 录音进行中，已录制 42 秒
[状态] 录音进行中，已录制 43 秒
[状态] 录音进行中，已录制 44 秒
[状态] 录音进行中，已录制 45 秒
[状态] 录音进行中，已录制 46 秒
[状态] 录音进行中，