# 5.1语音识别大模型

录音识别
https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=seedasr-auc

语音合成
https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=ve-tts



本章最终将实现语音控制无人机飞行的案例。作为预备知识，本节课先学习两类大模型接口的调用：语音转文字（语音识别）和文字转语音（语音合成）。

创建应用
https://console.volcengine.com/speech/app




In [None]:
import json
import time
import uuid
import requests

# 读取配置文件
def read_config():
    """Load the API credentials from ``token.json`` in the working directory.

    Returns:
        A ``(appid, token)`` tuple read from the ``appid`` and ``token`` keys
        of the JSON object; a missing key yields ``None`` in its slot.

    Raises:
        SystemExit: With status 1, after printing a message, when the file
            does not exist or does not contain valid JSON.
    """
    try:
        # Explicit encoding so parsing does not depend on the platform locale.
        with open('token.json', 'r', encoding='utf-8') as file:
            config = json.load(file)
    except FileNotFoundError:
        print("配置文件 'token.json' 未找到，请检查。")
        # SystemExit instead of the interactive-only ``exit()`` helper.
        raise SystemExit(1) from None
    except json.JSONDecodeError:
        print("配置文件 'token.json' 格式错误，请检查。")
        raise SystemExit(1) from None
    return config.get('appid'), config.get('token')

def submit_task(audio_url=None):
    """Submit an audio-recognition task to the ``auc/bigmodel/submit`` endpoint.

    Args:
        audio_url: Online URL of the audio to transcribe. When omitted, the
            module-level ``file_url`` is used.

    Returns:
        A ``(task_id, x_tt_logid)`` tuple: the UUID generated for this request
        and the ``X-Tt-Logid`` response header ("" when the header is absent).

    Raises:
        SystemExit: With status 1 when the response does not carry
            ``X-Api-Status-Code: 20000000``.
    """
    submit_url = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit"
    task_id = str(uuid.uuid4())
    appid, token = read_config()
    headers = {
        "X-Api-App-Key": appid,
        "X-Api-Access-Key": token,
        "X-Api-Resource-Id": "volc.bigasr.auc",
        # The same id is sent again by query_task to look the task up.
        "X-Api-Request-Id": task_id,
        "X-Api-Sequence": "-1"
    }
    request = {
        "user": {
            "uid": "fake_uid"
        },
        "audio": {
            "url": file_url if audio_url is None else audio_url,
            "format": "mp3",
            "codec": "raw",
            "rate": 16000,
            "bits": 16,
            "channel": 1
        },
        "request": {
            "model_name": "bigmodel",
            # Optional switches, disabled here:
            # "enable_itn": True,
            # "enable_punc": True,
            # "enable_ddc": True,
            "show_utterances": True,
            # "enable_channel_split": True,
            # "vad_segment": True,
            # "enable_speaker_info": True,
            "corpus": {
                # "boosting_table_name": "test",
                "correct_table_name": "",
                "context": ""
            }
        }
    }
    print(f'Submit task id: {task_id}')
    # A timeout keeps an unresponsive server from hanging the caller forever.
    response = requests.post(
        submit_url, data=json.dumps(request), headers=headers, timeout=30
    )
    status_code = response.headers.get("X-Api-Status-Code")
    if status_code != "20000000":
        print(f'Submit task failed and the response headers are: {response.headers}')
        raise SystemExit(1)

    # Read optional headers with .get so a missing header cannot raise KeyError.
    x_tt_logid = response.headers.get("X-Tt-Logid", "")
    print(f'Submit task response header X-Api-Status-Code: {status_code}')
    print(f'Submit task response header X-Api-Message: {response.headers.get("X-Api-Message", "")}')
    print(f'Submit task response header X-Tt-Logid: {x_tt_logid}\n')
    return task_id, x_tt_logid


def query_task(task_id, x_tt_logid):
    """Query the state of a previously submitted recognition task.

    Args:
        task_id: The request id that was sent when the task was submitted.
        x_tt_logid: The ``X-Tt-Logid`` value returned by the submit call; it
            is passed back unchanged on every query.

    Returns:
        The ``requests`` response object. Callers inspect its
        ``X-Api-Status-Code`` header and, once finished, its JSON body.

    Raises:
        SystemExit: With status 1 when the response has no
            ``X-Api-Status-Code`` header.
    """
    query_url = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/query"
    appid, token = read_config()
    headers = {
        "X-Api-App-Key": appid,
        "X-Api-Access-Key": token,
        "X-Api-Resource-Id": "volc.bigasr.auc",
        "X-Api-Request-Id": task_id,
        "X-Tt-Logid": x_tt_logid  # always pass x-tt-logid through
    }
    # A timeout keeps an unresponsive server from hanging the polling loop.
    response = requests.post(query_url, json.dumps({}), headers=headers, timeout=30)
    if 'X-Api-Status-Code' not in response.headers:
        print(f'Query task failed and the response headers are: {response.headers}')
        raise SystemExit(1)

    # Optional headers are read with .get so a missing one cannot raise KeyError.
    print(f'Query task response header X-Api-Status-Code: {response.headers["X-Api-Status-Code"]}')
    print(f'Query task response header X-Api-Message: {response.headers.get("X-Api-Message", "")}')
    print(f'Query task response header X-Tt-Logid: {response.headers.get("X-Tt-Logid", "")}\n')
    return response


def main():
    """Submit a recognition task and poll once per second until it ends.

    Returns:
        The parsed JSON body of the final query response when the task
        finishes (status code ``20000000``).

    Raises:
        SystemExit: With status 1 when a query reports any status code other
            than ``20000000``, ``20000001`` or ``20000002``.
    """
    task_id, x_tt_logid = submit_task()
    while True:
        query_response = query_task(task_id, x_tt_logid)
        code = query_response.headers.get('X-Api-Status-Code', "")
        if code == '20000000':  # task finished
            result = query_response.json()
            print(result)
            print("SUCCESS!")
            # Return instead of exit(0): the script still ends with status 0,
            # and a notebook kernel is not shut down.
            return result
        if code not in ('20000001', '20000002'):  # task failed
            print("FAILED!")
            raise SystemExit(1)
        # 20000001 / 20000002 are treated as "not done yet": wait, then retry.
        time.sleep(1)

# An online URL is required for the audio; hosting it on TOS is recommended.
# submit_task reads this module-level name when it builds the request body.
file_url = "https://yt-shanghai.tos-cn-shanghai.volces.com/mp3%E8%AF%AD%E9%9F%B3/%E7%81%AB%E5%B1%B1%E4%BA%91%E8%AF%AD%E9%9F%B3%E6%B5%8B%E8%AF%95.mp3"


# Run the submit-and-poll flow only when this code is executed as a script.
if __name__ == '__main__':
    main()


语音合成接口调用



In [6]:
#coding=utf-8

'''
requires Python 3.6 or later
pip install requests
语音合成
'''
import base64
import json
import uuid
import requests

# 填写平台申请的appid, access_token以及cluster
def read_config():
    """Load the API credentials from ``token.json`` in the working directory.

    Returns:
        A ``(appid, token)`` tuple read from the ``appid`` and ``token`` keys
        of the JSON object; a missing key yields ``None`` in its slot.

    Raises:
        SystemExit: With status 1, after printing a message, when the file
            does not exist or does not contain valid JSON.
    """
    try:
        # Explicit encoding so parsing does not depend on the platform locale.
        with open('token.json', 'r', encoding='utf-8') as file:
            config = json.load(file)
    except FileNotFoundError:
        print("配置文件 'token.json' 未找到，请检查。")
        # SystemExit instead of the interactive-only ``exit()`` helper.
        raise SystemExit(1) from None
    except json.JSONDecodeError:
        print("配置文件 'token.json' 格式错误，请检查。")
        raise SystemExit(1) from None
    return config.get('appid'), config.get('token')



# Credentials come from token.json; the cluster name is fixed here.
appid, access_token = read_config()
cluster = "volcano_tts"

# Alternative voice:
# voice_type = "BV700_streaming"
voice_type = "zh_male_yangguangqingnian_moon_bigtts"
host = "openspeech.bytedance.com"
# Build the endpoint from `host` so the host name is defined in one place.
api_url = f"https://{host}/api/v1/tts"

# The access token is sent in the Authorization header as "Bearer;<token>".
header = {"Authorization": f"Bearer;{access_token}"}
text = "你好，我是小爱同学。"

request_json = {
    "app": {
        "appid": appid,
        # NOTE(review): this is the literal string "access_token", not the
        # variable; the real token travels in the Authorization header.
        # Confirm the API does not validate this field.
        "token": "access_token",
        "cluster": cluster
    },
    "user": {
        "uid": "388808087185088"
    },
    "audio": {
        "voice_type": voice_type,
        "encoding": "mp3",
        "speed_ratio": 1.0,
        "volume_ratio": 1.0,
        "pitch_ratio": 1.0,
    },
    "request": {
        # A fresh request id is generated each time this code runs.
        "reqid": str(uuid.uuid4()),
        "text": text,
        "text_type": "plain",
        "operation": "query",
        "with_frontend": 1,
        "frontend_type": "unitTson"
    }
}

# Script entry point: send the synthesis request and save the returned audio.
if __name__ == '__main__':
    import traceback

    try:
        # A timeout keeps an unresponsive server from hanging the script.
        resp = requests.post(
            api_url, json.dumps(request_json), headers=header, timeout=30
        )
        # Parse the body once and reuse it.
        body = resp.json()
        print(f"resp body: \n{body}")
        if "data" in body:
            # "data" holds the base64-encoded audio; `with` guarantees the
            # file is flushed and closed.
            with open("test_submit.mp3", "wb") as file_to_save:
                file_to_save.write(base64.b64decode(body["data"]))
    except Exception:
        # Best-effort demo: report the failure instead of crashing.
        # (e.with_traceback() requires a traceback argument and itself raised
        # TypeError here.)
        traceback.print_exc()


resp body: 
{'reqid': 'd0898447-f34b-44fc-8859-a28937604058', 'code': 3000, 'operation': 'query', 'message': 'Success', 'sequence': -1, 'data': '//PkxAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA//PkxAA37CpOXkmTowVI3JLZbtt+DBGThcnRk6NskYsjFYXRkZOKBQgQAgKBQKCQVitG3tz9kCBAgJEBAKBQCYrFYrFYrFckDajuoxD0jFZPvm3NGxiiCBAKAoKB54DTsoAIkyBCI8QgxMVtrisVgmiJFE2gQZRo5Jhhjo0YrFYrJyRAxNG3SBAKzalyQMIISIBQSXUFEDSCKMjJ9gxJBAg