In [1]:
 import asyncio
import json
import pyaudio
import websockets
import ssl
import nest_asyncio
from openai import OpenAI
from collections import deque
import requests
import sounddevice as sd
import soundfile as sf
from io import BytesIO
import time
import wave

# 允许嵌套事件循环
nest_asyncio.apply()

class MicrophoneClient:
    """Voice-assistant client: microphone -> ASR websocket -> DeepSeek -> TTS playback.

    Audio captured from the default input device is streamed to a speech
    recognition server over a websocket. Every final ("2pass-offline")
    transcript is sent to the DeepSeek chat API and the reply is spoken through
    an HTTP TTS service. Recording is paused while the reply is being played.
    """

    # (connect, read) timeout in seconds for the TTS HTTP request, so that an
    # unresponsive TTS server cannot hang the conversation forever.
    TTS_TIMEOUT = (5, 120)

    def __init__(self, host="localhost", port=10095, use_ssl=True, deepseek_api_key=None, tts_url="http://127.0.0.1:9880"):
        """Store connection settings and create the optional DeepSeek client.

        Args:
            host: Host name or IP of the speech recognition websocket server.
            port: Port of the speech recognition websocket server.
            use_ssl: Connect with ``wss://`` when true, ``ws://`` otherwise.
            deepseek_api_key: API key for DeepSeek; when falsy no client is
                created and ``get_ai_response`` returns a fixed notice.
            tts_url: URL of the TTS HTTP endpoint.
        """
        self.host = host
        self.port = port
        self.use_ssl = use_ssl
        self.chunk_size = [5, 10, 5]
        self.chunk_interval = 10
        self.websocket = None
        self.last_text = ""
        self.tts_url = tts_url
        self.recording = True  # recording gate checked by the capture loop
        self.stream = None  # PyAudio input stream, set while capturing
        self.audio = None  # PyAudio instance, set while capturing

        # DeepSeek is reached through the OpenAI-compatible client.
        self.deepseek_client = OpenAI(
            api_key=deepseek_api_key,
            base_url="https://api.deepseek.com"
        ) if deepseek_api_key else None

        # Conversation history, limited to the 10 most recent exchanges.
        self.conversation_history = deque(maxlen=10)

    def close(self):
        """Release the input stream and the PyAudio instance.

        Safe to call repeatedly: the handles are cleared before they are
        released, so a second call (or another cleanup path) finds nothing
        left to close.
        """
        stream, self.stream = self.stream, None
        audio, self.audio = self.audio, None
        if stream is not None:
            try:
                stream.stop_stream()
                stream.close()
            except Exception as e:
                print(f"Audio cleanup error: {e}")
        if audio is not None:
            audio.terminate()

    def play_audio(self, wav_data):
        """Decode audio bytes and play them, blocking until playback ends.

        This call blocks; from a coroutine, run it in an executor.

        Args:
            wav_data: Encoded audio file content as bytes.
        """
        try:
            wav_io = BytesIO(wav_data)
            wav_io.seek(0)
            data, samplerate = sf.read(wav_io)

            # Blocking playback so the caller knows when the audio finished.
            sd.play(data, samplerate, blocking=True)

            # Short pause after playback completes.
            time.sleep(0.3)

        except Exception as e:
            print(f"音频播放错误: {e}")

    def _resume_recording(self):
        """Restart the input stream (if any) and re-open the recording gate."""
        try:
            if self.stream:
                self.stream.start_stream()
        except Exception as e:
            print(f"TTS转换错误: {e}")
        finally:
            self.recording = True

    async def text_to_speech(self, text):
        """Speak ``text`` through the TTS service with recording paused.

        The HTTP request and the playback both block, so they run in the
        default executor to keep the event loop responsive. Recording is
        always resumed, whether the call succeeds, fails or is cancelled.

        Args:
            text: Text to synthesize. Empty or whitespace-only text is skipped.
        """
        if not text or not text.strip():
            return

        loop = asyncio.get_running_loop()
        finished = False
        # Pause recording first.
        self.recording = False
        try:
            if self.stream:
                self.stream.stop_stream()
                # Give the capture side a moment to settle before speaking.
                await asyncio.sleep(1.0)

            payload = {
                "text": text,
                "text_language": "zh",
            }

            print("\n正在生成语音...")
            response = await loop.run_in_executor(
                None,
                lambda: requests.post(self.tts_url, json=payload, timeout=self.TTS_TIMEOUT),
            )

            if response.status_code == 200:
                print("播放中...")
                await loop.run_in_executor(None, self.play_audio, response.content)
                # Extra wait after playback so the sound has fully faded.
                await asyncio.sleep(1.5)
            else:
                print(f"TTS API调用失败: {response.status_code}")

            print("准备继续录音...")
            # Wait a little longer before re-enabling the microphone.
            await asyncio.sleep(1.5)
            finished = True
        except Exception as e:
            print(f"TTS转换错误: {e}")
        finally:
            self._resume_recording()

        if finished:
            print("录音已恢复")

    async def get_ai_response(self, text):
        """Return DeepSeek's reply to ``text`` given the stored history.

        Args:
            text: The user's utterance.

        Returns:
            The reply text, a fixed notice when no API key was configured, or
            an error description when the request fails. Only successful
            exchanges are appended to ``conversation_history``.
        """
        if not self.deepseek_client:
            return "未配置DeepSeek API密钥"

        try:
            messages = [
                {"role": "system", "content": (
                    "你是回龙观医院心理科王医生，请针对患者或患者家属的提问给出合适的回复。"
                    "回复要言简意赅，控制在100字以内。"
                )}
            ]

            for history in self.conversation_history:
                messages.append({"role": "user", "content": history["user"]})
                messages.append({"role": "assistant", "content": history["assistant"]})

            messages.append({"role": "user", "content": text})

            # The client call is synchronous; run it off the event loop.
            response = await asyncio.get_running_loop().run_in_executor(
                None,
                lambda: self.deepseek_client.chat.completions.create(
                    model="deepseek-chat",
                    messages=messages,
                    max_tokens=400,
                    stream=False
                )
            )
            # Normalise a missing reply to "" so that history never holds None,
            # which would be sent back as message content on the next turn.
            ai_reply = response.choices[0].message.content or ""

            self.conversation_history.append({
                "user": text,
                "assistant": ai_reply
            })

            return ai_reply
        except Exception as e:
            return f"获取AI回复失败: {str(e)}"

    async def _run_session(self):
        """Run capture and receive loops until either one finishes.

        When one side ends (for example the receiver after the connection
        closes), the other is cancelled so the session terminates instead of
        waiting forever. An exception raised by a finished task is re-raised.
        """
        tasks = [
            asyncio.create_task(self._record_microphone()),
            asyncio.create_task(self._receive_messages()),
        ]
        try:
            done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
        finally:
            for task in tasks:
                task.cancel()  # no-op for tasks that already finished
            await asyncio.gather(*tasks, return_exceptions=True)

        for task in done:
            if not task.cancelled() and task.exception() is not None:
                raise task.exception()

    async def start_streaming(self):
        """Connect to the recognition server and run one streaming session.

        Connection failures are retried up to three times with a two second
        delay between attempts.

        Raises:
            websockets.exceptions.WebSocketException, TimeoutError,
            ConnectionRefusedError: When the last connection attempt fails.
        """
        MAX_RETRIES = 3
        RETRY_DELAY = 2
        retry_count = 0

        uri = f"{'wss' if self.use_ssl else 'ws'}://{self.host}:{self.port}"
        ssl_context = None
        if self.use_ssl:
            # NOTE(security): certificate and host name verification are
            # disabled, so the server's identity is not authenticated.
            ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE

        while retry_count < MAX_RETRIES:
            try:
                print(f"正在连接服务器 {uri}...")
                async with websockets.connect(
                    uri,
                    subprotocols=["binary"],
                    ping_interval=None,
                    ssl=ssl_context
                ) as self.websocket:
                    print("连接成功！")
                    await self._run_session()
                break  # session ran to completion; do not retry
            except (
                websockets.exceptions.WebSocketException,
                TimeoutError,
                asyncio.TimeoutError,  # distinct from TimeoutError before Python 3.11
                ConnectionRefusedError,
            ) as e:
                retry_count += 1
                if retry_count < MAX_RETRIES:
                    print(f"连接失败: {str(e)}")
                    print(f"将在 {RETRY_DELAY} 秒后进行第 {retry_count + 1} 次重试...")
                    await asyncio.sleep(RETRY_DELAY)
                else:
                    print(f"连接失败，已达到最大重试次数 ({MAX_RETRIES})。")
                    print("请检查:")
                    print("1. 语音识别服务是否已启动")
                    print(f"2. 服务器地址 ({self.host}) 和端口 ({self.port}) 是否正确")
                    print("3. 网络连接是否正常")
                    print(f"详细错误: {str(e)}")
                    raise

    async def _record_microphone(self):
        """Capture 16 kHz mono 16-bit audio and stream it to the server.

        A JSON configuration message is sent first, then raw audio chunks
        while ``self.recording`` is true. The loop ends when the websocket is
        closed or the task is cancelled; the audio resources are always
        released on exit.
        """
        FORMAT = pyaudio.paInt16
        CHANNELS = 1
        RATE = 16000
        chunk_size = 60 * self.chunk_size[1] / self.chunk_interval  # milliseconds per chunk
        CHUNK = int(RATE / 1000 * chunk_size)  # frames per chunk

        self.audio = pyaudio.PyAudio()
        try:
            self.stream = self.audio.open(
                format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK
            )

            config_message = json.dumps({
                "mode": "2pass",
                "chunk_size": self.chunk_size,
                "chunk_interval": self.chunk_interval,
                "wav_name": "microphone",
                "is_speaking": True,
                "hotwords": "",
                "itn": True
            })
            await self.websocket.send(config_message)

            while True:
                if self.recording:  # capture only while the gate is open
                    try:
                        # NOTE: this read blocks the loop for about one chunk.
                        # It stays on the loop thread on purpose, so it can
                        # never overlap with text_to_speech stopping the stream.
                        data = self.stream.read(CHUNK)
                        await self.websocket.send(data)
                    except websockets.exceptions.ConnectionClosed as e:
                        # The connection is gone; retrying would only repeat
                        # the same error on every iteration.
                        print(f"Recording error: {e}")
                        break
                    except Exception as e:
                        if self.recording:  # stay quiet while paused for TTS
                            print(f"Recording error: {e}")
                await asyncio.sleep(0.005)
        finally:
            self.close()

    async def _handle_message(self, message):
        """Process one server message: show partials, answer final results."""
        msg_data = json.loads(message)
        if "text" not in msg_data:
            return

        new_text = msg_data["text"]
        mode = msg_data.get("mode", "")

        if not new_text.strip():
            return

        if mode == "2pass-online":
            # Partial result: refresh the same console line when it changes.
            if new_text != self.last_text:
                print(f"\r实时识别: {new_text}", end="")
                self.last_text = new_text
        elif mode == "2pass-offline":
            # Final result for the utterance: query the model and speak the reply.
            print(f"\nVAD结果: {new_text}")
            ai_response = await self.get_ai_response(new_text)
            print(f"AI回复: {ai_response}\n")
            await self.text_to_speech(ai_response)
            print(f"[对话历史: {len(self.conversation_history)}轮]\n")
            self.last_text = ""

    async def _receive_messages(self):
        """Receive recognition results until the connection ends.

        Messages that are not valid JSON are skipped; any other error while
        handling a message is printed and the loop continues. An error from
        the websocket itself is printed and ends the loop.
        """
        try:
            while True:
                message = await self.websocket.recv()
                try:
                    await self._handle_message(message)
                except json.JSONDecodeError:
                    continue
                except Exception as e:
                    print(f"处理消息错误: {e}")
        except Exception as e:
            print(f"接收消息错误: {e}")

def start_mic_client(deepseek_api_key=None, tts_url="http://127.0.0.1:9880", host="localhost", port=10095, use_ssl=True):
    """Create a MicrophoneClient and run it until it stops or is interrupted.

    Args:
        deepseek_api_key: API key for DeepSeek; replies are disabled when None.
        tts_url: URL of the TTS HTTP endpoint.
        host: Host name or IP of the speech recognition websocket server.
        port: Port of the speech recognition websocket server.
        use_ssl: Connect with ``wss://`` when true (the default), ``ws://``
            otherwise.
    """
    client = MicrophoneClient(
        deepseek_api_key=deepseek_api_key,
        tts_url=tts_url,
        host=host,
        port=port,
        use_ssl=use_ssl,
    )
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(client.start_streaming())
    except KeyboardInterrupt:
        print("\n录音已停止")
    except Exception as e:
        print(f"发生错误: {e}")
        print("\n如需重新启动，请再次运行程序。")
    finally:
        # Detach the handles before releasing them so nothing is closed twice.
        stream = getattr(client, "stream", None)
        audio = getattr(client, "audio", None)
        client.stream = None
        client.audio = None
        if stream is not None:
            try:
                stream.stop_stream()
                stream.close()
            except (OSError, ValueError):
                # The capture loop may already have closed this stream; a
                # second close must not raise out of the cleanup path.
                pass
        if audio is not None:
            audio.terminate()

# Usage example
if __name__ == "__main__":
    import os

    # Configuration. The API key is read from the environment so that the
    # secret is never committed with the source; when it is unset the client
    # still runs but answers with its "API key not configured" notice.
    DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
    TTS_URL = "http://127.0.0.1:9880"
    HOST = "192.168.8.167"
    PORT = 10096
    
    print("开始连接语音服务...")
    print(f"语音识别服务器: {HOST}:{PORT}")
    print(f"TTS服务器: {TTS_URL}")
    
    start_mic_client(
        deepseek_api_key=DEEPSEEK_API_KEY,
        tts_url=TTS_URL,
        host=HOST,
        port=PORT
    )

开始连接语音服务...
语音识别服务器: 192.168.8.167:10096
TTS服务器: http://127.0.0.1:9880
正在连接服务器 wss://192.168.8.167:10096...
连接成功！
实时识别: 怎么办眠也不
VAD结果: 一整个感觉最近这个精神状态不好和消化也不好，经常还会头疼失眠怎么办啊
AI回复: 建议您先调整作息，保持规律饮食和睡眠。可以尝试放松训练如深呼吸、冥想缓解压力。如果症状持续，建议来医院做详细检查，可能需要心理和生理双重干预。


正在生成语音...
播放中...
准备继续录音...
录音已恢复
[对话历史: 1轮]

实时识别: 百
VAD结果: ？510 
AI回复: 抱歉，我不太明白"510"的含义。如果您是想咨询心理健康相关问题，请详细描述您的症状和困扰，我会尽力为您解答。


正在生成语音...
播放中...
准备继续录音...
录音已恢复
[对话历史: 2轮]

实时识别: 不行
VAD结果: 不记得
AI回复: 没关系，可能是输入错误。如果您最近有情绪低落、焦虑或其他心理困扰，可以具体描述一下，我会根据您的情况给出专业建议。


正在生成语音...
播放中...
准备继续录音...
录音已恢复
[对话历史: 3轮]


VAD结果: 拜拜
AI回复: 再见，如果您有任何心理健康方面的疑问，随时可以来咨询。祝您生活愉快，保持身心健康。


正在生成语音...
播放中...
准备继续录音...
录音已恢复
[对话历史: 4轮]

实时识别: 你好
VAD结果: 健康
AI回复: 健康包括身体和心理两方面。建议您保持规律作息、均衡饮食、适度运动，同时关注情绪变化。如有不适，及时就医。祝您身心健康！


正在生成语音...


In [None]:
!pip install python-vlc