In [1]:
# 功能描述
# 監聽麥克風，並產生文字

In [1]:
import tkinter as tk
import speech_recognition as sr
import threading
from tkinter import filedialog
from pydub import AudioSegment # 用以保存wav檔案
import io

# 初始化錄音器
recognizer = sr.Recognizer()
recording = False
audio_segments = []

def start_recording():
    global recording, audio_segments
    recording = True
    audio_segments = []  # 清空之前的音频片段
    status_label.config(text="狀態: 錄音中...")
    threading.Thread(target=record_and_transcribe).start()

def stop_recording():
    global recording
    recording = False
    status_label.config(text="狀態: 停止錄音")
    save_audio_to_wav()  # 停止錄音時保存音频文件

def record_and_transcribe():
    with sr.Microphone() as source:
        recognizer.adjust_for_ambient_noise(source)
        print("Recording started...")
        while recording:
            try:
                # 錄製音訊
                audio = recognizer.listen(source, timeout=5)
                
                # 將音訊轉換為文字
                text = recognizer.recognize_google(audio, language="zh-TW")
                print("Recognized Text:", text)
                
                # 將文字添加到新視窗
                transcript_text.insert(tk.END, text + "\n")
                transcript_text.see(tk.END)  # 移動到最新的內容
                
                # 保存音频片段到列表
                audio_data = io.BytesIO(audio.get_wav_data())
                audio_segment = AudioSegment.from_wav(audio_data)
                audio_segments.append(audio_segment)

            except sr.UnknownValueError:
                print("未能偵測到音訊...")
            except sr.RequestError:
                print("Google語音服務不可用...")

# 清空文字視窗
def clear_text():
    transcript_text.delete("1.0", tk.END)

# 將文字視窗保存到文件
def save_text():
    text_content = transcript_text.get("1.0", tk.END).strip()
    if text_content:
        file_path = filedialog.asksaveasfilename(defaultextension=".txt",
                                                 filetypes=[("Text files", "*.txt")])
        if file_path:
            with open(file_path, "w", encoding="utf-8") as file:
                file.write(text_content)
            print("内容已保存到", file_path)

# 保存音訊為wav 
def save_audio_to_wav():
    if audio_segments:
        # 合併所有錄音
        combined_audio = sum(audio_segments)
        file_path = filedialog.asksaveasfilename(defaultextension=".wav",
                                                 filetypes=[("WAV files", "*.wav")])
        if file_path:
            combined_audio.export(file_path, format="wav")
            print("wav檔案已保存到", file_path)

# 設置主要UI
root = tk.Tk()
root.title("麥克風語音轉換文字")
root.geometry("350x50")

# 錄製和停止錄製按鈕
start_button = tk.Button(root, text="錄製", command=start_recording)
start_button.place(x=25, y=10)

stop_button = tk.Button(root, text="停止錄製並保存wav", command=stop_recording)
stop_button.place(x=75, y=10)

# 狀態欄顯示
status_label = tk.Label(root, text="狀態: 請按下按鈕以開始", fg="blue")
status_label.place(x=200, y=10)

# 新增一個視窗以顯示轉錄文字
transcript_window = tk.Toplevel(root)
transcript_window.title("轉錄內容")

# 多行文字框，用以顯示轉錄內容
transcript_text = tk.Text(transcript_window, wrap="word", height=20, width=50)
transcript_text.pack(padx=10, pady=10)

# 清空按鈕
clear_button = tk.Button(transcript_window, text="清空内容", command=clear_text)
clear_button.pack(side="left", padx=10, pady=5)

# 保存按钮
save_button = tk.Button(transcript_window, text="保存文字檔", command=save_text)
save_button.pack(side="right", padx=10, pady=5)

root.mainloop()

Recording started...
result2:
{   'alternative': [{'confidence': 0.88902932, 'transcript': '2345'}],
    'final': True}
Recognized Text: 2345


Exception in thread Thread-5:
Traceback (most recent call last):
  File "C:\Users\USER\anaconda3\envs\AI_Friend\lib\threading.py", line 926, in _bootstrap_inner
    self.run()
  File "C:\Users\USER\anaconda3\envs\AI_Friend\lib\threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\USER\AppData\Local\Temp\ipykernel_27356\54898326.py", line 33, in record_and_transcribe
    audio = recognizer.listen(source, timeout=5)
  File "C:\Users\USER\anaconda3\envs\AI_Friend\lib\site-packages\speech_recognition\__init__.py", line 675, in listen
    raise WaitTimeoutError("listening timed out while waiting for phrase to start")
speech_recognition.WaitTimeoutError: listening timed out while waiting for phrase to start

