# In [None]:
import pyaudio
import wave
import threading
import numpy as np
import librosa
import speech_recognition as sr
import tkinter as tk
from tkinter import scrolledtext
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Audio stream parameters shared by the capture stream and the WAV writer.
# Sample format passed to the stream; also used to look up the sample width.
FORMAT = pyaudio.paInt16
# Number of channels for both the input stream and the saved file (mono).
CHANNELS = 1
# Sampling rate for capture, the WAV header, and feature extraction.
RATE = 44100
# Frames requested per stream read and used as the stream buffer size.
CHUNK = 1024

# Single PyAudio instance used to open the input stream; terminated at the
# end of the script.
audio = pyaudio.PyAudio()

# Global flag polled by the recording and transcription loops; set to True by
# start_recording() and back to False by stop_recording().
is_recording = False

# Function to record audio
def record_audio():
    """Capture microphone audio until ``is_recording`` is cleared, then save it.

    Opens an input stream on the shared PyAudio instance, reads CHUNK-sized
    buffers for as long as the module-level ``is_recording`` flag is true,
    and writes everything captured to ``output.wav``.

    The stream is stopped and closed even if a read raises; in that case the
    exception propagates and no file is written.
    """
    frames = []
    stream = audio.open(format=FORMAT, channels=CHANNELS,
                        rate=RATE, input=True,
                        frames_per_buffer=CHUNK)
    try:
        # The flag is only read here, so no ``global`` declaration is needed.
        while is_recording:
            frames.append(stream.read(CHUNK))
    finally:
        # Always release the input device, even when a read fails.
        stream.stop_stream()
        stream.close()

    # Save the recorded audio; the context manager closes the file even if
    # writing the header or the frames raises.
    with wave.open('output.wav', 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

# Function to preprocess audio and extract features
def preprocess_and_extract_features(filename):
    """Return the averaged MFCC feature vector for an audio file.

    The file is loaded through ``librosa.load`` with ``sr=RATE``, 13 MFCCs
    are computed for the signal, and the coefficient matrix is transposed and
    averaged along axis 0, giving one mean value per coefficient.

    Args:
        filename: Path of the audio file to analyse.

    Returns:
        The array produced by averaging the transposed MFCC matrix.
    """
    samples, sample_rate = librosa.load(filename, sr=RATE)
    mfcc_matrix = librosa.feature.mfcc(y=samples, sr=sample_rate, n_mfcc=13)
    # Transpose first so that the mean collapses the non-coefficient axis.
    return np.mean(mfcc_matrix.T, axis=0)

# Function to transcribe audio in real-time
def transcribe_audio():
    """Transcribe microphone speech into ``transcript_box`` while recording.

    Calibrates the recognizer for ambient noise once, then repeatedly listens
    for a phrase and sends it to the Google Web Speech API for as long as the
    module-level ``is_recording`` flag is true. Each recognized phrase, or a
    message describing why recognition failed, is appended to the transcript
    box as its own line.

    Listening uses a short timeout so that the loop re-checks
    ``is_recording`` regularly instead of blocking until the next utterance;
    without it the thread would keep running after recording has stopped.
    """
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()

    with microphone as source:
        recognizer.adjust_for_ambient_noise(source)
        while is_recording:
            try:
                # Bounded wait for the start of a phrase; on silence we fall
                # through to the loop condition and notice a stop request.
                audio_data = recognizer.listen(source, timeout=1)
            except sr.WaitTimeoutError:
                continue

            try:
                message = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                message = "Could not understand audio"
            except sr.RequestError as e:
                message = f"Request error from Google Speech Recognition service; {e}"

            # Append the line and keep the newest text scrolled into view.
            transcript_box.insert(tk.END, message + '\n')
            transcript_box.see(tk.END)

# Handle to the running capture thread, kept so that stop_recording() can wait
# for output.wav to be written before reading it.
_record_thread = None


# Function to start recording
def start_recording():
    """Start the capture and transcription threads.

    Does nothing if a recording is already in progress, so repeated clicks on
    the Record button cannot spawn duplicate threads that open the input
    device again and write to the same output file.
    """
    global is_recording, _record_thread
    if is_recording:
        return
    is_recording = True
    _record_thread = threading.Thread(target=record_audio)
    # Daemon: the transcription thread may be blocked inside the recognizer's
    # listen call, and must not keep the process alive after the GUI exits.
    transcribe_thread = threading.Thread(target=transcribe_audio, daemon=True)
    _record_thread.start()
    transcribe_thread.start()


# Function to stop recording
def stop_recording():
    """Stop recording, then extract and display features from the saved file.

    Clears ``is_recording`` and waits for the capture thread to finish before
    reading ``output.wav``; the file is only written once the capture loop
    has exited, so reading it immediately would race with the writer.
    """
    global is_recording, _record_thread
    is_recording = False
    if _record_thread is not None:
        # The capture loop exits after its current chunk read, so this wait
        # is short.
        _record_thread.join()
        _record_thread = None
    # Process the recorded audio and extract features
    features = preprocess_and_extract_features('output.wav')
    transcript_box.insert(tk.END, f"Extracted Features: {features}\n")

# Build the main window
root = tk.Tk()
root.title("Speech-to-Text Transcription")

# Create the two control buttons and the transcript area
record_button = tk.Button(root, text="Record", command=start_recording)
stop_button = tk.Button(root, text="Stop", command=stop_recording)
transcript_box = scrolledtext.ScrolledText(root, width=60, height=20, wrap=tk.WORD)

# Pack the widgets in creation order, each with the same vertical padding
for _widget in (record_button, stop_button, transcript_box):
    _widget.pack(pady=10)

# Hand control to the Tk event loop
root.mainloop()

# Once the event loop has returned, terminate the shared PyAudio object
audio.terminate()
