In [1]:
import sounddevice as sd
import soundfile as sf
import numpy as np
import tensorflow as tf
import pickle
from tensorflow.keras.models import load_model
import speech_recognition as sr
from datetime import datetime
import os
import threading
import queue

# Function to record audio until user inputs 's' to stop
def interactive_recording(samplerate=16000):
    """Record mono 16-bit audio from the input device under keyboard control.

    The user types 'r' to start recording and 's' to stop. Audio chunks are
    delivered by the ``sounddevice`` stream callback on a background thread
    and collected through a queue.

    Args:
        samplerate: Sampling rate in Hz passed to ``sd.InputStream``.

    Returns:
        A NumPy ``int16`` array with all captured chunks concatenated along
        axis 0 when recording is stopped with 's' or interrupted with Ctrl+C
        while recording; an empty ``int16`` array if no chunk was captured;
        ``None`` if Ctrl+C is pressed while no recording is in progress.
    """
    print("Type 'r' to start recording and 's' to stop.")
    recording_queue = queue.Queue()
    stop_recording = threading.Event()
    is_recording = False
    recording_thread = None

    def audio_callback(indata, frames, time, status):
        # Status flags report stream conditions such as overflow; report them
        # but keep the chunk, otherwise every warning punches an extra hole
        # into the recording.
        if status:
            print(f"Error in audio stream: {status}")
        recording_queue.put(indata.copy())

    def record_audio():
        try:
            with sd.InputStream(samplerate=samplerate, channels=1,
                                dtype='int16', callback=audio_callback):
                # Block without spinning until the main thread asks to stop.
                stop_recording.wait()
        except Exception as e:
            print(f"Error during recording: {e}")

    def finish_recording():
        """Stop the stream, wait for it to close, and return the audio."""
        stop_recording.set()
        if recording_thread is not None:
            # Wait for the stream to close so the final chunks are queued
            # before draining; the timeout keeps a stuck device from hanging
            # the prompt (the thread is a daemon and will not block exit).
            recording_thread.join(timeout=2.0)
        chunks = []
        while True:
            try:
                chunks.append(recording_queue.get_nowait())
            except queue.Empty:
                break
        if chunks:
            return np.concatenate(chunks, axis=0)
        return np.array([], dtype='int16')  # Nothing was captured

    try:
        while True:
            user_input = input("Enter 'r' to record or 's' to stop: ").strip().lower()
            if user_input == 'r' and not is_recording:
                print("Recording started...")
                is_recording = True
                stop_recording.clear()  # Ensure the stop event is cleared
                recording_thread = threading.Thread(target=record_audio, daemon=True)
                recording_thread.start()
                print("Recording in progress. Type 's' to stop.")

            elif user_input == 's' and is_recording:
                print("Stopping recording...")
                is_recording = False
                return finish_recording()

            elif user_input == 's':
                print("No recording to stop. Type 'r' to start recording first.")

            else:
                print("Invalid input. Use 'r' to record and 's' to stop.")

    except KeyboardInterrupt:
        if is_recording:
            print("Keyboard interrupt detected. Stopping recording...")
            return finish_recording()
        print("No audio recorded. Exiting...")
        return None
    finally:
        # Setting an already-set event is harmless; this guarantees the
        # recording thread is released on every exit path.
        stop_recording.set()

# Function to save audio to a .wav file
def save_audio(audio_data, filename, samplerate=16000):
    """Write ``audio_data`` to ``filename`` as a 16-bit PCM file.

    Nothing is written when ``audio_data`` contains no samples; a message is
    printed in either case.

    Args:
        audio_data: Array-like object exposing ``.size`` (e.g. a NumPy array).
        filename: Destination path handed to ``sf.write``.
        samplerate: Sampling rate in Hz stored with the audio.
    """
    has_samples = audio_data.size > 0
    if not has_samples:
        print("No audio data to save.")
        return

    sf.write(filename, audio_data, samplerate, subtype='PCM_16')
    print(f"Audio saved to {filename}")

# Function to transcribe an audio file using Google Speech Recognition
def transcribe_audio(filename):
    """Transcribe an audio file with ``speech_recognition``'s Google recognizer.

    Args:
        filename: Path of the audio file to transcribe.

    Returns:
        The recognized text, or ``None`` when the file does not exist, cannot
        be read, cannot be understood, or the recognition request fails. A
        message describing the outcome is printed in every case.
    """
    file_missing = not os.path.exists(filename)
    if file_missing:
        print(f"Error: File not found at {filename}")
        return None

    engine = sr.Recognizer()

    # Stage 1: load the whole file into an AudioData object.
    try:
        with sr.AudioFile(filename) as wav:
            captured = engine.record(wav)
    except Exception as err:
        print(f"Error processing audio file: {err}")
        return None

    # Stage 2: send the audio to the recognizer and report the result.
    try:
        transcript = engine.recognize_google(captured)
        print("Transcription:", transcript)
    except sr.UnknownValueError:
        print("Speech Recognition could not understand the audio.")
        return None
    except sr.RequestError as err:
        print(f"Could not request results from Google Speech Recognition service; {err}")
        return None
    except Exception as err:
        print(f"Error processing audio file: {err}")
        return None
    return transcript


# Function to save the model
def save_model(model, filename):
    """Persist ``model`` to ``filename`` based on the file extension.

    ``.h5`` files are written through ``model.save``; ``.pkl`` files are
    written with ``pickle.dump``. Any other extension only prints a message.

    Args:
        model: Object to persist (must provide ``save`` for ``.h5``).
        filename: Destination path ending in ``.h5`` or ``.pkl``.
    """
    wants_h5 = filename.endswith('.h5')
    wants_pickle = filename.endswith('.pkl')

    if not (wants_h5 or wants_pickle):
        print("Unsupported file format. Use .h5 or .pkl.")
        return

    if wants_h5:
        model.save(filename)
    else:
        with open(filename, 'wb') as handle:
            pickle.dump(model, handle)
    print(f"Model saved to {filename}")

# Example usage
if __name__ == "__main__":
    # Demo pipeline: record -> save to .wav -> transcribe -> save a model.
    # Results stay in module-level names so they remain inspectable afterwards.

    # Record audio interactively (None means the user aborted before recording)
    audio_data = interactive_recording()

    if audio_data is not None and audio_data.size > 0:
        # Save the audio to a file
        audio_filename = "recorded_audio.wav"
        save_audio(audio_data, audio_filename)

        # Transcribe the audio (the function prints the result itself)
        transcription = transcribe_audio(audio_filename)

        # Example: build a small, untrained placeholder model (for
        # demonstration purposes). The input shape is declared with an
        # explicit Input object rather than `input_shape=` on the first
        # layer, which Keras warns about in Sequential models.
        model = tf.keras.Sequential([
            tf.keras.Input(shape=(10,)),
            tf.keras.layers.Dense(10, activation='relu'),
            tf.keras.layers.Dense(1, activation='sigmoid'),
        ])

        # Save the model
        model_filename = "speech_model.h5"
        save_model(model, model_filename)
    elif audio_data is not None:
        # Recording was started and stopped, but no samples were captured
        print("No audio data was recorded.")
    else:
        print("No audio data to process.")


Type 'r' to start recording and 's' to stop.
Recording started...
Recording in progress. Type 's' to stop.
Stopping recording...
Audio saved to recorded_audio.wav


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Transcription: hi I am Gajala from Washington DC CEO Sonic solutions
Model saved to speech_model.h5
