In [1]:
## Load model
import pickle
from dotenv import load_dotenv
import os
import sys

# Make the project's source directory importable. The relative path is resolved
# against the current working directory at the time this cell runs.
sys.path.append(os.path.abspath(os.path.join('..', 'vap_sound/src')))

# VAPModel is not referenced by name anywhere in this notebook; presumably it is
# imported so that pickle can resolve the class when the model is loaded -- TODO confirm.
from vap_model import VAPModel

# Load environment variables before anything below reads them:
# load_model_pickle takes its default path from MODEL_PATH.
load_dotenv()

# Function to load model from pickle file
def load_model_pickle(path=os.environ.get("MODEL_PATH")):
    """Load a pickled model from disk and switch it to evaluation mode.

    Args:
        path: Location of the pickle file. Defaults to the MODEL_PATH
            environment variable. If that was unset when this function was
            defined (or ``None`` is passed explicitly), the variable is looked
            up again at call time.

    Returns:
        The unpickled object, after its ``eval()`` method has been called.

    Raises:
        ValueError: If no path is given and MODEL_PATH is not set.
        OSError: If the file cannot be opened (raised by ``open``).
    """
    if path is None:
        # The default above is evaluated only once, when the `def` statement
        # runs; re-read the environment so a MODEL_PATH set later still works.
        path = os.environ.get("MODEL_PATH")
    if path is None:
        raise ValueError(
            "No model path given and the MODEL_PATH environment variable is not set."
        )

    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load model files that come from a trusted source.
    with open(path, "rb") as f:
        model = pickle.load(f)
    model.eval()
    print("Model loaded successfully from pickle file!")
    return model

In [2]:
import pyaudio
import torchaudio.transforms as T
import numpy as np
import torch

# Real-time inference from microphone
def infer_from_mic(model, sample_rate=16000, chunk_size=1024):
    """Run the model on live microphone audio and print every prediction.

    Audio is read in blocks of ``chunk_size`` 16-bit mono samples, converted to
    40 MFCC coefficients per frame and passed to ``model``. The loop runs until
    it is interrupted (Ctrl-C / kernel interrupt).

    Args:
        model: Callable that takes the MFCC tensor and returns a tensor.
            Presumably the VAP model in eval mode -- confirm the input shape
            it expects.
        sample_rate: Sampling rate, in Hz, used for both capture and MFCC.
        chunk_size: Number of samples read from the microphone per iteration.
    """
    # Build the transform first so a failure here cannot leave audio handles open.
    mfcc_transform = T.MFCC(sample_rate=sample_rate, n_mfcc=40,
                            melkwargs={"n_fft": 400, "hop_length": 160, "n_mels": 40})
    p = pyaudio.PyAudio()
    stream = None
    try:
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=sample_rate,
                        input=True, frames_per_buffer=chunk_size)
        print("Listening...")
        while True:
            audio_data = stream.read(chunk_size)
            # int16 PCM bytes -> float32 in [-1, 1), shaped (1, chunk_size).
            samples = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
            waveform = torch.from_numpy(samples).unsqueeze(0)
            # Drop the channel axis, put time first, then add a batch axis.
            mfcc = mfcc_transform(waveform).squeeze(0).T.unsqueeze(0)
            with torch.no_grad():
                output = model(mfcc)
                prediction = output.squeeze(0).cpu().numpy()
                print("Prediction:", prediction)
    except KeyboardInterrupt:
        print("Stopping...")
    finally:
        # Release the microphone on every exit path, not only on Ctrl-C;
        # otherwise a model or read error leaves the device open.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()

# Example usage (uncomment to run): load the trained model and start inference.
# model = load_model_pickle()
# infer_from_mic(model)

In [3]:
import pyaudio
import torch
import torchaudio.transforms as T
import numpy as np

# Real-time inference from microphone, streaming to a function
def infer_from_mic_stream(model, callback_function, sample_rate=16000, chunk_size=1024):
    """
    Streams microphone audio through the model and reports each result to a callback.

    For every block of ``chunk_size`` samples the model output is reduced to a
    single Python float (the mean over all output elements) and passed to
    ``callback_function``. The loop runs until it is interrupted
    (Ctrl-C / kernel interrupt), which is handled silently.

    Args:
        model: The PyTorch model for inference. Presumably the VAP model in
            eval mode -- confirm the input shape it expects.
        callback_function: A function called as ``callback_function(avg)`` with
            the mean prediction of the current chunk.
        sample_rate: The audio sample rate in Hz.
        chunk_size: The number of samples read per iteration.
    """
    # Build the transform first so a failure here cannot leave audio handles open.
    mfcc_transform = T.MFCC(sample_rate=sample_rate, n_mfcc=40,
                            melkwargs={"n_fft": 400, "hop_length": 160, "n_mels": 40})
    p = pyaudio.PyAudio()
    stream = None
    try:
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=sample_rate,
                        input=True, frames_per_buffer=chunk_size)
        while True:
            audio_data = stream.read(chunk_size)
            # int16 PCM bytes -> float32 in [-1, 1), shaped (1, chunk_size).
            samples = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
            waveform = torch.from_numpy(samples).unsqueeze(0)
            # Drop the channel axis, put time first, then add a batch axis.
            mfcc = mfcc_transform(waveform).squeeze(0).T.unsqueeze(0)
            with torch.no_grad():
                output = model(mfcc)
            prediction = output.squeeze(0).cpu().numpy()
            # Collapse the whole output to one scalar for the callback.
            avg = np.mean(prediction).item()
            callback_function(avg)  # Call the provided function with the prediction
    except KeyboardInterrupt:
        pass
    finally:
        # Release the microphone on every exit path, including errors raised
        # by the model or by the callback, not only on Ctrl-C.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()


# Example usage (uncomment to run): infer_from_mic_stream(model, callback_function=print)

In [None]:
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import numpy as np
import time
import threading

# Shared state for process_pred: parallel lists holding the most recent
# predictions and the wall-clock time each one was received.
max_window_seconds = 20  # Sliding window of 20 seconds
data = []
timestamps = []

# Lock guarding `data` and `timestamps`; process_pred holds it while it
# appends to and trims the two lists.
data_lock = threading.Lock()

def process_pred(prediction):
    """Record a prediction in the sliding window and print the window contents.

    Appends ``prediction`` and the current time to the module-level ``data`` and
    ``timestamps`` lists, then discards entries older than
    ``max_window_seconds``.

    Args:
        prediction: The value to store (a scalar when used as the callback of
            infer_from_mic_stream).
    """
    current_time = time.time()
    with data_lock:  # Ensure thread-safe access to shared data
        data.append(prediction)
        timestamps.append(current_time)

        # Count the leading entries that fell out of the window, then drop them
        # with one slice deletion instead of shifting the list once per item.
        expired = 0
        while expired < len(timestamps) and (current_time - timestamps[expired]) > max_window_seconds:
            expired += 1
        if expired:
            del timestamps[:expired]
            del data[:expired]

        # Copy while still holding the lock so the debug output below cannot
        # observe the lists while another thread is modifying them.
        data_snapshot = list(data)
        timestamps_snapshot = list(timestamps)

    # Print for debugging
    print("Data: ", data_snapshot)
    print("Timestamps: ", timestamps_snapshot)



import time
import os

# Path of the prediction log, read from the environment once when this cell runs.
# os.environ.get returns None if MODEL_PREDICTION_FILE is not set.
LOG_FILE = os.environ.get("MODEL_PREDICTION_FILE")

def write_to_file_pred(prediction):
    """Append one timestamped prediction to the log file named by LOG_FILE.

    A header line is written first when the file does not exist yet. Each entry
    has the form ``"<unix time> Prediction: <prediction>"``.

    Args:
        prediction: The value to log; it is formatted with ``str()`` semantics.

    Raises:
        ValueError: If LOG_FILE is unset (MODEL_PREDICTION_FILE was missing
            from the environment).
    """
    log_file = LOG_FILE
    if not log_file:
        raise ValueError(
            "MODEL_PREDICTION_FILE environment variable is not set; cannot write prediction log."
        )

    # Decide about the header before opening: append mode creates the file.
    is_new_file = not os.path.exists(log_file)

    # cur_time = time.strftime("%Y-%m-%d %H:%M:%S")  # Readable timestamp
    cur_time = time.time()
    with open(log_file, "a") as f:  # Append mode
        if is_new_file:
            f.write("PREDICTION LOG\n")  # Add a header
        f.write(f"{cur_time} Prediction: {prediction}\n")


def remove_log_file():
    """Removes the log file after execution.

    Does nothing when LOG_FILE is unset or the file is already gone; prints a
    confirmation only when a file was actually deleted.
    """
    if not LOG_FILE:
        # MODEL_PREDICTION_FILE was never configured, so there is nothing to remove.
        return
    try:
        # Attempt the removal directly instead of checking existence first,
        # which avoids a race if the file disappears in between.
        os.remove(LOG_FILE)
    except FileNotFoundError:
        return
    print(f"Deleted log file: {LOG_FILE}")



# Load the model from its default path, then log a mean prediction per audio chunk.
# infer_from_mic_stream loops until the kernel is interrupted (KeyboardInterrupt).
model = load_model_pickle()
infer_from_mic_stream(model=model, callback_function=write_to_file_pred)

Model loaded successfully from pickle file!


In [9]:
# Clean up: delete the prediction log at LOG_FILE that write_to_file_pred appends to.
remove_log_file()

Deleted log file: ../pred.txt
