<a href="https://colab.research.google.com/github/kumar045/Assignment-For-Filed/blob/main/Real_time.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:

import pyaudio
import numpy as np
from pyannote.audio import Pipeline
from pyannote.core import Segment

# Initialize the speaker diarization pipeline
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")

# Setup audio stream parameters
FORMAT = pyaudio.paFloat32
CHANNELS = 1
RATE = 16000
CHUNK = 1024
audio_interface = pyaudio.PyAudio()

# Function to process audio chunks
def process_audio(data, frame_rate):
    # Convert the byte data to a numpy array
    audio = np.frombuffer(data, dtype=np.float32)
    # Diarize the current chunk
    diarization = pipeline({"waveform": audio, "sample_rate": frame_rate})
    return diarization

# Callback function to stream audio and perform diarization
def callback(in_data, frame_count, time_info, status):
    diarization = process_audio(in_data, RATE)
    print(diarization)
    return (in_data, pyaudio.paContinue)

# Open the stream and start processing
stream = audio_interface.open(format=FORMAT, channels=CHANNELS,
                              rate=RATE, input=True,
                              frames_per_buffer=CHUNK,
                              stream_callback=callback)

stream.start_stream()

# Keep the stream open and process audio until stopped
try:
    while stream.is_active():
        pass
except KeyboardInterrupt:
    # Stop and close the stream
    stream.stop_stream()
    stream.close()
    audio_interface.terminate()


Could not download 'pyannote/speaker-diarization' pipeline.
It might be because the pipeline is private or gated so make
sure to authenticate. Visit https://hf.co/settings/tokens to
create your access token and retry with:

   >>> Pipeline.from_pretrained('pyannote/speaker-diarization',
   ...                          use_auth_token=YOUR_AUTH_TOKEN)

If this still does not work, it might be because the pipeline is gated:
visit https://hf.co/pyannote/speaker-diarization to accept the user conditions.


OSError: [Errno -9996] Invalid input device (no default output device)

In [14]:
import numpy as np
import librosa
import sounddevice as sd
from sklearn.cluster import KMeans
import logging

# Initialize logging
logging.basicConfig(level=logging.INFO)

# Constants
SAMPLE_RATE = 16000
N_MFCC = 13
N_FFT = 512
HOP_LENGTH = int(SAMPLE_RATE * 0.010)  # 10 ms
ENERGY_THRESHOLD = 0.05  # VAD threshold

audio_buffer = np.array([], dtype=np.float32)
mfcc_features_list = []

def extract_mfcc_from_buffer(buffer, sr=SAMPLE_RATE):
    try:
        if len(buffer) >= N_FFT:
            mfccs = librosa.feature.mfcc(y=buffer, sr=sr, n_mfcc=N_MFCC, n_fft=N_FFT, hop_length=HOP_LENGTH)
            return np.mean(mfccs.T, axis=0)
    except Exception as e:
        logging.error(f"Error extracting MFCC: {e}")
    return None

def identify_speakers(mfcc_features_list):
    try:
        if len(mfcc_features_list) >= 10:  # Ensure we have enough features for clustering
            X = np.vstack(mfcc_features_list)
            kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
            latest_mfcc = mfcc_features_list[-1].reshape(1, -1)
            speaker_label = kmeans.predict(latest_mfcc)[0]
            logging.info(f"Current speaker: Speaker {speaker_label + 1}")
        else:
            logging.info("Accumulating more features for clustering...")
    except Exception as e:
        logging.error(f"Error in identify_speakers: {e}")

def audio_callback(indata, frames, time, status):
    global audio_buffer, mfcc_features_list
    if status:
        logging.warning(status)
    try:
        # VAD: Check if audio has enough energy
        frame_energy = np.sum(indata[:, 0] ** 2)
        if frame_energy > ENERGY_THRESHOLD:
            audio_buffer = np.concatenate((audio_buffer, indata[:, 0]))
            if len(audio_buffer) >= N_FFT:
                mfcc_features = extract_mfcc_from_buffer(audio_buffer)
                if mfcc_features is not None:
                    mfcc_features_list.append(mfcc_features)
                    identify_speakers(mfcc_features_list)
                    audio_buffer = np.array([], dtype=np.float32)  # Clear the buffer
    except Exception as e:
        logging.error(f"Error in audio_callback: {e}")

def main():
    try:
        with sd.InputStream(samplerate=SAMPLE_RATE, channels=1, callback=audio_callback):
            logging.info("Recording... Press Ctrl+C to stop.")
            while True:
                sd.sleep(1000)
    except KeyboardInterrupt:
        logging.info("Recording stopped by user.")
    except Exception as e:
        logging.error(f"An error occurred in main: {e}")

if __name__ == "__main__":
    main()

ERROR:root:An error occurred in main: Error querying device -1


In [6]:
!sudo apt-get install python3-pyaudio

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libportaudio2
Suggested packages:
  python-pyaudio-doc
The following NEW packages will be installed:
  libportaudio2 python3-pyaudio
0 upgraded, 2 newly installed, 0 to remove and 33 not upgraded.
Need to get 91.2 kB of archives.
After this operation, 340 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libportaudio2 amd64 19.6.0-1.1 [65.3 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 python3-pyaudio amd64 0.2.11-1.3ubuntu1 [25.9 kB]
Fetched 91.2 kB in 1s (146 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 2.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline

In [12]:
!pip install pyannote.audio sounddevice

Collecting sounddevice
  Downloading sounddevice-0.4.6-py3-none-any.whl (31 kB)
Installing collected packages: sounddevice
Successfully installed sounddevice-0.4.6
