In [None]:
!pip install pyannote.audio

In [None]:
!pip install pydub

In [None]:
import os
import time
from typing import Optional

import torch
from pyannote.audio import Pipeline
from pyannote.core import Annotation

# Switch on the TF32 flag for both the CUDA matmul backend and the cuDNN backend.
torch.backends.cuda.matmul.allow_tf32 = torch.backends.cudnn.allow_tf32 = True

# Read both flags back from torch so the cell output shows the effective values.
print(
    f"TF32 matmul allowed: {torch.backends.cuda.matmul.allow_tf32}"
)
print(
    f"TF32 cuDNN allowed: {torch.backends.cudnn.allow_tf32}"
)

In [None]:
def perform_diarization(wav_path, num_speakers=2, use_gpu=False, api_token=None):
    """
    Run the pretrained "pyannote/speaker-diarization@2.1" pipeline on one audio file.

    Parameters
    ----------
    wav_path : str
        Path to the input audio file.
    num_speakers : int, default=2
        Number of speakers, forwarded to the pipeline as a keyword argument.
    use_gpu : bool, default=False
        Use CUDA when it is available; otherwise the function falls back to CPU.
    api_token : Optional[str], default=None
        Hugging Face access token. When falsy, the pipeline is requested
        without an explicit token.

    Returns
    -------
    Annotation
        The diarization result returned by the pipeline. Every track is also
        printed, followed by the elapsed time.

    Raises
    ------
    RuntimeError
        If `Pipeline.from_pretrained` does not return a pipeline.
    """
    # Choose the device (falls back to CPU when CUDA is unavailable)
    device = "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    # Only pass `use_auth_token` when a token was actually supplied.
    auth_kwargs = {"use_auth_token": api_token} if api_token else {}
    pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization@2.1", **auth_kwargs)

    # pyannote.audio can report a gated/unauthorised model by printing a hint and
    # returning None instead of raising; fail here with a clear message rather
    # than with an AttributeError on the next line.
    if pipeline is None:
        raise RuntimeError(
            "Could not load 'pyannote/speaker-diarization@2.1'. Check the Hugging Face "
            "token and that the model's user conditions have been accepted."
        )

    # Ensure the model is loaded on the correct device
    pipeline.to(torch.device(device))

    # Run diarization
    print("Performing diarization...")
    start_time = time.perf_counter()  # monotonic clock, suited to elapsed-time measurement
    # `num_speakers` is an option of the pipeline call, not a key of the audio
    # file mapping, so it has to be passed as a keyword argument to take effect.
    diarization = pipeline(wav_path, num_speakers=num_speakers)

    # Print the results
    for turn, _, speaker in diarization.itertracks(yield_label=True):
        print(f"Speaker {speaker} speaks from {turn.start:.1f}s to {turn.end:.1f}s")
    elapsed_time = time.perf_counter() - start_time
    print(f"Total execution time: {elapsed_time:.2f} seconds")
    return diarization

In [None]:
def download_diarization_model(api_token: Optional[str] = None) -> Pipeline:
    """
    Downloads the pyannote.audio speaker diarization model and caches it locally.

    The pipeline "pyannote/speaker-diarization@2.1" is requested through
    `Pipeline.from_pretrained`; a progress message is printed on success and an
    explanatory message is printed before any exception is re-raised.

    Parameters
    ----------
    api_token : Optional[str], default=None
        Hugging Face API token if the model is private or gated. When falsy,
        the pipeline is requested without an explicit token.

    Returns
    -------
    Pipeline
        An instance of the `pyannote.audio.Pipeline` for speaker diarization.

    Raises
    ------
    ImportError
        If an import error surfaces while the pipeline is being loaded.
    ValueError
        If the provided API token is invalid or insufficient for accessing the model,
        including the case where `Pipeline.from_pretrained` returns no pipeline.
    Exception
        For any other exceptions that may occur during the model download.

    Examples
    --------
    >>> # Download the diarization model without an API token (for public models)
    >>> pipeline = download_diarization_model()

    >>> # Download the diarization model with an API token (for private models)
    >>> pipeline = download_diarization_model(api_token="your_hugging_face_api_token")

    >>> # Use the downloaded pipeline for diarization
    >>> diarization = pipeline("path/to/audio/file.wav")
    """

    try:
        # Only pass `use_auth_token` when a token was actually supplied.
        auth_kwargs = {"use_auth_token": api_token} if api_token else {}

        # Initialize the pipeline to trigger model download
        pipeline = Pipeline.from_pretrained(
            "pyannote/speaker-diarization@2.1",
            **auth_kwargs
        )

        # pyannote.audio can signal a gated/unauthorised model by returning None
        # instead of raising. Without this check the function would announce a
        # successful download and hand None back to the caller.
        if pipeline is None:
            raise ValueError(
                "Pipeline 'pyannote/speaker-diarization@2.1' could not be loaded "
                "with the supplied credentials."
            )

        print("Model downloaded and cached locally.")
        return pipeline

    except ImportError:
        print("pyannote.audio is not installed. Please install it using 'pip install pyannote.audio'.")
        raise  # bare raise keeps the original traceback untouched

    except ValueError:
        print("Invalid API token provided or insufficient permissions to access the model.")
        raise

    except Exception as e:
        print(f"An error occurred while downloading the diarization model: {e}")
        raise

In [None]:
def perform_diarization_offline(
    wav_path: str,
    num_speakers: int = 2,
    use_gpu: bool = False,
    model_path: Optional[str] = None
) -> Annotation:
    """
    Perform speaker diarization using a locally stored pyannote.audio pipeline.

    Parameters
    ----------
    wav_path : str
        Path to the input audio file.
    num_speakers : int, default=2
        Number of speakers to diarize; forwarded to the pipeline as a keyword argument.
    use_gpu : bool, default=False
        Whether to use GPU for processing. If `True`, CUDA must be available.
    model_path : Optional[str], default=None
        Path to the local pipeline configuration file (`config.yaml`), or to a
        directory that contains a `config.yaml`. Must be provided for offline usage.

    Returns
    -------
    Annotation
        The diarization result containing speaker segments. Each segment is
        also printed.

    Raises
    ------
    ValueError
        If `model_path` is not provided for offline usage.
    ImportError
        If an import error surfaces while loading or running the pipeline.
    RuntimeError
        If GPU is requested but not available, or if no pipeline could be
        loaded from `model_path`.

    Examples
    --------
    >>> wav_file_path = "path/to/your/audio.wav"
    >>> local_model_path = "/path/to/local_model/"  # directory holding config.yaml
    >>> diarization = perform_diarization_offline(
    ...     wav_path=wav_file_path,
    ...     num_speakers=2,
    ...     use_gpu=True,
    ...     model_path=local_model_path
    ... )
    >>> for turn, _, speaker in diarization.itertracks(yield_label=True):
    ...     print(f"Speaker {speaker} speaks from {turn.start:.1f}s to {turn.end:.1f}s")
    """

    try:
        # Validate model_path
        if not model_path:
            raise ValueError("`model_path` must be provided for offline usage.")

        # Choose the device; unlike a silent fallback, a missing GPU is an error here.
        if use_gpu:
            if not torch.cuda.is_available():
                raise RuntimeError("GPU requested but CUDA is not available.")
            device = "cuda"
        else:
            device = "cpu"
        print(f"Using device: {device}")

        # `Pipeline.from_pretrained` treats only an existing *file* as a local
        # checkpoint; anything else is interpreted as a Hugging Face model id.
        # Resolve a directory to the `config.yaml` inside it so the documented
        # "model directory" usage really loads from disk.
        checkpoint = model_path
        if os.path.isdir(model_path):
            config_file = os.path.join(model_path, "config.yaml")
            if os.path.isfile(config_file):
                checkpoint = config_file

        # Initialize the pipeline from the local model
        pipeline = Pipeline.from_pretrained(checkpoint)

        # from_pretrained can return None instead of raising when it cannot
        # fetch the pipeline; surface that as a clear error.
        if pipeline is None:
            raise RuntimeError(f"Could not load a diarization pipeline from '{model_path}'.")
        print("Pipeline loaded from local model.")

        # Ensure the model is loaded on the correct device
        pipeline.to(torch.device(device))

        # Run diarization. `num_speakers` is an option of the pipeline call, not
        # a key of the audio file mapping, so it must be passed as a keyword.
        print("Performing diarization...")
        diarization = pipeline(wav_path, num_speakers=num_speakers)

        # Print the results
        for turn, _, speaker in diarization.itertracks(yield_label=True):
            print(f"Speaker {speaker} speaks from {turn.start:.1f}s to {turn.end:.1f}s")

        return diarization

    except ImportError:
        print("Required libraries are not installed. Please install them using:")
        print("pip install torch pyannote.audio")
        raise  # bare raise keeps the original traceback untouched

    except ValueError as ve:
        print(f"ValueError: {ve}")
        raise

    except RuntimeError as re:
        print(f"RuntimeError: {re}")
        raise

    except Exception as e:
        print(f"An unexpected error occurred during diarization: {e}")
        raise

In [None]:
# Read the Hugging Face token from the HF_TOKEN environment variable instead of
# pasting it into the notebook, so the secret is never saved or committed with it.
# When the variable is unset, None is passed and the pipeline is requested
# without an explicit token.
diarization_result = perform_diarization(
    wav_path="sales_call_example_1.wav",
    use_gpu=True,
    api_token=os.environ.get("HF_TOKEN"),
)