# **Pyannote Speaker Diarization-3.1 NO UI**

## Detect the Speakers of an Audio

**Credits**:
- Delik: [huggingface](https://huggingface.co/Delik) [github](https://github.com/D3lik) (making the code)
- [Poopmaster/Poiqazwsx](https://huggingface.co/poiqazwsx) (porting it to colab no ui)
- [Nick088](https://linktr.ee/Nick088) (adjusting the colab)

Join our server to talk about open-source AI!

 [![Discord](https://img.shields.io/discord/1198701940511617164?color=%23738ADB&label=Discord&style=for-the-badge)](https://discord.gg/osai)


In [None]:
# @title Install Requirements
# Install the notebook's dependencies, pinned to pyannote-audio 3.1.1, plus wavio.
from IPython.display import clear_output
# `!` hands the line to the shell (IPython/Colab syntax, not plain Python).
!pip install pyannote-audio==3.1.1 wavio
# Wipe the long pip log from the cell output. NOTE(review): this also hides any
# pip error text, and the message below is printed unconditionally.
clear_output()
print("Installed!")

In [None]:
#@title Upload Audio File

#@markdown NOTE: **This won't work for non-Google-Chrome users. If you are one of them, do this instead:**

#@markdown 1. Click on the Folder icon (file explorer).

#@markdown 2. Click upload and upload the audio file.

# Colab-only helper module; this cell is not expected to run outside Google Colab.
from google.colab import files

# Open the browser upload dialog. The return value is kept in `uploaded`, but the
# other cells shown in this notebook never read it: the diarization cell finds
# the audio by the file name typed into its form.
uploaded = files.upload()

In [None]:
# @title Run Audio Diarization
import torch
import os
from pyannote.audio import Pipeline
from google.colab import files
# User-editable settings. The `# @param` annotations must stay on the same line
# as their assignment: Colab reads them to render the form inputs.
#@markdown Name of your audio file.
audio_filename = "speaker-diarization-test.wav" # @param {type:"string"}
#@markdown Get your Hugging Face Read Token [here](https://huggingface.co/settings/tokens) and paste it below. Be sure to accept the conditions of https://huggingface.co/pyannote/segmentation-3.0 and https://huggingface.co/pyannote/speaker-diarization-3.1.
hf_read_token = "" # @param {type:"string"}

#@markdown **IF YOU LEAVE ALL THE PARAMETERS BELOW AT 0, AUTO MODE IS USED AND THE SPEAKERS ARE DETECTED AUTOMATICALLY. OTHERWISE, USE THE PARAMETERS BELOW FOR MORE CUSTOMIZATION & BETTER RESULTS.**

#@markdown Number of Speakers. **Use it only if you know the number of speakers in advance; otherwise leave it at 0 and use the parameters below.**
num_speakers = 0 # @param {type:"slider", min:0, max:10, step:1}

#@markdown **Use the following parameters only if you don't know the number of speakers: you can set a lower and/or upper bound on the number of speakers. If you do know it, leave the following parameters at 0 and use the one above.**

#@markdown Minimum Number of Speakers
min_speakers = 0 # @param {type:"slider", min:0, max:10, step:1}
#@markdown Maximum Number of Speakers
max_speakers  = 0 # @param {type:"slider", min:0, max:10, step:1}


# Load the pretrained diarization pipeline once and move it to the GPU when one
# is available. On any failure `pipeline` is left as None, which the helper
# functions below check before doing any work.
pipeline = None
try:
    # Pasted tokens often carry stray whitespace. An empty field is passed as
    # None rather than "" so that no empty credential is sent and any login
    # already stored on the machine can still be used.
    auth_token = hf_read_token.strip() or None
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1",
        use_auth_token=auth_token)
    # from_pretrained() returns None (instead of raising) when the gated model
    # cannot be downloaded. Report that explicitly; otherwise the failure shows
    # up as "'NoneType' object has no attribute 'to'" on the next line.
    if pipeline is None:
        raise RuntimeError(
            "could not download 'pyannote/speaker-diarization-3.1'. Check that "
            "the Hugging Face token is valid and that you accepted the user "
            "conditions of pyannote/segmentation-3.0 and "
            "pyannote/speaker-diarization-3.1")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    pipeline.to(device)
except Exception as e:
    # Top-level notebook boundary: report the problem and keep the cell running
    # so the helpers can return their "Pipeline not initialized" message.
    print(f"Error initializing pipeline: {e}")
    pipeline = None

def save_audio(audio_filename):
    """Return ``audio_filename`` unchanged, or an error string without a pipeline.

    Nothing is written to disk here; the function only gates on the
    module-level ``pipeline`` having been initialised.
    """
    return audio_filename if pipeline is not None else "Error: Pipeline not initialized"

def diarize_audio(temp_file, num_speakers, min_speakers, max_speakers):
    """Run the module-level ``pipeline`` on ``temp_file`` and return its text form.

    Speaker-count hints are forwarded to the pipeline only when they are
    greater than zero; a value of 0 means "not specified".

    Returns ``str(result)`` on success, or a string starting with ``"Error"``
    when the pipeline is missing or the run raises.
    """
    if pipeline is None:
        return "Error: Pipeline not initialized"

    try:
        hints = {
            "num_speakers": num_speakers,
            "min_speakers": min_speakers,
            "max_speakers": max_speakers,
        }
        # Keep only the hints the user actually set (strictly positive values).
        speaker_kwargs = {name: value for name, value in hints.items() if value > 0}
        annotation = pipeline(temp_file, **speaker_kwargs)
    except Exception as e:
        return f"Error processing audio: {e}"

    return str(annotation)

def timestamp_to_seconds(timestamp):
    """Convert an ``H:M:S`` timestamp string into a number of seconds.

    Each of the three colon-separated fields is parsed as a float, so
    fractional seconds are accepted. If the string does not have exactly
    three fields, or a field is not numeric, a message is printed and
    None is returned.
    """
    try:
        hours, minutes, seconds = (float(field) for field in timestamp.split(':'))
    except ValueError as e:
        print(f"Error converting timestamp to seconds: '{timestamp}'. Error: {e}")
        return None
    return 3600 * hours + 60 * minutes + seconds

def generate_labels_from_diarization(diarization_output):
    """Write a tab-separated label file from the textual diarization output.

    For every input line the code takes the text between the leading ``[`` and
    ``' --> '`` as the start time, the text up to the following ``]`` as the
    end time, and the last whitespace-separated token as the speaker label.
    Each line that converts cleanly becomes one
    ``start_seconds<TAB>end_seconds<TAB>label`` row in ``labels.txt`` (created
    in the current working directory, overwriting any previous file).

    Blank lines are ignored. Lines that cannot be parsed, or whose timestamps
    cannot be converted to seconds, are reported and skipped; they are never
    written as ``None`` values or counted as successes.

    Returns:
        The path of the label file, or None when no line could be converted or
        the file could not be written.
    """
    successful_lines = 0
    labels_path = 'labels.txt'
    try:
        with open(labels_path, 'w', encoding='utf-8') as outfile:
            for line in diarization_output.strip().split('\n'):
                entry = line.strip()
                if not entry:
                    # Empty diarization output (no speech found) yields a single
                    # blank line; that is not a parsing error.
                    continue
                try:
                    parts = entry[1:-1].split(' --> ')
                    start_time = parts[0].strip()
                    end_time = parts[1].split(']')[0].strip()
                    label = entry.split()[-1].strip()
                    start_seconds = timestamp_to_seconds(start_time)
                    end_seconds = timestamp_to_seconds(end_time)
                    if start_seconds is None or end_seconds is None:
                        # timestamp_to_seconds() has already printed the reason;
                        # skip the row instead of writing "None" into the file.
                        continue
                    outfile.write(f"{start_seconds}\t{end_seconds}\t{label}\n")
                    successful_lines += 1
                except Exception as e:
                    # Best effort: one malformed line must not abort the export.
                    print(f"Error processing line: '{entry}'. Error: {e}")
        print(f"Processed {successful_lines} lines successfully.")
        return labels_path if successful_lines > 0 else None
    except Exception as e:
        print(f"Cannot write to file '{labels_path}'. Error: {e}")
        return None

def process_audio(audio, num_speakers, min_speakers, max_speakers):
    """Diarize ``audio`` and export the labels, returning ``(text, label_path)``.

    ``text`` is either the diarization output or an error message starting
    with ``"Error"``; in the error case no label file is generated and the
    second element is None.
    """
    audio_path = save_audio(audio)
    outcome = diarize_audio(audio_path, num_speakers, min_speakers, max_speakers)

    # Failures are signalled in-band by the "Error" prefix of the returned string.
    if outcome.startswith("Error"):
        return outcome, None

    return outcome, generate_labels_from_diarization(outcome)

# Run the whole flow with the values chosen in the form above. `label_file` is
# None when diarization failed or no label row could be written.
diarization_result, label_file = process_audio(audio_filename, num_speakers, min_speakers, max_speakers)
# Show either the diarization text or the error message.
print(diarization_result)

In [None]:
#@title Download DAW Labels Result

import os

from google.colab import files

# The diarization cell writes this file. It is missing when that cell was not
# run or failed early, and empty when no diarization line could be converted.
labels_path = 'labels.txt'

if os.path.isfile(labels_path) and os.path.getsize(labels_path) > 0:
    files.download(labels_path)
    print("DAW Lable downloaded!")
else:
    # Explain what to do instead of letting files.download() fail with a raw
    # exception on a missing file, or handing the user an empty download.
    print(f"Nothing to download: '{labels_path}' is missing or empty. "
          "Run the diarization cell successfully first.")