In [2]:
# load visualization function (see `demo.py`)
from demo import visualize

# `community-1` release webinar

In [3]:
# pyannote.audio 4.0 ships a sample file: an audio entry and its annotation
from pyannote.audio.sample import SAMPLE_FILE

# `path` is the audio to process; `reference` is the annotation that comes
# with it and is used as the reference in the comparisons below
path, reference = SAMPLE_FILE['audio'], SAMPLE_FILE['annotation']

# display the audio together with its reference annotation
visualize(
    path,
    {'reference': reference},
)

VBox(children=(HTML(value='<h2>reference</h2>'), Waveform(audio_as_base64='data:audio/x-wav;base64,UklGRjJMHQB…

## Getting started with `community-1`

### Initial setup

* Create an access token on Hugging Face at [hf.co/settings/tokens](https://hf.co/settings/tokens)
* Request access on Hugging Face at [hf.co/pyannote/speaker-diarization-community-1](https://hf.co/pyannote/speaker-diarization-community-1)

In [4]:
# The Hugging Face token is read from the HF_TOKEN environment variable.
# When the variable is unset, `hf_token` falls back to the boolean `True`
# instead of a string.
# NOTE(review): presumably `True` asks the Hugging Face client to use the
# locally saved login token -- confirm against the huggingface_hub docs.
import os
hf_token = os.getenv('HF_TOKEN', default=True)

### Download `community-1` from Hugging Face

In [5]:
from pyannote.audio import Pipeline

# load the pretrained `community-1` pipeline, authenticating with `hf_token`
community1 = Pipeline.from_pretrained('pyannote/speaker-diarization-community-1', token=hf_token)

### Process (30s) sample file on CPU

In [6]:
%%time
# Apply the pipeline to the sample file and time the call.
# The pipeline has not been sent to another device at this point, so this is
# the CPU baseline announced by the section heading.
output = community1(path)

CPU times: user 36.8 s, sys: 10.8 s, total: 47.6 s
Wall time: 13.3 s


### Process sample file with faster backends

In [7]:
# send community-1 pipeline to the fastest available PyTorch backend
import torch

# Prefer an NVIDIA GPU ("cuda"), then Apple Silicon ("mps"); otherwise stay on
# CPU so the notebook still runs end-to-end on machines without an accelerator
# (a hard-coded "mps" device fails on any machine that lacks it).
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# assign the return value to `_` so the cell does not display it
_ = community1.to(device)

In [8]:
%%time
# Same call as the CPU run, repeated after the pipeline was sent to the faster
# backend; `output` is rebound to this new result.
output = community1(path)

CPU times: user 464 ms, sys: 349 ms, total: 814 ms
Wall time: 1.12 s


### Iterate over speech turns

In [9]:
# one line per speech turn: start time, end time, speaker label
for segment, label in output.speaker_diarization:
    line = f"{segment.start:6.3f} {segment.end:6.3f} {label}"
    print(line)

 6.730  6.747 SPEAKER_00
 6.747  7.034 SPEAKER_01
 7.034  7.186 SPEAKER_00
 7.591  7.608 SPEAKER_00
 7.608  8.317 SPEAKER_01
 8.317  9.920 SPEAKER_00
 9.920 10.983 SPEAKER_01
10.460 14.746 SPEAKER_00
14.307 17.885 SPEAKER_01
18.020 21.513 SPEAKER_00
18.155 18.442 SPEAKER_01
21.766 28.499 SPEAKER_01
27.858 29.967 SPEAKER_00


## `community-1` improves speaker assignment and counting

In [10]:
from pyannote.database.util import load_rttm

# read the RTTM file holding the legacy (3.1) output, then keep the entry
# stored under the 'sample' key
rttm_content = load_rttm('sample-3.1.rttm')
legacy = rttm_content['sample']

In [11]:
# show the reference, the legacy (3.1) output and the community-1 output
# for the same audio file
visualize(
    path,
    {
        'reference': reference,
        'legacy (3.1)': legacy,
        'community-1': output.speaker_diarization,
    },
)

VBox(children=(HTML(value='<h2>reference</h2>'), Waveform(audio_as_base64='data:audio/x-wav;base64,UklGRjJMHQB…

## `community-1` streamlines reconciliation with STT

In [14]:
# show the two diarization results exposed by the pipeline output,
# each labelled with the attribute it comes from
visualize(
    path,
    {
        'output.speaker_diarization': output.speaker_diarization,
        'output.exclusive_speaker_diarization': output.exclusive_speaker_diarization,
    },
)

VBox(children=(HTML(value='<h2>output.speaker_diarization</h2>'), Waveform(audio_as_base64='data:audio/x-wav;b…

## `community-1` is hosted at cost on pyannoteAI API

### Initial setup

* Create an account on [dashboard.pyannote.ai](https://dashboard.pyannote.ai)
* Create a pyannoteAI API key 
* Enjoy free credits

In [15]:
# pyannoteAI API key is read from the PYANNOTEAI_API_KEY environment variable;
# indexing `os.environ` raises KeyError when the variable is not set
# (`os` and `Pipeline` were imported in earlier cells)
api_key = os.environ['PYANNOTEAI_API_KEY']

# load the cloud variant of community-1, passing the API key as `token`
community1_cloud = Pipeline.from_pretrained('pyannote/speaker-diarization-community-1-cloud', token=api_key)

In [16]:
%%time
# runs on pyannoteAI cloud
# The result is stored in `output_cloud`, separate from the local `output`,
# so that both can be compared afterwards.
output_cloud = community1_cloud(path)

CPU times: user 288 ms, sys: 29.7 ms, total: 318 ms
Wall time: 21.6 s


In [17]:
# compare the locally computed result with the one returned by the cloud pipeline
visualize(
    path,
    {
        'local': output.speaker_diarization,
        'cloud': output_cloud.speaker_diarization,
    },
)

VBox(children=(HTML(value='<h2>local</h2>'), Waveform(audio_as_base64='data:audio/x-wav;base64,UklGRjJMHQBXQVZ…

## `precision-2` is supported too!

In [18]:
# load `precision-2` with the same pyannoteAI API key used for the cloud pipeline
precision2 = Pipeline.from_pretrained('pyannote/speaker-diarization-precision-2', token=api_key)

In [19]:
%%time
# Apply the `precision-2` pipeline to the same sample file and time the call;
# the result is kept in its own variable for the comparisons that follow.
output_precision2 = precision2(path)

CPU times: user 204 ms, sys: 21.2 ms, total: 225 ms
Wall time: 11.5 s


In [20]:
# show the reference next to the community-1 and precision-2 outputs
visualize(
    path,
    {
        'reference': reference,
        'community-1': output.speaker_diarization,
        'precision-2': output_precision2.speaker_diarization,
    },
)

VBox(children=(HTML(value='<h2>reference</h2>'), Waveform(audio_as_base64='data:audio/x-wav;base64,UklGRjJMHQB…

#### Enjoy state-of-the-art accuracy

In [30]:
from pyannote.metrics.diarization import DiarizationErrorRate

# a single metric instance scores every system against the same reference
der = DiarizationErrorRate()

# score, in order: legacy (3.1), community-1, precision-2
l, c1, p2 = (
    der(reference, hypothesis)
    for hypothesis in (
        legacy,
        output.speaker_diarization,
        output_precision2.speaker_diarization,
    )
)

# one summary line, each rate formatted as a percentage with one decimal
print(" // ".join((
    "Diarization error rate",
    f"legacy (3.1): {l:.1%}",
    f"community-1: {c1:.1%}",
    f"precision-2: {p2:.1%}",
)))

Diarization error rate // legacy (3.1): 12.7% // community-1: 5.2% // precision-2: 3.0%
