From e014dba0bc9c5c29bd7d6adb870c349b82b66bff Mon Sep 17 00:00:00 2001 From: Christopher Glasz Date: Wed, 22 Feb 2023 14:17:14 -0500 Subject: [PATCH] Replace LONG_SPEAKER_ID with SPEAKER_ID --- python/AzureSpeechDetection/README.md | 3 +-- .../acs_speech_component/acs_speech_component.py | 5 ----- python/AzureSpeechDetection/tests/test_acs_speech.py | 2 +- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/python/AzureSpeechDetection/README.md b/python/AzureSpeechDetection/README.md index 95c196e6..a77b4d92 100644 --- a/python/AzureSpeechDetection/README.md +++ b/python/AzureSpeechDetection/README.md @@ -36,8 +36,7 @@ Returned `AudioTrack` objects have the following members in their `detection_pro | Property Key | Description | |--------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `LONG_SPEAKER_ID` | A unique speaker identifier, of the form "`--<#>`, where `` and `` are integers indicating the segment range (in frame counts for video jobs, milliseconds for audio jobs) for sub-jobs when a job has been segmented by the Workflow Manager. The final `#` portion of the ID is a 1-indexed counter for speaker identity within the indicated segment range. When jobs are not segmented, or not submitted through the Workflow Manager at all, `stop_offset` may instead be `EOF`, indicating that the job extends to the end of the file. | -| `SPEAKER_ID` | A dummy field set to "0". 
| +| `SPEAKER_ID` | A unique speaker identifier, of the form "`<start_offset>-<stop_offset>-<#>`, where `<start_offset>` and `<stop_offset>` are integers indicating the segment range (in frame counts for video jobs, milliseconds for audio jobs) for sub-jobs when a job has been segmented by the Workflow Manager. The final `#` portion of the ID is a 1-indexed counter for speaker identity within the indicated segment range. When jobs are not segmented, or not submitted through the Workflow Manager at all, `stop_offset` may instead be `EOF`, indicating that the job extends to the end of the file. | | `GENDER` | Only present if supplied by an upstream component. The gender of the speaker. | | `GENDER_CONFIDENCE` | Only present if supplied by an upstream component. The confidence of the gender classification. | | `TRANSCRIPT` | The text of the utterance transcript. Words are space-separated. | diff --git a/python/AzureSpeechDetection/acs_speech_component/acs_speech_component.py b/python/AzureSpeechDetection/acs_speech_component/acs_speech_component.py index d283943f..4f6a5d7e 100644 --- a/python/AzureSpeechDetection/acs_speech_component/acs_speech_component.py +++ b/python/AzureSpeechDetection/acs_speech_component/acs_speech_component.py @@ -70,11 +70,6 @@ def get_detections_from_job( logger.exception(f'Exception raised while processing audio: {e}') raise - # Remove this block to drop LONG_SPEAKER_ID - for track in audio_tracks: - track.detection_properties['LONG_SPEAKER_ID'] = track.detection_properties['SPEAKER_ID'] - track.detection_properties['SPEAKER_ID'] = '0' - logger.info('Processing complete. Found %d tracks.' 
% len(audio_tracks)) return audio_tracks diff --git a/python/AzureSpeechDetection/tests/test_acs_speech.py b/python/AzureSpeechDetection/tests/test_acs_speech.py index 8304f040..60136ce5 100644 --- a/python/AzureSpeechDetection/tests/test_acs_speech.py +++ b/python/AzureSpeechDetection/tests/test_acs_speech.py @@ -184,7 +184,7 @@ def test_diarization(self): # There should be two speakers with diarization, one without len_raw, len_dia = [ len(set([ - track.detection_properties['LONG_SPEAKER_ID'] + track.detection_properties['SPEAKER_ID'] for track in result ])) for result in results