Skip to content

Commit

Permalink
Merge pull request #162 from customprogrammingsolutions/test-coverage…
Browse files Browse the repository at this point in the history
…-utterance

[MRG] Test coverage utterance
  • Loading branch information
oadams committed May 31, 2018
2 parents 9964e01 + b4c6956 commit 53f9575
Show file tree
Hide file tree
Showing 2 changed files with 199 additions and 2 deletions.
200 changes: 198 additions & 2 deletions persephone/tests/test_utterance.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from pathlib import Path

from persephone import utterance
from persephone.utterance import Utterance

def test_too_short():
from persephone.utterance import Utterance
from persephone import utterance
utterance_too_short = Utterance(
org_media_path=Path(
'data/org/BKW-speaker-ids/Mark on rock with Timecode.mp4'),
Expand All @@ -25,3 +25,199 @@ def test_too_short():
utterances = [utterance_too_short, utterance_ok]

assert utterance.remove_too_short(utterances) == [utterance_ok]

def test_remove_duplicates_same_time():
    """Check remove_duplicates on a list containing two identically-built utterances.

    Two utterances are built from exactly the same field values and a third
    from different paths, prefix and text. The filtered list is expected to
    hold two entries: the distinct utterance plus one of the identical pair.
    """
    from persephone.utterance import Utterance, remove_duplicates

    def build(media, transcription, prefix, text):
        # All utterances in this test share the same timing and speaker.
        return Utterance(
            org_media_path=Path(media),
            org_transcription_path=Path(transcription),
            prefix=prefix,
            start_time=1,
            end_time=2,
            text=text,
            speaker='Unit tester',
        )

    first_copy = build('test.wav', 'test.txt', 'test', 'test text')
    second_copy = build('test.wav', 'test.txt', 'test', 'test text')
    distinct = build('testb.wav', 'testb.txt', 'testb', 'testb text')

    deduplicated = remove_duplicates([first_copy, second_copy, distinct])

    assert deduplicated
    assert len(deduplicated) == 2
    assert distinct in deduplicated
    # Either of the identical pair may be the one that survives.
    assert any(
        candidate in deduplicated for candidate in (first_copy, second_copy)
    )

def test_utterance_durations():
    """Check duration() for single utterances and total_duration() for a list.

    Each utterance's duration is compared against end_time minus start_time,
    and the total for both is compared against the sum of the two individual
    durations.
    """
    from persephone.utterance import Utterance, duration, total_duration

    def spanning(start, end):
        # Only the timing differs between the utterances in this test.
        return Utterance(
            org_media_path=Path('test.wav'),
            org_transcription_path=Path('test.txt'),
            prefix='test',
            start_time=start,
            end_time=end,
            text='test text',
            speaker='Unit tester',
        )

    short_utter = spanning(1, 2)
    short_length = duration(short_utter)
    assert short_length == 2 - 1

    long_utter = spanning(1, 15)
    long_length = duration(long_utter)
    assert long_length == 15 - 1

    combined_length = total_duration([short_utter, long_utter])
    assert combined_length == short_length + long_length

def test_make_speaker_utters():
    """Test that make_speaker_utters maps speaker names to the utterances
    that they made"""
    from persephone.utterance import Utterance, make_speaker_utters

    def said(text, speaker, start=1, end=2):
        # Media path, transcription path and prefix are the same throughout.
        return Utterance(
            org_media_path=Path('test.wav'),
            org_transcription_path=Path('test.txt'),
            prefix='test',
            start_time=start,
            end_time=end,
            text=text,
            speaker=speaker,
        )

    # Speakers 'a' and 'b' each speak once; speaker 'c' speaks twice.
    spoken = [
        said('a text', 'a'),
        said('b text', 'b'),
        said('the first thing c said', 'c'),
        said('the second thing c said', 'c', start=6, end=10),
    ]

    by_speaker = make_speaker_utters(spoken)

    for name in ('a', 'b', 'c'):
        assert name in by_speaker
    assert 'NotValidSpeaker' not in by_speaker

    for name, expected_count in (('a', 1), ('b', 1), ('c', 2)):
        assert len(by_speaker[name]) == expected_count

def test_speaker_durations():
    """Test that speaker_durations reports how long each speaker spoke for.

    The result is checked for one (speaker, total duration) pair per speaker,
    where speaker 'c' contributes the sum of two utterances.
    """
    from persephone.utterance import Utterance, speaker_durations

    def said(text, speaker, start, end):
        # Media path, transcription path and prefix are the same throughout.
        return Utterance(
            org_media_path=Path('test.wav'),
            org_transcription_path=Path('test.txt'),
            prefix='test',
            start_time=start,
            end_time=end,
            text=text,
            speaker=speaker,
        )

    spoken = [
        said('a text', 'a', 1, 2),
        said('b text', 'b', 1, 2),
        said('the first thing c said', 'c', 0, 20),
        said('the second thing c said', 'c', 400, 900),
    ]

    # Expected totals, written as end - start for each utterance above.
    expected_totals = {
        'a': 2 - 1,
        'b': 2 - 1,
        'c': (20 - 0) + (900 - 400),
    }

    measured = speaker_durations(spoken)

    assert measured
    for name, total in expected_totals.items():
        assert (name, total) in measured
1 change: 1 addition & 0 deletions persephone/utterance.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ def duration(utter: Utterance) -> int:
return utter.end_time - utter.start_time

def total_duration(utterances: List[Utterance]) -> int:

"""Get the duration of an entire list of utterances in milliseconds
Args:
utterances: The list of utterance we are finding the duration of
Expand Down

0 comments on commit 53f9575

Please sign in to comment.