Skip to content

Commit

Permalink
Use Lingua::EN::Sentence directly to get offsets
Browse files Browse the repository at this point in the history
This is because the tests for Lingua::EN::Sentence::Offsets fail due to
a new release of Lingua::EN::Sentence
<andrefs/Lingua-EN-Sentence-Offsets#3>.
  • Loading branch information
zmughal committed May 6, 2018
1 parent a1fb75f commit 6a5e4e4
Showing 1 changed file with 19 additions and 3 deletions.
22 changes: 19 additions & 3 deletions lib/Renard/Incunabula/Language/EN.pm
Expand Up @@ -18,9 +18,6 @@ of each sentence.
=cut
fun apply_sentence_offsets_to_blocks( (InstanceOf['String::Tagged']) $text ) {
# loading here so that utf8::all does not effect everything
require Lingua::EN::Sentence::Offsets;
Lingua::EN::Sentence::Offsets->import(qw/get_offsets add_acronyms/);
$text->iter_extents_nooverlap(
sub {
my ( $extent, %tags ) = @_;
Expand All @@ -40,6 +37,25 @@ fun apply_sentence_offsets_to_blocks( (InstanceOf['String::Tagged']) $text ) {
);
}

# Computes the character offsets of each sentence found in $text.
#
# Parameters:
#   $text - an object that provides a ->str method returning the plain
#           string (it is also handed as-is to get_sentences()).
#
# Returns an ArrayRef of [ $start, $end ] pairs, one per sentence that could
# be located in the plain string, in document order. $start is the offset of
# the first character of the sentence and $end is the offset just past its
# last character (the values of $-[0] and $+[0] for the match).
fun get_offsets( $text ) {
	# loading here so that utf8::all does not affect everything
	require Lingua::EN::Sentence;
	Lingua::EN::Sentence->import(qw/get_sentences/);

	my $sentences = get_sentences($text);

	my $offsets = [];
	my $str = $text->str;
	for my $s (@{ $sentences // [] }) {
		# An empty pattern would make Perl silently reuse the last
		# successfully matched regex, so skip empty sentences.
		next unless defined $s && length $s;

		# Match the sentence literally (quotemeta protects against regex
		# metacharacters such as "(", "?" or "[" in the sentence), while
		# still letting any run of whitespace match any other run of
		# whitespace. The -1 limit keeps trailing empty fields so that
		# leading/trailing whitespace still maps to \s+.
		my $s_re = join '\\s+', map { quotemeta } split /\s+/, $s, -1;

		# /g continues from the end of the previous sentence; /c keeps that
		# position if this sentence can not be found. Only record offsets
		# for a successful match: after a failed match @- and @+ would
		# still describe the previous match.
		if( $str =~ m/$s_re/gc ) {
			push @$offsets, [ $-[0], $+[0] ];
		}
	}

	return $offsets;
}

fun preprocess_for_tts( $text ) {
$_ = $text;
$_ = unidecode($_); # FIXME this is a sledgehammer approach
Expand Down

0 comments on commit 6a5e4e4

Please sign in to comment.