Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
155 additions
and
151 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,4 @@ | ||
Revision history for App::BookIndex | ||
Revision history for Book-Index | ||
|
||
{{$NEXT}} | ||
First version | ||
|
||
First version with new design |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
package Book::Index; | ||
|
||
# ABSTRACT: Create an index for a book manuscript | ||
|
||
=head1 DESCRIPTION | ||
=cut | ||
|
||
use 5.010; | ||
use strict; | ||
use warnings; | ||
|
||
use Book::Index::Tables; | ||
|
||
# use File::Slurp qw(read_file); | ||
# use Lingua::EN::Splitter; | ||
# use Lingua::Stem::Snowball; | ||
# use Lingua::EN::StopWords qw(%StopWords); | ||
# | ||
# use ORLite { | ||
# file => 'sqlite.db', | ||
# readonly => 0, | ||
# create => sub { | ||
# my $dbh = shift; | ||
# $dbh->do(<<END_SQL); | ||
# CREATE TABLE word ( | ||
# id INTEGER PRIMARY KEY, | ||
# word TEXT, count INTEGER, | ||
# page INTEGER | ||
# ) | ||
# END_SQL | ||
# } | ||
# }; | ||
# | ||
# sub process { | ||
# my ( $class, $filename ) = @_; | ||
# | ||
# my $doc = read_file($filename); | ||
# my $splitter = new Lingua::EN::Splitter; | ||
# my $stemmer = Lingua::Stem::Snowball->new( lang => 'en' ); | ||
# | ||
# my @pages = split "\f", $doc; | ||
# | ||
# say "Generating index for file: $filename"; | ||
# | ||
# for my $page ( 0 .. $#pages ) { | ||
# say "Processing page $page.."; | ||
# | ||
# my @words = | ||
# grep { !$StopWords{$_} } | ||
# map { lc } @{ $splitter->words( $pages[$page] ) }; | ||
# | ||
# $stemmer->stem_in_place( \@words ); | ||
# | ||
# my %freq; | ||
# map { $freq{$_}++ } grep { !$StopWords{$_} } @words; | ||
# | ||
# for my $word ( keys %freq ) { | ||
# Indexer::Word->new( | ||
# word => $word, | ||
# count => $freq{$word}, | ||
# page => $page | ||
# )->insert; | ||
# } | ||
# } | ||
# | ||
# say "Finished processing all pages."; | ||
# } | ||
# | ||
# sub word { | ||
# my ( $class, $word ) = @_; | ||
# my $rows = Indexer->selectall_arrayref( | ||
# 'select page, count from word where word = ?', | ||
# undef, $word ); | ||
# print "$word: "; | ||
# if ( !@$rows ) { | ||
# say "not found."; | ||
# return; | ||
# } | ||
# my @hits; | ||
# for my $row (@$rows) { | ||
# my $word = $row->[1]; | ||
# my $times = $row->[0]; | ||
# push @hits, $word . ( $times > 1 ? " (x$times)" : '' ); | ||
# } | ||
# say join ', ', @hits; | ||
# } | ||
# | ||
# sub top { | ||
# my ( $class, $n ) = @_; | ||
# $n ||= 10; | ||
# my $rows = Indexer->selectall_arrayref( | ||
# 'select sum(count) as count, word from word group by word order by count desc limit ?', | ||
# undef, $n | ||
# ); | ||
# say "Top $n Words:\n"; | ||
# for my $row (@$rows) { | ||
# printf( "%5d %s\n", @$row ); | ||
# } | ||
# } | ||
|
||
1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
package Book::Index::Tables; | ||
|
||
# ABSTRACT: Defines ORLite tables | ||
|
||
# Schema version: written to the database by create() via
# "PRAGMA user_version" and handed to ORLite as its user_version option.
use constant USER_VERSION => 1;
|
||
# Build the initial database schema.
#
# Registered as the `create` callback in this file's `use ORLite` block.
#
# Parameters:
#   $dbh - database handle; only its do() method is used here.
#
# Stamps the schema version first, then issues one CREATE TABLE statement
# per entry of the table map, in alphabetical order of table name.
sub create {
    my ($dbh) = @_;

    # Record the schema version inside the database file itself.
    $dbh->do( 'PRAGMA user_version = ' . USER_VERSION );

    # Table name => data columns. Every table additionally receives an
    # integer primary key named `id` (added in the statement template).
    my %columns_for = (
        page             => [qw(page contents)],
        phrase           => [qw(phrase original primary)],
        stem             => [qw(stem)],
        word_page        => [qw(word page n)],
        # NOTE(review): every other join table is named after its columns;
        # this one is `phrase_page` but holds (phrase, word). Confirm
        # whether the table name or the column list is intended.
        phrase_page      => [qw(phrase word)],
        phrase_stem      => [qw(phrase stem)],
        phrase_word_page => [qw(phrase word page n)],
        phrase_stem_page => [qw(phrase stem page n)],
    );

    for my $name ( sort keys %columns_for ) {

        # Backtick-quote identifiers so keyword-like names such as
        # `primary` are accepted as column names.
        my @quoted      = map { "`$_`" } @{ $columns_for{$name} };
        my $column_list = join ",\n ", @quoted;

        my $statement = <<"END_SQL";
CREATE TABLE `$name` (
id INTEGER PRIMARY KEY,
$column_list
)
END_SQL

        # warn $statement;
        $dbh->do($statement);
    }
}
|
||
use ORLite { | ||
'package' => 'Book::Index', | ||
file => 'data/sqlite.db', | ||
user_version => USER_VERSION, | ||
cleanup => 'VACUUM', | ||
create => \&create, | ||
prune => 1, # while developing | ||
}; | ||
|
||
1; |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
use strict;
use warnings;
use Test::More tests => 1;
use Book::Index;

# Smoke test: the module loads and reports a SQLite data source name.
# `dsn` is not defined in Book::Index itself; presumably it is one of the
# class methods ORLite generates for the target package -- TODO confirm.
like(
    Book::Index->dsn,
    qr/\Adbi:SQLite:/,
    'Book::Index->dsn is a SQLite DSN'
);