Skip to content

Commit

Permalink
Started new design
Browse files Browse the repository at this point in the history
  • Loading branch information
patspam committed May 17, 2010
1 parent 8f6f292 commit bc93eab
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 151 deletions.
5 changes: 2 additions & 3 deletions Changes
@@ -1,5 +1,4 @@
Revision history for App::BookIndex
Revision history for Book-Index

{{$NEXT}}
First version

First version with new design
5 changes: 0 additions & 5 deletions bin/indexer

This file was deleted.

2 changes: 1 addition & 1 deletion dist.ini
@@ -1,4 +1,4 @@
name = Indexer
name = Book-Index
author = Patrick Donelan <pat@patspam.com>
license = Perl_5
copyright_holder = Patrick Donelan
Expand Down
102 changes: 102 additions & 0 deletions lib/Book/Index.pm
@@ -0,0 +1,102 @@
package Book::Index;

# ABSTRACT: Create an index for a book manuscript

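=head1 DESCRIPTION

Book::Index creates an index for a book manuscript. The database layer is
set up by L<Book::Index::Tables>, which is loaded below.

=cut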

use 5.010;
use strict;
use warnings;

use Book::Index::Tables;

# use File::Slurp qw(read_file);
# use Lingua::EN::Splitter;
# use Lingua::Stem::Snowball;
# use Lingua::EN::StopWords qw(%StopWords);
#
# use ORLite {
# file => 'sqlite.db',
# readonly => 0,
# create => sub {
# my $dbh = shift;
# $dbh->do(<<END_SQL);
# CREATE TABLE word (
# id INTEGER PRIMARY KEY,
# word TEXT, count INTEGER,
# page INTEGER
# )
# END_SQL
# }
# };
#
# sub process {
# my ( $class, $filename ) = @_;
#
# my $doc = read_file($filename);
# my $splitter = new Lingua::EN::Splitter;
# my $stemmer = Lingua::Stem::Snowball->new( lang => 'en' );
#
# my @pages = split "\f", $doc;
#
# say "Generating index for file: $filename";
#
# for my $page ( 0 .. $#pages ) {
# say "Processing page $page..";
#
# my @words =
# grep { !$StopWords{$_} }
# map { lc } @{ $splitter->words( $pages[$page] ) };
#
# $stemmer->stem_in_place( \@words );
#
# my %freq;
# map { $freq{$_}++ } grep { !$StopWords{$_} } @words;
#
# for my $word ( keys %freq ) {
# Indexer::Word->new(
# word => $word,
# count => $freq{$word},
# page => $page
# )->insert;
# }
# }
#
# say "Finished processing all pages.";
# }
#
# sub word {
# my ( $class, $word ) = @_;
# my $rows = Indexer->selectall_arrayref(
# 'select page, count from word where word = ?',
# undef, $word );
# print "$word: ";
# if ( !@$rows ) {
# say "not found.";
# return;
# }
# my @hits;
# for my $row (@$rows) {
# my $word = $row->[1];
# my $times = $row->[0];
# push @hits, $word . ( $times > 1 ? " (x$times)" : '' );
# }
# say join ', ', @hits;
# }
#
# sub top {
# my ( $class, $n ) = @_;
# $n ||= 10;
# my $rows = Indexer->selectall_arrayref(
# 'select sum(count) as count, word from word group by word order by count desc limit ?',
# undef, $n
# );
# say "Top $n Words:\n";
# for my $row (@$rows) {
# printf( "%5d %s\n", @$row );
# }
# }

1;
45 changes: 45 additions & 0 deletions lib/Book/Index/Tables.pm
@@ -0,0 +1,45 @@
package Book::Index::Tables;

# ABSTRACT: Defines ORLite tables

use strict;
use warnings;

# Schema version of the database. create() writes this value to the SQLite
# "user_version" pragma, and the same value is passed to ORLite as its
# user_version option. The empty prototype lets Perl treat the sub as a
# constant, so it can be used as a bareword without parentheses.
sub USER_VERSION() {1}

# Build the database schema on the handle passed as the first argument.
#
# The handle only needs a do() method. First the "user_version" pragma is
# stamped with USER_VERSION, then one CREATE TABLE statement is issued per
# entry of the table map, in sorted table-name order. Every table gets an
# "id INTEGER PRIMARY KEY" column followed by the listed columns, each
# backtick-quoted and declared without a type.
sub create {
    my ($db) = @_;

    $db->do( 'PRAGMA user_version = ' . USER_VERSION );

    # Table name => columns (in addition to the implicit "id" column).
    # NOTE(review): 'phrase_page' holds (phrase, word) while its siblings are
    # named after their columns -- possibly meant to be 'phrase_word'; confirm.
    my %columns_of = (
        page             => [qw(page contents)],
        phrase           => [qw(phrase original primary)],
        stem             => [qw(stem)],
        word_page        => [qw(word page n)],
        phrase_page      => [qw(phrase word)],
        phrase_stem      => [qw(phrase stem)],
        phrase_word_page => [qw(phrase word page n)],
        phrase_stem_page => [qw(phrase stem page n)],
    );

    foreach my $name ( sort keys %columns_of ) {
        my @quoted = map { '`' . $_ . '`' } @{ $columns_of{$name} };

        my $ddl = "CREATE TABLE `$name` (\n"
            . "id INTEGER PRIMARY KEY,\n"
            . join( ",\n ", @quoted ) . "\n"
            . ")\n";

        # warn $ddl;    # uncomment to inspect the generated DDL
        $db->do($ddl);
    }
}

# Configure ORLite. Because `use` is executed at compile time, USER_VERSION
# and create() must already be defined above this statement.
# NOTE(review): the per-option remarks below are inferred from the option
# names and values -- confirm exact semantics against the ORLite docs.
use ORLite {
# Namespace handed to ORLite (the test suite calls Book::Index->dsn).
'package' => 'Book::Index',
# Relative path of the SQLite database file -- presumably resolved against
# the current working directory; verify.
file => 'data/sqlite.db',
# Same value that create() writes to the user_version pragma.
user_version => USER_VERSION,
# SQL statement supplied for cleanup.
cleanup => 'VACUUM',
# Callback that builds the schema; it receives the database handle.
create => \&create,
prune => 1, # while developing
};

1;
108 changes: 0 additions & 108 deletions lib/Indexer.pm

This file was deleted.

34 changes: 0 additions & 34 deletions lib/Indexer/Cmd.pm

This file was deleted.

5 changes: 5 additions & 0 deletions t/tables.t
@@ -0,0 +1,5 @@
use strict;
use warnings;
use Test::More tests => 1;
use Book::Index;

# Loading Book::Index pulls in Book::Index::Tables, which configures ORLite;
# the resulting class is expected to report a SQLite DSN.
like( Book::Index->dsn, qr/^dbi:SQLite:/, 'Book::Index->dsn is a SQLite DSN' );

0 comments on commit bc93eab

Please sign in to comment.