Permalink
Browse files

experimental elasticsearch-based search DB support

  • Loading branch information...
1 parent 06f6ae6 commit e311008ad9175384ac7bfd5846dd9eabb4069711 @wakaba wakaba committed Feb 21, 2015
Showing with 195 additions and 17 deletions.
  1. +1 −1 bin/local-server
  2. +4 −0 config/L1.json
  3. +5 −1 config/local.json
  4. +13 −3 lib/SWE/DB.pm
  5. +73 −0 lib/SWE/DB/ES.pm
  6. +1 −0 lib/SWE/Object/Document.pm
  7. +1 −1 lib/SWE/Warabe/App.pm
  8. +28 −11 lib/suikawiki/main.pl
  9. +69 −0 sketch/search.pl
View
@@ -1,6 +1,6 @@
basename=`dirname $0`/..
SW_DB_DIR=$basename/local/testdata \
KARASUMA_CONFIG_JSON=$basename/config/local.json \
-KARASUMA_CONFIG_FILE_DIR_NAME=$basename/local/keys/L1 \
+KARASUMA_CONFIG_FILE_DIR_NAME=$basename/local/keys/local \
exec $basename/plackup $basename/bin/server.psgi \
-p 5012 -s Starlet
View
@@ -6,6 +6,10 @@
"edit_basic_auth": "editors.json",
+ "es_index_url": "es_index_url.txt",
+ "es_user": "es_user.txt",
+ "es_password": "es_password.txt",
+
"web_port": 7410,
"web_has_reverse_proxy": 1,
"env_WEBUA_DEBUG": 0
View
@@ -4,5 +4,9 @@
"wiki_page_help": "HelpPage",
"wiki_page_contact": "ContactPage",
- "edit_basic_auth": "editors.json"
+ "edit_basic_auth": "editors.json",
+
+ "es_index_url": "es_index_url.txt",
+ "es_user": "es_user.txt",
+ "es_password": "es_password.txt"
}
View
@@ -2,8 +2,8 @@ package SWE::DB;
use strict;
use warnings;
-sub new_from_root_path ($$) {
- my $self = bless {root_path => $_[1]}, $_[0];
+sub new_from_root_path_and_config ($$$) {
+ my $self = bless {root_path => $_[1], config => $_[2]}, $_[0];
($self->{ids_path} = $self->{root_path}->child ('ids'))->mkpath;
($self->{names_path} = $self->{root_path}->child ('names'))->mkpath;
return $self;
@@ -13,6 +13,10 @@ sub root_path ($) {
return $_[0]->{root_path};
} # root_path
+sub config ($) {
+ return $_[0]->{config};
+} # config
+
## DEPRECATED
sub db_dir_name ($) {
return $_[0]->{root_path} . '/';
@@ -190,11 +194,17 @@ sub vc ($) {
return SWE::DB::VersionControl->new_from_root_path ($self->root_path);
} # vc
+sub es ($) {
+ my $self = $_[0];
+ require SWE::DB::ES;
+ return SWE::DB::ES->new_from_config ($self->config);
+} # es
+
1;
=head1 LICENSE
-Copyright 2002-2014 Wakaba <wakaba@suikawiki.org>.
+Copyright 2002-2015 Wakaba <wakaba@suikawiki.org>.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
View
@@ -0,0 +1,73 @@
+package SWE::DB::ES;
+use strict;
+use warnings;
+use AnyEvent;
+use Web::UserAgent::Functions qw(http_post_data);
+use JSON::Functions::XS qw(perl2json_bytes json_bytes2perl);
+use Char::Transliterate::Kana;
+
+sub new_from_config ($$) {
+ return bless {config => $_[1]}, $_[0];
+} # new_from_config
+
+sub url_prefix ($) {
+ return $_[0]->{config}->get_file_text ('es_index_url');
+} # url_prefix
+
+sub basic_auth ($) {
+ return [$_[0]->{config}->get_file_base64_text ('es_user'),
+ $_[0]->{config}->get_file_base64_text ('es_password')];
+} # basic_auth
+
+sub update ($$$$) {
+ my ($self, $id, $title, $doc) = @_;
+ my $text = ($title // '') . "\n" . $doc->document_element->text_content;
+ katakana_to_hiragana $text;
+ undef $title unless defined $title and length $title;
+
+ my $url_prefix = $self->url_prefix;
+ my $cv = AE::cv;
+ http_post_data
+ override_method => 'PUT',
+ url => qq<$url_prefix/i/$id>,
+ basic_auth => $self->basic_auth,
+ content => perl2json_bytes {
+ title => $title,
+ content => $text,
+ },
+ anyevent => 1,
+ cb => sub { $cv->send };
+ $cv->recv;
+} # update
+
+sub search ($$) {
+ my ($self, $word) = @_;
+ katakana_to_hiragana $word;
+
+ my $url_prefix = $self->url_prefix;
+ my $cv = AE::cv;
+ http_post_data
+ url => qq<$url_prefix/i/_search?fields=title>,
+ basic_auth => $self->basic_auth,
+ content => perl2json_bytes {
+ query => {
+ match => {content => {
+ query => $word,
+ operator => 'and',
+ }},
+ },
+ },
+ anyevent => 1,
+ cb => sub {
+ my (undef, $res) = @_;
+ my $result = [];
+ if ($res->is_success) {
+ my $json = json_bytes2perl $res->content;
+ push @$result, map { {id => $_->{_id}, score => $_->{_score}, title => $_->{fields}->{title}->[0]} } @{$json->{hits}->{hits}};
+ }
+ $cv->send ($result);
+ };
+ return $cv->recv;
+} # search
+
+1;
@@ -142,6 +142,7 @@ sub name ($) {
## ------ Indexing ------
+# XXX obsolete
sub update_tfidf ($$) {
return; # XXX
@@ -23,7 +23,7 @@ sub db_root_path ($;$) {
sub db ($) {
require SWE::DB;
- return $_[0]->{db} ||= SWE::DB->new_from_root_path ($_[0]->db_root_path);
+ return $_[0]->{db} ||= SWE::DB->new_from_root_path_and_config ($_[0]->db_root_path, $_[0]->config);
} # db
sub path_segments ($) {
View
@@ -420,15 +420,28 @@ ($$)
}
}
+ my $searched = $db->es->search ($name);
+
$app->http->add_response_header
('Content-Type' => 'text/plain; charset=utf-8');
- for my $id (sort {$index->{$b} <=> $index->{$a}} keys %$index) {
+ my $id_found = {};
+ for my $item (@$searched) {
+ my $id_prop = $db->id_prop->get_data ($item->{id});
+ my $name = $item->{title} // [keys %{$id_prop->{name}}]->[0] // $item->{id};
+ $app->http->send_response_body_as_text
+ (join '', $item->{score}, "\t", $item->{id}, "\t", $name, "\x0A");
+ $id_found->{$item->{id}}++;
+ }
+
+ # XXX old
+ for my $id (sort {$index->{$b} <=> $index->{$a}} grep { not $id_found->{$_} } keys %$index) {
my $id_prop = $db->id_prop->get_data ($id);
my $name = [keys %{$id_prop->{name}}]->[0] // $id;
$app->http->send_response_body_as_text
(join '', $index->{$id}, "\t", $id, "\t", $name, "\x0A");
}
+
$app->http->close_response_body;
return $app->throw;
} elsif ($param eq 'posturl') {
@@ -511,10 +524,11 @@ ($$)
my $cache_prop = $db->id_cache_prop->get_data ($id);
my $doc = $id_prop ? get_xml_data ($db, $id, $id_prop, $cache_prop) : undef;
if (defined $doc) {
- require SWE::Object::Document;
- my $document = SWE::Object::Document->new (db => $db, id => $id);
- $document->{name_prop_db} = $db->name_prop;
- $document->update_tfidf ($doc);
+ #require SWE::Object::Document;
+ #my $document = SWE::Object::Document->new (db => $db, id => $id);
+ #$document->{name_prop_db} = $db->name_prop;
+ #$document->update_tfidf ($doc);
+ $db->es->update ($id, $id_prop->{title}, $doc);
}
return $app->throw_manual_redirect
@@ -573,7 +587,8 @@ ($$)
my $doc = $id_props ? get_xml_data ($db, $id, $id_props, $cache_prop) : undef;
if (defined $doc) {
- $document->update_tfidf ($doc);
+ #$document->update_tfidf ($doc);
+ $db->es->update ($id, $id_props->{title}, $doc);
}
$id_lock->unlock;
@@ -654,10 +669,11 @@ ($$)
}
if (defined $doc) {
- require SWE::Object::Document;
- my $document = SWE::Object::Document->new (db => $db, id => $id);
- $document->{name_prop_db} = $db->name_prop;
- $document->update_tfidf ($doc);
+ #require SWE::Object::Document;
+ #my $document = SWE::Object::Document->new (db => $db, id => $id);
+ #$document->{name_prop_db} = $db->name_prop;
+ #$document->update_tfidf ($doc);
+ $db->es->update ($id, $title, $doc);
}
return $app->throw;
@@ -1068,7 +1084,8 @@ ($$)
my $doc = $id_prop ? get_xml_data ($db, $id, $id_prop, $cache_prop) : undef;
if (defined $doc) {
- $document->update_tfidf ($doc);
+ #$document->update_tfidf ($doc);
+ $db->es->update ($id, $id_prop->{title}, $doc);
}
$id_lock->unlock;
View
@@ -0,0 +1,69 @@
+use strict;
+use warnings;
+use AnyEvent;
+use Web::UserAgent::Functions;
+use JSON::Functions::XS qw(perl2json_bytes json_bytes2perl);
+use Data::Dumper;
+
+my $url_prefix = shift;
+my $auth;
+if ($url_prefix =~ s{^https://([^\@:]+):([^\@:]+)\@}{https://}) {
+ $auth = [$1, $2];
+}
+
+
+{
+my $url = qq<$url_prefix/test/docs/3>;
+
+my $cv = AE::cv;
+
+use utf8;
+http_post
+ override_method => 'PUT',
+ anyevent => 1,
+ url => $url,
+ basic_auth => $auth,
+ content => perl2json_bytes {
+ content => "bc def",
+ },
+ cb => sub {
+ my (undef, $res) = @_;
+ if ($res->is_success < 400) {
+ $cv->send (json_bytes2perl $res->content);
+ } else {
+ die $res->code;
+ }
+ };
+
+warn $cv->recv;
+}
+
+
+{
+my $url = qq<$url_prefix/test/docs/_search>;
+
+my $cv = AE::cv;
+
+use utf8;
+http_post
+ anyevent => 1,
+ url => $url,
+ basic_auth => $auth,
+ content => perl2json_bytes {
+ query => {
+ match => {
+ content => {query => "えあう", operator => 'and'},
+ },
+ },
+ },
+ cb => sub {
+ my (undef, $res) = @_;
+ if ($res->is_success < 400) {
+ $cv->send (json_bytes2perl $res->content);
+ } else {
+ die $res->code;
+ }
+ };
+
+warn Dumper $cv->recv;
+}

0 comments on commit e311008

Please sign in to comment.