Permalink
Browse files

fixed Makefile.PL

fixed POD

cleaned some redundant code
  • Loading branch information...
1 parent 9cfc0a0 commit 4ef214ab76f2fd19eebdd0756d676403c72e13d4 @pawa- committed Nov 5, 2011
Showing with 91 additions and 76 deletions.
  1. +38 −8 Makefile.PL
  2. +8 −1 README
  3. +9 −2 lib/Text/KyTea.pm
  4. +27 −52 xs/KyTea.xs
  5. +9 −13 xs/typemap
View
@@ -1,19 +1,25 @@
use inc::Module::Install;
use Module::Install::XSUtil;
+my $LIB_PATH = prompt("Path to lib directory of KyTea", '/usr/local/lib');
+my $INC_PATH = prompt("Path to include directory of KyTea", '/usr/local/include');
+my %config = (LIBS => "-L$LIB_PATH -lkytea", INC => "-I$INC_PATH");
+
name 'Text-KyTea';
all_from 'lib/Text/KyTea.pm';
-requires 'Carp';
+requires 'Carp';
build_requires 'Devel::CheckLib';
test_requires 'Test::More' => 0.88; # done_testing
test_requires 'Test::Base';
requires_cplusplus;
cc_src_paths 'xs';
cc_warnings;
-cc_assert_lib lib => 'kytea';
-cc_libs 'kytea';
+#cc_assert_lib %config;
+simple_check_lib($LIB_PATH, $INC_PATH);
+cc_libs $config{LIBS};
+cc_include_paths $INC_PATH;
use_ppport 3.20;
tests 't/*.t';
@@ -27,8 +33,32 @@ resources(
bugtracker => 'https://github.com/pawa-/Text-KyTea/issues',
);
-WriteMakefile(
- CC => 'g++',
- LD => 'g++',
- XSOPT => '-C++',
-);
+WriteAll;
+
+
+sub simple_check_lib
+{
+    # Sanity-checks the KyTea directories entered at the prompts.
+    #
+    # Arguments:
+    #   $lib_path - directory expected to contain the KyTea library
+    #   $inc_path - directory expected to contain the KyTea headers
+    #
+    # Dies if either directory cannot be opened (via contents_of) or
+    # does not appear to contain KyTea.
+    #
+    # Why not cc_assert_lib (original author's note): when KyTea is
+    # installed in a non-default directory, cc_assert_lib fails unless
+    # LD_LIBRARY_PATH points at that directory, even though Text::KyTea
+    # itself can be used without setting LD_LIBRARY_PATH. So this
+    # subroutine only inspects the directory listings instead.
+
+    my ($lib_path, $inc_path) = @_;
+
+    # Accept every common library flavour, not only ELF shared objects:
+    #   libkytea.so[.N...]   (Linux / BSD)
+    #   libkytea.dylib       (Mac OS X)
+    #   libkytea.dll[.a]     (Cygwin / MinGW)
+    #   libkytea.a           (static-only builds)
+    # Anything the former /libkytea\.so/ check matched still matches.
+    die "wrong lib directory of KyTea!" if !grep { /libkytea\.(?:so|dylib|dll|a\z)/ } contents_of($lib_path);
+
+    # Any entry whose name contains "kytea" counts as the header directory.
+    die "wrong include directory of KyTea!" if !grep { /kytea/ } contents_of($inc_path);
+}
+
+sub contents_of
+{
+    # Returns the names of every entry in directory $dir, exactly as
+    # readdir reports them. Dies with the system error message if the
+    # directory cannot be opened.
+
+    my ($dir) = @_;
+
+    my $handle;
+    opendir($handle, $dir) || die "can't open $dir: $!";
+
+    my @entries = readdir($handle);
+    closedir $handle;
+
+    return @entries;
+}
View
9 README
@@ -25,10 +25,14 @@ SYNOPSIS
}
DESCRIPTION
+ KyTea is a general toolkit developed for analyzing text, with a focus on
+ Japanese, Chinese and other languages requiring word or morpheme
+ segmentation.
+
This module works under KyTea Ver.0.3.2 or later. Under older versions of
KyTea, it might not work.
- For information about KyTea, please see the SEE ALSO.
+ For more information about KyTea, please see the SEE ALSO section.
METHODS
new(%config)
@@ -48,6 +52,9 @@ METHODS
Reads the given model file. The model file should be read by the
new(model => $path) method.
+ Model files are available at
+ http://www.phontron.com/kytea/model.html
+
parse($text)
Parses the given text via KyTea, and returns results of analysis.
The results are returned as an array reference.
View
@@ -4,7 +4,7 @@ use strict;
use warnings;
use Carp;
-our $VERSION = '0.21';
+our $VERSION = '0.22';
require XSLoader;
@@ -94,10 +94,15 @@ my ($text, %config, $path);
=head1 DESCRIPTION
+KyTea is a general toolkit developed for analyzing text,
+with a focus on Japanese, Chinese and other languages
+requiring word or morpheme segmentation.
+
This module works under KyTea Ver.0.3.2 or later.
Under older versions of KyTea, it might not work.
-For information about KyTea, please see the SEE ALSO.
+For more information about KyTea, please see the SEE ALSO section.
+
=head1 METHODS
@@ -123,6 +128,8 @@ Creates a new Text::KyTea instance.
Reads the given model file.
The model file should be read by the new(model => $path) method.
+Model files are available at http://www.phontron.com/kytea/model.html
+
=item parse($text)
View
@@ -24,6 +24,7 @@ extern "C" {
using namespace std;
using namespace kytea;
+const string tkt_keys[4] = {"surface", "feature", "score", "tags"};
class TextKyTea
{
@@ -40,10 +41,10 @@ public:
StringUtil* _get_string_util() { return util; }
void _init(
- const char* model, bool nows, bool notags, vector<int> notag,
- bool nounk, unsigned int unkbeam, unsigned int tagmax, const char* deftag,
- const char* unktag, const char* wordbound, const char* tagbound, const char* elembound,
- const char* unkbound, const char* skipbound, const char* nobound, const char* hasbound
+ const char* model, const bool nows, const bool notags, const vector<int> notag,
+ const bool nounk, const unsigned int unkbeam, const unsigned int tagmax, const char* deftag,
+ const char* unktag, const char* wordbound, const char* tagbound, const char* elembound,
+ const char* unkbound, const char* skipbound, const char* nobound, const char* hasbound
)
{
read_model(model);
@@ -89,63 +90,37 @@ PROTOTYPES: DISABLE
TextKyTea *
_init_text_kytea(char* CLASS, SV* args_ref)
PREINIT:
- HV* hv = (HV*)sv_2mortal( (SV*)newHV() );
-
- SV* model = sv_newmortal();
- SV* nows = sv_newmortal();
- SV* notags = sv_newmortal();
- SV* notag = sv_newmortal();
- SV* nounk = sv_newmortal();
- SV* unkbeam = sv_newmortal();
-
- SV* tagmax = sv_newmortal();
- SV* deftag = sv_newmortal();
- SV* unktag = sv_newmortal();
-
- SV* wordbound = sv_newmortal();
- SV* tagbound = sv_newmortal();
- SV* elembound = sv_newmortal();
- SV* unkbound = sv_newmortal();
- SV* skipbound = sv_newmortal();
- SV* nobound = sv_newmortal();
- SV* hasbound = sv_newmortal();
-
- AV* notag_av = (AV*)sv_2mortal( (SV*)newAV() );
+ HV* hv = (HV*)sv_2mortal( (SV*)newHV() );
+ AV* notag = (AV*)sv_2mortal( (SV*)newAV() );
vector<int> notag_vec;
CODE:
hv = (HV*)SvRV(args_ref);
- model = *hv_fetchs(hv, "model", FALSE);
- nows = *hv_fetchs(hv, "nows", FALSE);
- notags = *hv_fetchs(hv, "notags", FALSE);
- notag = *hv_fetchs(hv, "notag", FALSE);
- nounk = *hv_fetchs(hv, "nounk", FALSE);
- unkbeam = *hv_fetchs(hv, "unkbeam", FALSE);
+ notag = (AV*)SvRV( *hv_fetchs(hv, "notag", FALSE) );
- tagmax = *hv_fetchs(hv, "tagmax", FALSE);
- deftag = *hv_fetchs(hv, "deftag", FALSE);
- unktag = *hv_fetchs(hv, "unktag", FALSE);
-
- wordbound = *hv_fetchs(hv, "wordbound", FALSE);
- tagbound = *hv_fetchs(hv, "tagbound", FALSE);
- elembound = *hv_fetchs(hv, "elembound", FALSE);
- unkbound = *hv_fetchs(hv, "unkbound", FALSE);
- skipbound = *hv_fetchs(hv, "skipbound", FALSE);
- nobound = *hv_fetchs(hv, "nobound", FALSE);
- hasbound = *hv_fetchs(hv, "hasbound", FALSE);
-
- notag_av = (AV*)SvRV(notag);
-
- for (int i = 0; av_len(notag_av) >= i; ++i)
- notag_vec.push_back( SvIV( *av_fetch(notag_av, i, FALSE) ) );
+ for (int i = 0; av_len(notag) >= i; ++i)
+ notag_vec.push_back( SvIV( *av_fetch(notag, i, FALSE) ) );
TextKyTea* tkt = new TextKyTea();
+
tkt->_init(
- SvPV_nolen(model), SvUV(nows), SvUV(notags), notag_vec,
- SvUV(nounk), SvUV(unkbeam), SvUV(tagmax), SvPV_nolen(deftag),
- SvPV_nolen(unktag), SvPV_nolen(wordbound), SvPV_nolen(tagbound), SvPV_nolen(elembound),
- SvPV_nolen(unkbound), SvPV_nolen(skipbound), SvPV_nolen(nobound), SvPV_nolen(hasbound)
+ SvPV_nolen( *hv_fetchs(hv, "model", FALSE) ),
+ SvUV( *hv_fetchs(hv, "nows", FALSE) ),
+ SvUV( *hv_fetchs(hv, "notags", FALSE) ),
+ notag_vec,
+ SvUV( *hv_fetchs(hv, "nounk", FALSE) ),
+ SvUV( *hv_fetchs(hv, "unkbeam", FALSE) ),
+ SvUV( *hv_fetchs(hv, "tagmax", FALSE) ),
+ SvPV_nolen( *hv_fetchs(hv, "deftag", FALSE) ),
+ SvPV_nolen( *hv_fetchs(hv, "unktag", FALSE) ),
+ SvPV_nolen( *hv_fetchs(hv, "wordbound", FALSE) ),
+ SvPV_nolen( *hv_fetchs(hv, "tagbound", FALSE) ),
+ SvPV_nolen( *hv_fetchs(hv, "elembound", FALSE) ),
+ SvPV_nolen( *hv_fetchs(hv, "unkbound", FALSE) ),
+ SvPV_nolen( *hv_fetchs(hv, "skipbound", FALSE) ),
+ SvPV_nolen( *hv_fetchs(hv, "nobound", FALSE) ),
+ SvPV_nolen( *hv_fetchs(hv, "hasbound", FALSE) )
);
RETVAL = tkt;
View
@@ -15,47 +15,43 @@ KyteaSentence * T_KYTEA_SENTENCE
OUTPUT
T_KYTEA_SENTENCE
- AV *av = (AV*)sv_2mortal( (SV*)newAV() );
+ AV* av = (AV*)sv_2mortal( (SV*)newAV() );
StringUtil* util = THIS->_get_string_util();
- const string keys[4] = {"surface", "feature", "score", "tags"};
-
for (int i = 0; i < (int)$var->words.size(); ++i)
{
- HV *hv = (HV*)sv_2mortal( (SV*)newHV() );
+ HV* hv = (HV*)sv_2mortal( (SV*)newHV() );
string surface = util->showString($var->words[i].surf);
SV* sv = newSVpvn( surface.c_str(), surface.length() );
- hv_store(hv, keys[0].c_str(), keys[0].length(), sv, 0);
+ hv_store(hv, tkt_keys[0].c_str(), tkt_keys[0].length(), sv, 0);
- AV *tags = (AV*)sv_2mortal( (SV*)newAV() );
+ AV* tags = (AV*)sv_2mortal( (SV*)newAV() );
for (int j = 0; j < (int)$var->words[i].tags.size(); ++j)
{
- AV *tag_pairs = (AV*)sv_2mortal( (SV*)newAV() );
+ AV* tag_pairs = (AV*)sv_2mortal( (SV*)newAV() );
for (int k = 0; k < (int)$var->words[i].tags[j].size(); ++k)
{
- HV *tag = (HV*)sv_2mortal( (SV*)newHV() );
+ HV* tag = (HV*)sv_2mortal( (SV*)newHV() );
string feature = util->showString($var->words[i].tags[j][k].first);
double score = $var->words[i].tags[j][k].second;
sv = newSVpvn( feature.c_str(), feature.length() );
- hv_store(tag, keys[1].c_str(), keys[1].length(), sv, 0);
-
- sv = newSVnv(score);
- hv_store(tag, keys[2].c_str(), keys[2].length(), sv, 0);
+ hv_store(tag, tkt_keys[1].c_str(), tkt_keys[1].length(), sv, 0);
+ hv_store(tag, tkt_keys[2].c_str(), tkt_keys[2].length(), newSVnv(score), 0);
av_push( tag_pairs, newRV_inc((SV*)tag) );
}
av_push( tags, newRV_inc((SV*)tag_pairs) );
}
- hv_store(hv, keys[3].c_str(), keys[3].length(), newRV_inc((SV*)tags), 0);
+ hv_store(hv, tkt_keys[3].c_str(), tkt_keys[3].length(), newRV_inc((SV*)tags), 0);
av_push( av, newRV_inc((SV*)hv) );
}

0 comments on commit 4ef214a

Please sign in to comment.