Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ | |
#include "api.h" | ||
#include "vtypes.h" | ||
#include "varray.h" | ||
#include "vword.h" | ||
#include "util.h" | ||
#include "result-codes.h" | ||
#include "symbol-table.h" | ||
|
@@ -389,10 +390,11 @@ varnam_learn_internal(varnam *handle, const char *word, int confidence) | |
int | ||
varnam_learn(varnam *handle, const char *word) | ||
{ | ||
int rc; | ||
#ifdef _RECORD_EXEC_TIME | ||
V_BEGIN_TIMING | ||
#endif | ||
int rc,i; | ||
varray *stem_results; | ||
#ifdef _RECORD_EXEC_TIME | ||
V_BEGIN_TIMING | ||
#endif | ||
|
||
reset_pool (handle); | ||
|
||
|
@@ -409,6 +411,16 @@ varnam_learn(varnam *handle, const char *word) | |
return rc; | ||
} | ||
|
||
|
||
stem_results= varray_init(); | ||
This comment has been minimized.
Sorry, something went wrong.
navaneeth
Collaborator
|
||
rc = varnam_stem(handle, word, stem_results); | ||
if(rc != VARNAM_SUCCESS) | ||
return rc; | ||
for(i=0;i<=stem_results->index;i++) | ||
This comment has been minimized.
Sorry, something went wrong. |
||
varnam_learn_internal(handle, ((vword*)varray_get(stem_results, i))->text, 1); | ||
|
||
varray_free(stem_results, *destroy_word); | ||
This comment has been minimized.
Sorry, something went wrong. |
||
|
||
rc = vwt_end_changes (handle); | ||
if (rc != VARNAM_SUCCESS) | ||
return rc; | ||
|
@@ -689,3 +701,168 @@ varnam_is_known_word(varnam* handle, const char* word) | |
else | ||
return 0; | ||
} | ||
|
||
/**
 * Checks whether a matched stem rule is overridden by an entry in the
 * stem_exceptions table.
 *
 * The table is queried for a row whose `stem` equals end_buffer. When a
 * row is found, its `exception` column is compared with the last syllable
 * of word_buffer (as produced by vst_get_last_syllable).
 *
 * @param handle      varnam handle; handle->internal->db is the database used.
 * @param word_buffer the part of the word that precedes the ending.
 * @param end_buffer  the ending that matched a stem rule.
 *
 * @return VARNAM_STEMRULE_HIT  when the exception equals the last syllable,
 *         VARNAM_STEMRULE_MISS when an exception exists but differs,
 *         VARNAM_SUCCESS       when there is no exception row, or when the
 *                              last syllable could not be obtained,
 *         VARNAM_ERROR         when the statement could not be prepared,
 *                              bound or stepped, or the row holds an empty
 *                              exception.
 *
 * The prepared statement and the temporary syllable buffer are released on
 * every exit path.
 */
int varnam_check_exception(varnam *handle, strbuf *word_buffer, strbuf *end_buffer)
{
    sqlite3 *db;
    sqlite3_stmt *stmt = NULL;
    strbuf *syllable = strbuf_init(8);
    const unsigned char *exception;
    int rc;
    int result = VARNAM_ERROR;
    const char *sql = "select exception from stem_exceptions where stem = ?1";

    db = handle->internal->db;

    rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL);
    if(rc != SQLITE_OK)
    {
        set_last_error(handle, "Failed to initialize statement : %s", sqlite3_errmsg(db));
        goto cleanup;
    }

    /* A NULL destructor means SQLITE_STATIC: end_buffer must stay untouched
       until the statement is finalized, which happens before we return. */
    rc = sqlite3_bind_text(stmt, 1, strbuf_to_s(end_buffer), -1, NULL);
    if(rc != SQLITE_OK)
    {
        set_last_error(handle, "Failed to initialize statement : %s", sqlite3_errmsg(db));
        goto cleanup;
    }

    rc = vst_get_last_syllable(handle, word_buffer, syllable);
    if(rc != VARNAM_SUCCESS)
    {
        set_last_error(handle, "Could not obtain last syllable");
        /* NOTE(review): this path has always reported VARNAM_SUCCESS even
           though an error message is recorded; the return code is kept for
           compatibility - confirm whether VARNAM_ERROR was intended. */
        result = VARNAM_SUCCESS;
        goto cleanup;
    }

    rc = sqlite3_step(stmt);
    if(rc == SQLITE_ROW)
    {
        /* sqlite3_column_text() always yields a NUL-terminated string, which
           strcmp() requires; sqlite3_column_blob() gives no such guarantee.
           The pointer is fetched before the byte count, as SQLite advises. */
        exception = sqlite3_column_text(stmt, 0);
        if(exception != NULL && sqlite3_column_bytes(stmt, 0) != 0)
        {
            if(strcmp(strbuf_to_s(syllable), (const char*)exception) == 0)
                result = VARNAM_STEMRULE_HIT;
            else
                result = VARNAM_STEMRULE_MISS;
        }
        /* An empty or NULL exception leaves result at VARNAM_ERROR. */
    }
    else if(rc == SQLITE_DONE)
    {
        result = VARNAM_SUCCESS;
    }
    else
    {
        set_last_error(handle, "Sqlite error : %s", sqlite3_errmsg(db));
    }

cleanup:
    /* sqlite3_finalize(NULL) is a harmless no-op, so this is safe even when
       preparing the statement failed. */
    sqlite3_finalize(stmt);
    strbuf_destroy(syllable);
    return result;
}
|
||
/*Searches the symbol table to see if the old_ending constitutes a stem rule*/ | ||
int | ||
get_stem(varnam* handle, strbuf* old_ending, strbuf *new_ending) | ||
This comment has been minimized.
Sorry, something went wrong. |
||
{ | ||
sqlite3 *db; | ||
sqlite3_stmt *stmt; | ||
int rc; | ||
const char *sql="select new_ending from stemrules where old_ending = ?1;"; | ||
|
||
db = handle->internal->db; | ||
|
||
rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL); | ||
if(rc != SQLITE_OK) | ||
{ | ||
set_last_error(handle, "Failed to prepare statement : %s", sqlite3_errmsg(db)); | ||
sqlite3_finalize( stmt ); | ||
return VARNAM_ERROR; | ||
} | ||
|
||
sqlite3_bind_text(stmt, 1, strbuf_to_s(old_ending), -1, NULL); | ||
|
||
rc = sqlite3_step(stmt); | ||
|
||
if(rc == SQLITE_ROW) | ||
{ | ||
strbuf_clear(new_ending); | ||
strbuf_add(new_ending, (char*)sqlite3_column_text(stmt, 0)); | ||
sqlite3_finalize(stmt); | ||
return VARNAM_STEMRULE_HIT; | ||
} | ||
else if(rc == SQLITE_DONE) | ||
{ | ||
sqlite3_finalize(stmt); | ||
return VARNAM_STEMRULE_MISS; | ||
} | ||
else | ||
{ | ||
sqlite3_finalize(stmt); | ||
set_last_error(handle, "Sqlite error : %s", sqlite3_errmsg(db)); | ||
return VARNAM_ERROR; | ||
} | ||
|
||
} | ||
|
||
/**
 * Collects the stems of `word` by repeatedly peeling its ending off and
 * looking the accumulated ending up in the stem rules.
 *
 * On each iteration the last unit of the remaining word (as returned by
 * strbuf_get_ending) is moved to the front of the accumulated ending. When
 * get_stem() reports a rule for that ending and varnam_check_exception()
 * does not report an exception, the replacement ending is appended to the
 * remaining word and the result is pushed to stem_results as a new vword.
 * Processing then continues on that stemmed word.
 *
 * @param handle       varnam handle.
 * @param word         the word to stem.
 * @param stem_results array that receives one vword (created with Word())
 *                     per stem found. Entries pushed before a failure are
 *                     left in the array.
 *
 * @return VARNAM_SUCCESS when the whole word was processed,
 *         VARNAM_ERROR   when a stem rule lookup failed (last error is set).
 *
 * All temporary buffers are released on every exit path.
 */
int varnam_stem(varnam *handle, const char *word, varray *stem_results)
{
    int rc;
    int result = VARNAM_SUCCESS;
    strbuf *word_buffer, *end_buffer, *new_ending, *temp;
    char *ending;

    word_buffer = strbuf_init(8);
    end_buffer = strbuf_init(8);
    temp = strbuf_init(8);
    new_ending = strbuf_init(8);
    strbuf_add(word_buffer, word);

    while(word_buffer->length > 0)
    {
        /*the next character of word_buffer should go
          to the beginning of the end_buffer. For this
          we copy end_buffer to temp, clear end_buffer,
          add new ending to end_buffer and append the
          contents of temp back to end_buffer*/
        strbuf_clear(temp);
        strbuf_add(temp, strbuf_to_s(end_buffer));
        strbuf_clear(end_buffer);

        ending = strbuf_get_ending(word_buffer);
        if(ending == NULL)
        {
            /* Without an ending word_buffer cannot shrink, so bail out
               instead of dereferencing NULL or looping forever. */
            set_last_error(handle, "Could not obtain word ending");
            result = VARNAM_ERROR;
            break;
        }

        strbuf_add(end_buffer, ending);
        strbuf_add(end_buffer, strbuf_to_s(temp));
        strbuf_remove_from_last(word_buffer, ending);

        /* ending is not needed past this point; releasing it here keeps
           every later exit from the iteration (continue, break, fall
           through) leak-free. */
        free(ending);
        ending = NULL;

        rc = get_stem(handle, end_buffer, new_ending);
        if(rc == VARNAM_STEMRULE_HIT)
        {
            rc = varnam_check_exception(handle, word_buffer, end_buffer);
            if(rc == VARNAM_STEMRULE_HIT)
                continue;

            strbuf_add(word_buffer, strbuf_to_s(new_ending));
            /*Creating a vword using Word()
              word_buffer will change in subsequent iterations of the loop
              So pushing a pointer to word_buffer->buffer to varray is of
              no use. So we create a vword for each word that is to be learned
              and push it to the varray*/
            varray_push(stem_results, Word(handle, (char*)strbuf_to_s(word_buffer), 0));
            strbuf_clear(end_buffer);
        }
        else if(rc != VARNAM_STEMRULE_MISS)
        {
            set_last_error(handle, "stemrule query failed");
            result = VARNAM_ERROR;
            break;
        }
    }

    strbuf_destroy(temp);
    strbuf_destroy(word_buffer);
    strbuf_destroy(end_buffer);
    strbuf_destroy(new_ending);
    return result;
}
3 comments
on commit ea4acac
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall good. There are some minor corrections, which I have commented on inline. I have created issue #52 to track the whole merge and getting it into production.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CMake is failing. Did you forget to commit something?
CMake Error at examples/CMakeLists.txt:16 (add_executable):
Cannot find source file:
stemmer.c
Tried extensions .c .C .c++ .cc .cpp .cxx .m .M .mm .h .hh .h++ .hm .hpp
.hxx .in .txx
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we should expose `varnam_stem` as a first-class API. Stemming is just an implementation detail and I don't think clients will be interested in it. So move this into a private function where the persisting of stemming happens.