Skip to content

Commit

Permalink
Add automatic support for <UNKNOWN-WORD>
Browse files Browse the repository at this point in the history
An interesting phenomenon shows up, described in issue #1351
  • Loading branch information
linas committed Nov 4, 2022
2 parents b4e34af + 8d1d591 commit 8875f2d
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 113 deletions.
36 changes: 22 additions & 14 deletions data/demo-atomese/storage.dict
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@
% obtained above can be rescaled linearly before being used as a cost.
% The rescaling is as usual: y=mx+b where m==`cost-scale` and
% b==`cost-offset`.
%
% Keep in mind that the parser ranks parses from lowest to highest cost.
% If word-pair links have a negative cost, the parser is incentivized to
% add as many of these as possible, while still resulting in a planar
% graph with many loops.
#define cost-scale -0.5;
#define cost-offset 0.0;

Expand Down Expand Up @@ -133,9 +138,8 @@
% If set to zero, these supplementary links will not be generated. If set
% to 2 or more, then that many extra optional connectors will be added.
%
% If sections are not enabled, then these parameters have no effect.
#define left-pairs 1;
#define right-pairs 1;
% If sections are not enabled, then this parameter has no effect.
#define extra-pairs 1;

% Same as above, but the supplementary connectors will all be of type `ANY`,
% and thus can connect to anything. Since these can connect to anything,
Expand All @@ -144,10 +148,9 @@
% those unknown words. To be effective, the cost should be set high enough
% so that these are more costly than existing word-pairs.
%
% Setting to zero disables this.
% If sections are disabled, then these parameters have no effect.
#define left-any 2;
#define right-any 2;
% This is a bool on/off flag. Setting to zero disables this.
% If sections are disabled, then this parameter has no effect.
#define extra-any 1;

% Create expressions that consist entirely of word-pair relationships.
% The disjuncts will have up to the specified number of connectors; 4 is
Expand All @@ -162,18 +165,23 @@
% Supplement the above with additional connectors of type "ANY". This is
% useful for providing links between words that don't already exist as
% pairs in the dataset. This has no effect, if `pair-disjuncts` (above)
% is set to zero.
#define pair-with-any 2;
% is set to zero. This is a bool on/off value; setting to zero disables.
#define pair-with-any 1;

% Create expressions that consist entirely of "ANY" link-types.
% The disjuncts will have up to the specified number of connectors; 4 is
% the default. Setting this to zero disables the creation of such
% disjuncts. If `enable-sections` and `pair-disjuncts` (above) are turned
% off, the result will be pure random planar tree parsing.
% This is a bool on/off parameter; setting this to zero disables the
% creation of such disjuncts. If `enable-sections` and `pair-disjuncts`
% (above) are turned off, the result will be pure random planar tree parsing.
%
% Each connector has a cost that is determined by the config parameters,
% above.
#define any-disjuncts 4;
#define any-disjuncts 0;

% Enable the automatic generation of <UNKNOWN-WORD>. It will be
% automatically added to the dictionary, with multi-ANY connectors on it.
% The ANY connectors will be used, irrespective of the other `any`
% settings above.
#define enable-unknown-word 1;

% -----------------------
% For this file to be read, at least one bogus entry is needed. It is
Expand Down
20 changes: 9 additions & 11 deletions link-grammar/dict-atomese/local-as.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,18 +44,15 @@ class Local

// Basic Sections
bool enable_sections;

// Supplements
int left_pairs;
int right_pairs;

int left_any;
int right_any;
int extra_pairs;
bool extra_any;

// Disjuncts made from pairs
int pair_disjuncts;
int pair_with_any;
int any_disjuncts;
bool pair_with_any;
bool any_disjuncts;

bool enable_unknown_word;
};

bool section_boolean_lookup(Dictionary dict, const char *s);
Expand All @@ -64,8 +61,9 @@ bool pair_boolean_lookup(Dictionary dict, const char *s);
Exp* make_exprs(Dictionary dict, const Handle& germ);
Exp* make_sect_exprs(Dictionary dict, const Handle& germ);
Exp* make_pair_exprs(Dictionary dict, const Handle& germ);
Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity);
Exp* make_any_exprs(Dictionary dict, int arity);
Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity, bool any);
Exp* make_any_exprs(Dictionary dict);
Exp* make_cart_any(Dictionary dict, int arity);

void or_enchain(Dictionary, Exp* &orhead, Exp*);
void and_enchain_left(Dictionary, Exp* &orhead, Exp* &ortail, Exp*);
Expand Down
110 changes: 58 additions & 52 deletions link-grammar/dict-atomese/lookup-atomese.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,15 @@ using namespace opencog;

#define ANY_DEFAULT_STRING "any-default"
#define ENABLE_SECTIONS_STRING "enable-sections"
#define EXTRA_PAIRS_STRING "extra-pairs"
#define EXTRA_ANY_STRING "extra-any"

#define PAIR_DISJUNCTS_STRING "pair-disjuncts"
#define PAIR_WITH_ANY_STRING "pair-with-any"

#define LEFT_PAIRS_STRING "left-pairs"
#define RIGHT_PAIRS_STRING "right-pairs"

#define ANY_DISJUNCTS_STRING "any-disjuncts"
#define LEFT_ANY_STRING "left-any"
#define RIGHT_ANY_STRING "right-any"

#define ENABLE_UNKNOWN_WORD_STRING "enable-unknown-word"

/// Shared global
static AtomSpacePtr external_atomspace;
Expand Down Expand Up @@ -162,16 +160,14 @@ bool as_open(Dictionary dict)
local->any_default = atof(LDEF(ANY_DEFAULT_STRING, "3.0"));

local->enable_sections = atoi(LDEF(ENABLE_SECTIONS_STRING, "1"));

local->left_pairs = atoi(LDEF(LEFT_PAIRS_STRING, "1"));
local->right_pairs = atoi(LDEF(RIGHT_PAIRS_STRING, "1"));

local->left_any = atoi(LDEF(LEFT_ANY_STRING, "2"));
local->right_any = atoi(LDEF(RIGHT_ANY_STRING, "2"));
local->extra_pairs = atoi(LDEF(EXTRA_PAIRS_STRING, "1"));
local->extra_any = atoi(LDEF(EXTRA_ANY_STRING, "1"));

local->pair_disjuncts = atoi(LDEF(PAIR_DISJUNCTS_STRING, "4"));
local->pair_with_any = atoi(LDEF(PAIR_WITH_ANY_STRING, "2"));
local->any_disjuncts = atoi(LDEF(ANY_DISJUNCTS_STRING, "4"));
local->pair_with_any = atoi(LDEF(PAIR_WITH_ANY_STRING, "1"));
local->any_disjuncts = atoi(LDEF(ANY_DISJUNCTS_STRING, "0"));

local->enable_unknown_word = atoi(LDEF(ENABLE_UNKNOWN_WORD_STRING, "1"));

dict->as_server = (void*) local;

Expand Down Expand Up @@ -268,14 +264,16 @@ bool as_boolean_lookup(Dictionary dict, const char *s)
bool found = dict_node_exists_lookup(dict, s);
if (found) return true;

if (local->enable_unknown_word and 0 == strcmp(s, "<UNKNOWN-WORD>"))
return true;

if (0 == strcmp(s, LEFT_WALL_WORD))
s = "###LEFT-WALL###";

if (local->enable_sections)
found = section_boolean_lookup(dict, s);

if (0 < local->pair_disjuncts or
0 < local->left_pairs or 0 < local->right_pairs)
if (0 < local->pair_disjuncts or 0 < local->extra_pairs)
{
bool have_pairs = pair_boolean_lookup(dict, s);
found = found or have_pairs;
Expand Down Expand Up @@ -361,25 +359,17 @@ Exp* make_exprs(Dictionary dict, const Handle& germ)
Exp* orhead = nullptr;

// Create disjuncts consisting entirely of "ANY" links.
if (0 < local->any_disjuncts)
if (local->any_disjuncts)
{
Exp* any = make_any_exprs(dict, local->any_disjuncts);
Exp* any = make_any_exprs(dict);
or_enchain(dict, orhead, any);
}

// Create disjuncts consisting entirely of word-pair links.
if (0 < local->pair_disjuncts or
0 < local->left_pairs or 0 < local->right_pairs)
if (0 < local->pair_disjuncts or 0 < local->extra_pairs)
{
Exp* cpr = make_cart_pairs(dict, germ, local->pair_disjuncts);

// Add "ANY" links, if requested.
if (0 < local->pair_with_any)
{
Exp* ap = make_any_exprs(dict, local->pair_with_any);
Exp* dummy;
and_enchain_left(dict, cpr, dummy, ap);
}
Exp* cpr = make_cart_pairs(dict, germ, local->pair_disjuncts,
local->pair_with_any);
or_enchain(dict, orhead, cpr);
}

Expand All @@ -393,19 +383,57 @@ Exp* make_exprs(Dictionary dict, const Handle& germ)
return orhead;
}

/// Wrap the expression `exp` in a freshly allocated Dict_node keyed by
/// `ssc`, insert that node into the dictionary's lookup tree, and hand
/// back the result of looking `ssc` up again.
///
/// @param dict  Dictionary whose tree (`dict->root`) receives the node.
/// @param exp   Expression to attach to the new node.
/// @param ssc   Word string stored in the node and used for the final
///              lookup.
/// @return      Whatever `dict_node_lookup(dict, ssc)` returns after the
///              insertion.
static Dict_node * make_dn(Dictionary dict, Exp* exp, const char* ssc)
{
	// Number of insertions between rebalancing passes.
	const int rebalance_period = 30;

	// Zero-filled node; only the word and its expression are set here.
	Dict_node* node = (Dict_node*) calloc(1, sizeof(Dict_node));
	node->string = ssc;
	node->exp = exp;

	// Insert into the dictionary tree, so that later requests for the
	// same word are served from this cache.
	dict->root = dict_node_insert(dict, dict->root, node);
	++dict->num_entries;

	lgdebug(+D_SPEC+5, "as_lookup_list %d for >>%s<< nexpr=%d\n",
		dict->num_entries, ssc, size_of_expression(exp));

	// Periodically flatten the tree to a vine and rebuild it, to keep
	// it balanced.
	if (0 == dict->num_entries % rebalance_period)
	{
		dict->root = dsw_tree_to_vine(dict->root);
		dict->root = dsw_vine_to_tree(dict->root, dict->num_entries);
	}

	// Do not return `node` itself: as_free_llist() will delete what is
	// returned, and deleting the inserted node would corrupt memory.
	// Return the result of a regular lookup instead.
	return dict_node_lookup(dict, ssc);
}

/// Given a word, return the collection of Dict_nodes holding the
/// expressions for that word.
Dict_node * as_lookup_list(Dictionary dict, const char *s)
{
// Do we already have this word cached? If so, pull from
// the cache.
Dict_node * dn = dict_node_lookup(dict, s);
Dict_node* dn = dict_node_lookup(dict, s);

if (dn) return dn;

const char* ssc = string_set_add(s, dict->string_set);
Local* local = (Local*) (dict->as_server);

if (local->enable_unknown_word and 0 == strcmp(s, "<UNKNOWN-WORD>"))
{
// XXX Note the hard-coded 6. I do not understand why 2 is not
// enough. See issue #1351 for a discussion.
// https://github.com/opencog/link-grammar/issues/1351
Exp* exp = make_cart_any(dict, 6);
return make_dn(dict, exp, ssc);
}

if (0 == strcmp(s, LEFT_WALL_WORD))
s = "###LEFT-WALL###";

Expand Down Expand Up @@ -436,29 +464,7 @@ Dict_node * as_lookup_list(Dictionary dict, const char *s)
if (nullptr == exp)
return nullptr;

dn = (Dict_node*) malloc(sizeof(Dict_node));
memset(dn, 0, sizeof(Dict_node));
dn->string = ssc;
dn->exp = exp;

// Cache the result; avoid repeated lookups.
dict->root = dict_node_insert(dict, dict->root, dn);
dict->num_entries++;

lgdebug(+D_SPEC+5, "as_lookup_list %d for >>%s<< nexpr=%d\n",
dict->num_entries, ssc, size_of_expression(exp));

// Rebalance the tree every now and then.
if (0 == dict->num_entries% 30)
{
dict->root = dsw_tree_to_vine(dict->root);
dict->root = dsw_vine_to_tree(dict->root, dict->num_entries);
}

// Perform the lookup. We cannot return the dn above, as the
// as_free_llist() below will delete it, leading to mem corruption.
dn = dict_node_lookup(dict, ssc);
return dn;
return make_dn(dict, exp, ssc);
}

// This is supposed to provide a wild-card lookup. However,
Expand Down
33 changes: 8 additions & 25 deletions link-grammar/dict-atomese/sections.cc
Original file line number Diff line number Diff line change
Expand Up @@ -219,14 +219,12 @@ Exp* make_sect_exprs(Dictionary dict, const Handle& germ)
{
Local* local = (Local*) (dict->as_server);
Exp* orhead = nullptr;
Exp* extras = nullptr;

// Create some optional word-pair links; these may be nullptr's.
Exp* left_pairs = make_cart_pairs(dict, germ, local->left_pairs);
Exp* right_pairs = make_cart_pairs(dict, germ, local->right_pairs);

// Create some optional ANY-links; these may be nullptr's.
Exp* left_any = make_any_exprs(dict, local->left_any);
Exp* right_any = make_any_exprs(dict, local->right_any);
if (0 < local->extra_pairs)
extras = make_cart_pairs(dict, germ, local->extra_pairs,
local->extra_any);

// Loop over all Sections on the word.
HandleSeq sects = germ->getIncomingSetByType(SECTION);
Expand Down Expand Up @@ -286,27 +284,12 @@ Exp* make_sect_exprs(Dictionary dict, const Handle& germ)
continue;
}

// Tack on ANY connectors, as configured.
if (left_any)
{
Exp* optex = make_optional_node(dict->Exp_pool, left_any);
and_enchain_left(dict, andhead, andtail, optex);
}
if (right_any)
{
Exp* optex = make_optional_node(dict->Exp_pool, right_any);
and_enchain_right(dict, andhead, andtail, optex);
}

// Tack on word-pair connectors, as configured.
if (left_pairs)
// Tack on extra connectors, as configured.
if (extras)
{
Exp* optex = make_optional_node(dict->Exp_pool, left_pairs);
Exp* optex = make_optional_node(dict->Exp_pool, extras);
and_enchain_left(dict, andhead, andtail, optex);
}
if (right_pairs)
{
Exp* optex = make_optional_node(dict->Exp_pool, right_pairs);
optex = make_optional_node(dict->Exp_pool, extras);
and_enchain_right(dict, andhead, andtail, optex);
}

Expand Down
Loading

0 comments on commit 8875f2d

Please sign in to comment.