adapted naming to harry configuration

rieck · Mar 10, 2015 · a0bf2f1 · a0bf2f1
1 parent eb4203e
commit a0bf2f1
Show file tree

Hide file tree

Showing 11 changed files with 37 additions and 37 deletions.
diff --git a/doc/example.cfg b/doc/example.cfg
@@ -37,7 +37,7 @@ features = {
     ngram_len = 1;
 
     # Granularity of n-grams: bytes or tokens
-    ngram_gran = "tokens";
+    granularity = "tokens";
 
     # Delimiters for token n-grams, e.g. " %0a%0d"
     ngram_delim = " %0a%0d";

diff --git a/doc/sally.pod b/doc/sally.pod
@@ -191,12 +191,12 @@ following parameters.
 
 =item B<ngram_len = 2;>
 
-=item B<ngram_gran = "bytes";>
+=item B<granularity = "bytes";>
 
 The parameter B<ngram_len> specifies the number of consecutive symbols that
-are considered as one feature, while the parameter B<ngram_gran> defines the
+are considered as one feature, while the parameter B<granularity> defines the
 granularity of these symbols.  If the granularity is set to I<bytes>,
-B<sally> considers bytes as symbols, whereas if B<ngram_gran> is set to
+B<sally> considers bytes as symbols, whereas if B<granularity> is set to
 I<tokens>, all strings (tokens) separated by a set of delimiters are
 considered as symbols.  The following types of different feature types can
 be extracted using these parameters:
@@ -209,7 +209,7 @@ The strings are partitioned into substrings (tokens) using a set of
 delimiter characters.  Such partitioning is typical for natural language
 processing, where the delimiters are usually defined as white-space and
 punctuation symbols.  An embedding using tokens is selected by choosing
-I<tokens> as granularity (B<ngram_gran>), defining a set of delimiter
+I<tokens> as granularity (B<granularity>), defining a set of delimiter
 characters (B<ngram_delim>) and setting the n-gram length to 1
 (B<ngram_len>).
 
@@ -219,7 +219,7 @@ The strings are characterized by all possible byte sequences of a fixed
 length n (byte n-grams).  These features are frequently used if no
 information about the structure of strings is available, such as in
 bioinformatics or computer security.  An embedding using byte n-grams is
-selected by choosing I<bytes> as granularity (B<ngram_gran>) and defining
+selected by choosing I<bytes> as granularity (B<granularity>) and defining
 the n-gram length (B<ngram_len>).
 
 =item I<token n-grams>
@@ -229,7 +229,7 @@ length n (token n-grams).  These features require the definition of a set of
 delimiters and a length n.  They are often used in natural language
 processing as a coarse way for capturing structure of text.  An embedding
 using token n-grams is selected by choosing I<tokens> as granularity
-(B<ngram_gran>), defining a set of delimiter characters (B<ngram_delim>) and
+(B<granularity>), defining a set of delimiter characters (B<ngram_delim>) and
 choosing an n-gram length (B<ngram_len>).
 
 =back
@@ -555,7 +555,7 @@ can be changed by using the command-line option B<--xx
 
   -n,  --ngram_len <num>         Set length of n-grams.
   -d,  --ngram_delim <delim>     Set delimiters of tokens in n-grams.
-  -g   --ngram_gran <type>       Set granularity: bytes, tokens.
+  -g,  --granularity <type>      Set granularity: bytes, tokens.
   -p,  --ngram_pos               Enable positional n-grams.
        --pos_shift <num>         Set shift of positional n-grams.
   -B,  --ngram_blend             Enable blended n-grams.

diff --git a/src/fvec/fvec.c b/src/fvec/fvec.c
@@ -127,7 +127,7 @@ fvec_t *fvec_extract_intern2(char *x, int l, int n)
     }
 
     /* Get configuration */
-    config_lookup_string(&cfg, "features.ngram_gran", &granu);
+    config_lookup_string(&cfg, "features.granularity", &granu);
     config_lookup_bool(&cfg, "features.ngram_pos", &pos);
     config_lookup_int(&cfg, "features.pos_shift", &shift);
 

diff --git a/src/sally.c b/src/sally.c
@@ -46,7 +46,7 @@ static struct option longopts[] = {
     {"reverse_str", 0, NULL, 1007},
     {"stoptoken_file", 1, NULL, 1008},
     {"ngram_len", 1, NULL, 'n'},
-    {"ngram_gran", 1, NULL, 'g'},
+    {"granularity", 1, NULL, 'g'},
     {"ngram_delim", 1, NULL, 'd'},
     {"ngram_pos", 0, NULL, 'p'},
     {"pos_shift", 1, NULL, 1012},       /* <- last entry */
@@ -114,7 +114,7 @@ static void print_usage(void)
            "  -k,  --skip_null               Skip null vectors in output.\n"
            "\nFeature options:\n"
            "  -n,  --ngram_len <num>         Set length of n-grams.\n"
-           "  -g   --ngram_gran <type>       Set granularity: bytes, tokens.\n"
+           "  -g   --granularity <type>      Set granularity: bytes, tokens.\n"
            "  -d,  --ngram_delim <delim>     Set delimiters of tokens in n-grams.\n"
            "  -p,  --ngram_pos               Enable positional n-grams.\n"
            "       --pos_shift <num>         Set shift of positional n-grams.\n"
@@ -215,7 +215,7 @@ static void sally_parse_options(int argc, char **argv)
             config_set_int(&cfg, "features.ngram_len", atoi(optarg));
             break;
         case 'g':
-            config_set_string(&cfg, "features.ngram_gran", optarg);
+            config_set_string(&cfg, "features.granularity", optarg);
             break;
         case 'd':
             config_set_string(&cfg, "features.ngram_delim", optarg);
@@ -343,9 +343,9 @@ static void sally_load_config(int argc, char **argv)
                   config_error_text(&cfg), config_error_line(&cfg));
 
         /* Check for new granularity parameter */
-        ret = config_lookup_string(&cfg, "features.ngram_gran", &str);
+        ret = config_lookup_string(&cfg, "features.granularity", &str);
         if (ret == CONFIG_FALSE)
-            fatal("Your configuration is missing the new 'ngram_gran' "
+            fatal("Your configuration is missing the new 'granularity' "
                   "parameter. Please consult the manual page and upgrade "
                   "your configuration.");
     }

diff --git a/src/sconfig.c b/src/sconfig.c
@@ -36,7 +36,7 @@ static config_default_t defaults[] = {
     {"input", "reverse_str", CONFIG_TYPE_BOOL, {.num = CONFIG_FALSE}},
     {"input", "stoptoken_file", CONFIG_TYPE_STRING, {.str = ""}},
     {"features", "ngram_len", CONFIG_TYPE_INT, {.num = 1}},
-    {"features", "ngram_gran", CONFIG_TYPE_STRING, {.str = "tokens"}},
+    {"features", "granularity", CONFIG_TYPE_STRING, {.str = "tokens"}},
     {"features", "ngram_delim", CONFIG_TYPE_STRING, {.str = "%20%0a%0d"}},
     {"features", "ngram_pos", CONFIG_TYPE_BOOL, {.num = CONFIG_FALSE}},
     {"features", "pos_shift", CONFIG_TYPE_INT, {.num = 0}},
@@ -236,7 +236,7 @@ int config_check(config_t *cfg)
     	return 0;
     }
 
-    config_lookup_string(cfg, "features.ngram_gran", &s1);
+    config_lookup_string(cfg, "features.granularity", &s1);
     config_lookup_string(cfg, "features.ngram_delim", &s2);
     if (!strcasecmp(s1, "tokens") && strlen(s2) == 0) {
         error("Delimiters are required if the granularity is tokens.");

diff --git a/tests/config1.cfg b/tests/config1.cfg
@@ -37,7 +37,7 @@ features = {
     ngram_len = 2;
 
     # Granularity of n-grams: bytes or tokens
-    ngram_gran = "tokens";
+    granularity = "tokens";
 
     # Delimiters for n-grams, e.g. " %0a%0d" 
     ngram_delim = "%0a%0d ";

diff --git a/tests/config2.cfg b/tests/config2.cfg
@@ -37,7 +37,7 @@ features = {
     ngram_len = 2;
 
     # Granularity of n-grams: bytes or tokens
-    ngram_gran = "bytes";
+    granularity = "bytes";
 
     # Delimiters for n-grams, e.g. " %0a%0d"
     ngram_delim = "";

diff --git a/tests/config3.cfg b/tests/config3.cfg
@@ -37,7 +37,7 @@ features = {
     ngram_len = 1;
 
     # Granularity of n-grams: bytes or tokens
-    ngram_gran = "tokens";
+    granularity = "tokens";
 
     # Delimiters for n-grams, e.g. " %0a%0d"
     ngram_delim = " %0a%0d";

diff --git a/tests/test_embed.c b/tests/test_embed.c
@@ -192,7 +192,7 @@ int main(int argc, char **argv)
     config_init(&cfg);
     config_check(&cfg);
 
-    config_set_string(&cfg, "features.ngram_gran", "tokens");
+    config_set_string(&cfg, "features.granularity", "tokens");
     config_set_string(&cfg, "features.ngram_delim", " .,%0a%0d");
     config_set_int(&cfg, "features.ngram_len", 1);
     config_set_string(&cfg, "input.input_format", "lines");

diff --git a/tests/test_fvec.c b/tests/test_fvec.c
@@ -60,9 +60,9 @@ void init_sally(test_t t)
 
     /* Set granularity depending on delimiters */    
     if (strlen(t.dlm) > 0)
-        config_set_string(&cfg, "features.ngram_gran", "tokens");
+        config_set_string(&cfg, "features.granularity", "tokens");
     else
-        config_set_string(&cfg, "features.ngram_gran", "bytes");
+        config_set_string(&cfg, "features.granularity", "bytes");
 }
 
 

diff --git a/tests/test_ngrams.c b/tests/test_ngrams.c
@@ -7,7 +7,7 @@
  * under the terms of the GNU General Public License as published by the
  * Free Software Foundation; either version 3 of the License, or (at your
  * option) any later version.  This program is distributed without any
- * warranty. See the GNU General Public License for more details. 
+ * warranty. See the GNU General Public License for more details.
  */
 
 #include "tests.h"
@@ -27,7 +27,7 @@ typedef struct
     int nlen;
     int flag;
     /* Number of n-grams */
-    int len;    
+    int len;
 } test_t;
 
 int test_sorted_ngrams()
@@ -47,13 +47,13 @@ int test_sorted_ngrams()
     test_printf("Testing sorted n-grams");
 
     /* Hack to set delimiters */
-    config_set_string(&cfg, "features.ngram_gran", "tokens");
+    config_set_string(&cfg, "features.granularity", "tokens");
     config_set_string(&cfg, "features.ngram_delim", " ");
-    fvec_delim_set(" ");     
+    fvec_delim_set(" ");
 
     for (i = 0; t[i].str; i++) {
         config_set_int(&cfg, "features.ngram_len", t[i].nlen);
-        config_set_bool(&cfg, "features.ngram_sort", t[i].flag); 
+        config_set_bool(&cfg, "features.ngram_sort", t[i].flag);
 
         /* Extract features */
         f = fvec_extract(t[i].str, strlen(t[i].str));
@@ -67,7 +67,7 @@ int test_sorted_ngrams()
         fvec_destroy(f);
     }
 
-    config_set_bool(&cfg, "features.ngram_sort", 0); 
+    config_set_bool(&cfg, "features.ngram_sort", 0);
 
     test_return(err, i);
     return err;
@@ -90,13 +90,13 @@ int test_blended_ngrams()
     test_printf("Testing blended n-grams");
 
     /* Hack to set delimiters */
-    config_set_string(&cfg, "features.ngram_gran", "tokens");    
+    config_set_string(&cfg, "features.granularity", "tokens");
     config_set_string(&cfg, "features.ngram_delim", " ");
-    fvec_delim_set(" ");     
+    fvec_delim_set(" ");
 
     for (i = 0; t[i].str; i++) {
         config_set_int(&cfg, "features.ngram_len", t[i].nlen);
-        config_set_bool(&cfg, "features.ngram_blend", t[i].flag); 
+        config_set_bool(&cfg, "features.ngram_blend", t[i].flag);
 
         /* Extract features */
         f = fvec_extract(t[i].str, strlen(t[i].str));
@@ -110,7 +110,7 @@ int test_blended_ngrams()
         fvec_destroy(f);
     }
 
-    config_set_bool(&cfg, "features.ngram_blend", 0); 
+    config_set_bool(&cfg, "features.ngram_blend", 0);
 
     test_return(err, i);
     return err;
@@ -134,15 +134,15 @@ int test_pos_ngrams()
     test_printf("Testing positional n-grams");
 
     /* Hack to set delimiters */
-    config_set_string(&cfg, "features.ngram_gran", "tokens");    
+    config_set_string(&cfg, "features.granularity", "tokens");
     config_set_string(&cfg, "features.ngram_delim", " ");
-    fvec_delim_set(" ");     
+    fvec_delim_set(" ");
 
     for (i = 0; t[i].str; i++) {
-    
+
         config_set_int(&cfg, "features.ngram_len", t[i].nlen);
         config_set_bool(&cfg, "features.ngram_pos", t[i].flag);
-        config_set_int(&cfg, "features.pos_shift", 0); 
+        config_set_int(&cfg, "features.pos_shift", 0);
 
         /* Extract features */
         f = fvec_extract(t[i].str, strlen(t[i].str));
@@ -173,7 +173,7 @@ int main(int argc, char **argv)
     /* Create config */
     config_init(&cfg);
     config_check(&cfg);
-    
+
     fhash_init();
 
     err |= test_sorted_ngrams();