Skip to content

Commit

Permalink
Added filter value and computed indexes
Browse files Browse the repository at this point in the history
  • Loading branch information
Mihai Maruseac committed Mar 15, 2011
1 parent ceeb283 commit d2d6395
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 12 deletions.
14 changes: 11 additions & 3 deletions globals.c
Expand Up @@ -66,7 +66,7 @@ struct attribute *read_attribute(FILE *file)

for (i = 0; i < attr->C; i++) {
CHECK(fscanf(file, "%ms", &tmp) == 1, fail);
attr->ptr[i] = strdup(tmp);
attr->ptr[i] = (int)strdup(tmp);
tmp = free_and_set_NULL(tmp);
}

Expand Down Expand Up @@ -159,7 +159,7 @@ int get_index_from_descr(const char *string,

l = strlen(string);
for (index = 0; index < attr->C; index++)
if (strncmp(string, attr->ptr[index], l) == 0)
if (strncmp(string, (char *)attr->ptr[index], l) == 0)
return index;
fail:
error_in_set = 1;
Expand Down Expand Up @@ -299,6 +299,14 @@ void write_example(const struct example *ex,
fprintf(file, "%s\n", descr->classes[ex->class_id]);
}

/*
 * Resets the filter flag of every example in the set to 0.
 *
 * Walks the lset->N entries of lset->examples in order and clears the
 * filter member of each one; nothing else in the set is modified.
 */
void clear_filter_info(struct example_set *lset)
{
	int pos = 0;

	while (pos < lset->N) {
		lset->examples[pos]->filter = 0;
		pos++;
	}
}

void free_attribute(struct attribute *ptr)
{
int i;
Expand All @@ -314,7 +322,7 @@ void free_attribute(struct attribute *ptr)

CHECK(ptr->type != NUMERIC, ok);
for (i = 0; i < ptr->C; i++)
free_and_set_NULL(ptr->ptr[i]);
free_and_set_NULL((char *)ptr->ptr[i]);

ok:
free_and_set_NULL(ptr->ptr);
Expand Down
35 changes: 27 additions & 8 deletions globals.h
Expand Up @@ -66,23 +66,24 @@ enum attr_type {
/**
* @brief Structure used to represent a single attribute.
*
* If the attribute is numeric, the ptr vector will not be used. It would have
* been relevant only when the structure would be used in a real example: it
* would contain an index into the example data, sorted by the increasing
* values of the attribute. However, at a later stage in the learning process,
* the index would have to be recomputed again by a slower process overall
* than the sorting of relevant samples using quicksort.
* If the attribute is numeric, the ptr vector will be used only after reading
* the learning example. It will be an index into the example data, used to
* sort in ascending order the example data by this attribute. If at a later
* time the example is split, the index will remain the same but some of
* the values will not be relevant for that example. However, code doing this
* will take care of that, thus, it is not a concern right now.
*
* Otherwise, the ptr will point to a vector of names, used for the discrete
* values.
* values. Each name can be obtained by converting the integer to a char* and
* looking at the resulting memory location.
*/
struct attribute {
/** Name of the attribute */
char *name;
/** Type of attribute */
enum attr_type type;
/** Type-dependent pointer (see description) */
/*
 * NOTE(review): this view shows two declarations of ptr; they look like the
 * old and new sides of the change rather than two real members - confirm
 * against the actual header. Code in globals.c casts strdup() results to int
 * before storing them here and casts them back to char * on use; that round
 * trip only preserves the pointer where int is as wide as char *. Consider
 * intptr_t or a union if this must work on platforms where it is not.
 */
void **ptr;
int *ptr;
/** Length of vector pointed to by ptr */
int C;
};
Expand Down Expand Up @@ -125,6 +126,12 @@ struct example {
int *attr_ids;
/** Flag for missing values */
int miss;
/** This flag may be used to represent the fact that this example
* should be discarded in one loop. It is the user's job to update
* this flag to relevant values. The only guarantee is that this flag
* is set to 0 when this structure is created.
*/
int filter;
};

/**
Expand Down Expand Up @@ -286,6 +293,18 @@ static int record_missing(int index, struct example_set *set);
static int get_index_from_descr(const char *string,
const struct attribute *attr);

/**
* @brief Deletes the filtering information associated with each example from
* the example set.
*
* As you recall (see the struct example documentation), the filter flag can
* be used to implement filtering of examples from the example set in an opaque
* way to other users of the same structure. However, to ensure consistency,
* each user must call this function to reset the filter value to the original
* value.
*/
void clear_filter_info(struct example_set *lset);

/**
* @brief Frees the memory allocated to one description.
*
Expand Down
34 changes: 33 additions & 1 deletion id3learn.c
Expand Up @@ -33,6 +33,8 @@ int id3_learn_bootstrap_file(int num_handle, int missing_handle,

/* fill in missing arguments */
id3_treat_missing(descr, lset, missing_handle);
/* build indexes for numeric attributes */
id3_build_index(descr, lset);
/* start the learning process */
id3_learn(descr, lset, num_handle);

Expand All @@ -54,8 +56,10 @@ void id3_learn(const struct description *descr,
const struct example_set *lset,
int num_handle)
{
/* write_description(descr, stdout);*/
#if 0
write_description(descr, stdout);
write_set(lset, descr, stdout);
#endif
}

void id3_treat_missing(const struct description *descr,
Expand Down Expand Up @@ -98,3 +102,31 @@ void id3_treat_missing(const struct description *descr,
}
}

/*
 * Builds the sorted index of every numeric attribute.
 *
 * For each attribute of descr whose type is NUMERIC, an array of lset->N
 * example positions is allocated and ordered so that the attr_ids value of
 * the referenced examples, for that attribute, never decreases along the
 * array. The array is stored in the attribute's ptr member and its length in
 * the C member. Attributes of any other type are left untouched.
 *
 * If the array cannot be allocated, the attribute is left with ptr == NULL
 * and C == 0 instead of writing through a null pointer.
 *
 * NOTE(review): any value previously held in ptr is overwritten without
 * being released - confirm that numeric attributes reach this function with
 * no index attached.
 */
void id3_build_index(const struct description *descr,
		const struct example_set *lset)
{
	int i, j, k, ii, jj;
	int *index;

	for (i = 0; i < descr->M; i++) {
		if (descr->attribs[i]->type != NUMERIC)
			continue;

		/* size by element (int), not by the size of the pointer */
		index = calloc(lset->N, sizeof(*index));
		if (index == NULL) {
			descr->attribs[i]->ptr = NULL;
			descr->attribs[i]->C = 0;
			continue;
		}

		/* start from the identity permutation */
		for (j = 0; j < lset->N; j++)
			index[j] = j;

		/*
		 * Exchange sort in O(N^2): after pass j, slot j holds the
		 * position of the smallest remaining value. Kept as is so
		 * that the relative order of equal values does not change.
		 */
		for (j = 0; j < lset->N; j++)
			for (k = j + 1; k < lset->N; k++) {
				ii = index[j];
				jj = index[k];
				if (lset->examples[ii]->attr_ids[i] >
						lset->examples[jj]->attr_ids[i]) {
					index[k] = ii;
					index[j] = jj;
				}
			}

		/* publish the index only once it is complete */
		descr->attribs[i]->ptr = index;
		descr->attribs[i]->C = lset->N;
	}
}
13 changes: 13 additions & 0 deletions id3learn.h
Expand Up @@ -55,5 +55,18 @@ static void id3_learn(const struct description *descr,
static void id3_treat_missing(const struct description *descr,
struct example_set *lset, int missing_handle);

/**
* @brief Builds indexes for numeric attributes.
*
* The indexes are used to implement handling of numeric attributes and to
* sort the input set, if needed. Subsets of the input will keep the same
* index, although some values may be missing in them.
*
* @param descr The description of the problem.
* @param lset The learning set
*/
static void id3_build_index(const struct description *descr,
const struct example_set *lset);

#endif

0 comments on commit d2d6395

Please sign in to comment.