Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 157 lines (136 sloc) 3.778 kb
9e3fa5df »
2011-03-15 Starting treating of missing values
1 /*!
74981964 »
2011-03-15 Handling of missing values by probability theory
2 * @file id3missing.c
9e3fa5df »
2011-03-15 Starting treating of missing values
3 * @brief Treating of missing values.
4 * @author Mihai Maruseac (mihai@rosedu.org)
5 *
6 * @section DESCRIPTION
7 * IAUT1: ID3: Treating of missing values.
8 *
9 * Contains the various implementations of routines to treat missing
10 * attributes from the description.
11 *
12 * @section QUOTE
13 * Intelligence is what you use when you don't know what to do. (Jean Piaget)
14 *
15 * @bug No known bugs.
16 */
17
18 #include "globals.h"
19 #include "id3missing.h"
20
1b8ca3b7 »
2011-03-20 Style problems
21 void numeric_maj_fill_missing(struct example_set *lset, int attr_index,
c3e10fa0 »
2011-03-19 No treating of missing values using id3
22 int miss_index)
9e3fa5df »
2011-03-15 Starting treating of missing values
23 {
173ec256 »
2011-03-15 Done treating misses by majority
24 int sum, count, i;
25
26 count = 0;
27 sum = 0;
28 for (i = 0; i < lset->N; i++) {
c3e10fa0 »
2011-03-19 No treating of missing values using id3
29 SKIPIF(MISS_INDEX(lset->examples[i]->miss, miss_index));
173ec256 »
2011-03-15 Done treating misses by majority
30 sum += lset->examples[i]->attr_ids[attr_index];
31 count += 1;
32 }
33
34 sum /= count;
35 for (i = 0; i < lset->N; i++)
36 if (MISS_INDEX(lset->examples[i]->miss, miss_index)) {
37 lset->examples[i]->attr_ids[attr_index] = sum;
38 lset->examples[i]->miss ^= 1 << miss_index;
39 }
9e3fa5df »
2011-03-15 Starting treating of missing values
40 }
41
81af6f0d »
2011-03-15 Changed to include description in all missing treating functions
42 void numeric_prb_fill_missing(const struct description *descr,
c3e10fa0 »
2011-03-19 No treating of missing values using id3
43 struct example_set *lset, int attr_index,
44 int miss_index)
9e3fa5df »
2011-03-15 Starting treating of missing values
45 {
74981964 »
2011-03-15 Handling of missing values by probability theory
46 int K, i, j, max, imax, c, **counts, C, *vals, ind, v;
47
48 K = descr->K;
49 C = lset->N;
50 counts = calloc(K, sizeof(counts[0]));
51 for (i = 0; i < K; i++)
52 counts[i] = calloc(C, sizeof(counts[i][0]));
53 vals = calloc(C, sizeof(vals[0]));
54
55 C = 0;
56 for (i = 0; i < lset->N; i++) {
c3e10fa0 »
2011-03-19 No treating of missing values using id3
57 SKIPIF(MISS_INDEX(lset->examples[i]->miss, miss_index));
74981964 »
2011-03-15 Handling of missing values by probability theory
58 c = lset->examples[i]->class_id;
59 v = lset->examples[i]->attr_ids[attr_index];
60 for (ind = 0; ind < C; ind++)
61 if (vals[ind] == v) {
62 counts[c][ind]++;
63 break;
64 }
c3e10fa0 »
2011-03-19 No treating of missing values using id3
65 SKIPIF(ind < C);
74981964 »
2011-03-15 Handling of missing values by probability theory
66 counts[c][C]++;
67 vals[C++] = v;
68 }
69
70 for (i = 0; i < lset->N; i++)
71 if (MISS_INDEX(lset->examples[i]->miss, miss_index)) {
72 c = lset->examples[i]->class_id;
73 imax = 0;
74 max = counts[c][imax];
75 for (j = 1; j < C; j++)
76 if (max < counts[c][j]) {
77 max = counts[c][j];
78 imax = j;
79 }
80 lset->examples[i]->attr_ids[attr_index] = vals[imax];
81 lset->examples[i]->miss ^= 1 << miss_index;
82 }
83
84 for (i = 0; i < K; i++)
85 free(counts[i]);
86 free(counts);
87 free(vals);
9e3fa5df »
2011-03-15 Starting treating of missing values
88 }
89
81af6f0d »
2011-03-15 Changed to include description in all missing treating functions
90 void discrete_maj_fill_missing(const struct description *descr,
c3e10fa0 »
2011-03-19 No treating of missing values using id3
91 struct example_set *lset, int attr_index,
92 int miss_index)
9e3fa5df »
2011-03-15 Starting treating of missing values
93 {
173ec256 »
2011-03-15 Done treating misses by majority
94 int *counts, i, max, imax, C;
95
96 C = descr->attribs[attr_index]->C;
97 counts = calloc(C, sizeof(counts[0]));
98 for (i = 0; i < lset->N; i++) {
c3e10fa0 »
2011-03-19 No treating of missing values using id3
99 SKIPIF(MISS_INDEX(lset->examples[i]->miss, miss_index));
173ec256 »
2011-03-15 Done treating misses by majority
100 counts[lset->examples[i]->attr_ids[attr_index]]++;
101 }
102
103 imax = 0;
104 max = counts[imax];
105 for (i = 1; i < C; i++)
106 if (max < counts[i]) {
107 max = counts[i];
108 imax = i;
109 }
110
111 for (i = 0; i < lset->N; i++)
112 if (MISS_INDEX(lset->examples[i]->miss, miss_index)) {
113 lset->examples[i]->attr_ids[attr_index] = imax;
114 lset->examples[i]->miss ^= 1 << miss_index;
115 }
116
117 free(counts);
9e3fa5df »
2011-03-15 Starting treating of missing values
118 }
119
81af6f0d »
2011-03-15 Changed to include description in all missing treating functions
120 void discrete_prb_fill_missing(const struct description *descr,
c3e10fa0 »
2011-03-19 No treating of missing values using id3
121 struct example_set *lset, int attr_index,
122 int miss_index)
9e3fa5df »
2011-03-15 Starting treating of missing values
123 {
74981964 »
2011-03-15 Handling of missing values by probability theory
124 int K, C, i, **counts, c, j, max, imax;
125
126 K = descr->K;
127 C = descr->attribs[attr_index]->C;
128 counts = calloc(K, sizeof(counts[0]));
129 for (i = 0; i < K; i++)
130 counts[i] = calloc(C, sizeof(counts[i][0]));
131
132 for (i = 0; i < lset->N; i++) {
c3e10fa0 »
2011-03-19 No treating of missing values using id3
133 SKIPIF(MISS_INDEX(lset->examples[i]->miss, miss_index));
74981964 »
2011-03-15 Handling of missing values by probability theory
134 c = lset->examples[i]->class_id;
135 counts[c][lset->examples[i]->attr_ids[attr_index]]++;
136 }
137
138 for (i = 0; i < lset->N; i++)
139 if (MISS_INDEX(lset->examples[i]->miss, miss_index)) {
140 c = lset->examples[i]->class_id;
141 imax = 0;
142 max = counts[c][imax];
143 for (j = 1; j < C; j++)
144 if (max < counts[c][j]) {
145 max = counts[c][j];
146 imax = j;
147 }
148 lset->examples[i]->attr_ids[attr_index] = imax;
149 lset->examples[i]->miss ^= 1 << miss_index;
150 }
151
152 for (i = 0; i < K; i++)
153 free(counts[i]);
154 free(counts);
9e3fa5df »
2011-03-15 Starting treating of missing values
155 }
156
Something went wrong with that request. Please try again.