Skip to content
Newer
Older
100644 298 lines (216 sloc) 11.2 KB
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
1 //
5291e63 @smendes Added license stuff
smendes authored Mar 28, 2010
2 // BKBayesianClassifier.h
3 // Licensed under the terms of the BSD License, as specified below.
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
4 //
5
5291e63 @smendes Added license stuff
smendes authored Mar 27, 2010
6 /*
7 Copyright (c) 2010, Samuel Mendes
8
9 All rights reserved.
10
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright
15 notice, this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of ᐱ nor the names of its
22 contributors may be used to endorse or promote products derived
23 from this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
31 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
38 #import <Foundation/Foundation.h>
39
a2bd13c @smendes Refactoring: extra "Bayesian" in the name removed
smendes authored Apr 14, 2010
40 #import <BayesianKit/BKDataPool.h>
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
41 #import <BayesianKit/BKTokenizing.h>
42
6d5127c @smendes Put corpus pool's name as a public string
smendes authored Apr 14, 2010
43
9970412 @smendes API Documentation
smendes authored Apr 15, 2010
44 /** Implementation of a naive bayesian classifier.
45
46 BKClassifier is provided with a default setup using Robinson-Fisher
47 probabilities combiner and a ParseKit-based tokenizer.
48
49 Using methods @c initWithContentsOfFile:() and @c writeToFile:() the
50 classifier's training can be saved and reloaded. Note that if you change the
51 probabilities combiner or the tokenizer, those changes are not saved in the
52 file. You need to reapply those changes after reloading the classifier.
53
54 To train the classifier use @c trainWithFile:forPoolNamed:() or
55 @c trainWithString:forPoolNamed:(). At the end of those methods
56 @c updatePoolsProbabilities() will be automatically called and probabilities
57 associated with each token will be re-computed.
44ac5d5 @smendes Added a beginning of doxygen documentation
smendes authored Apr 14, 2010
58
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
59 Once trained, the classifier can be used immediately with @c guessWithFile:() or
60 @c guessWithString:(). Both return a dictionary containing the score, in
61 percent, for each pool.
62
63 To avoid unnecessarily big pools, @c stripToLevel:() will remove any token with a
64 total count lower than specified.
44ac5d5 @smendes Added a beginning of doxygen documentation
smendes authored Apr 14, 2010
65 */
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
66 @interface BKClassifier : NSObject <NSCoding> {
a2bd13c @smendes Refactoring: extra "Bayesian" in the name removed
smendes authored Apr 14, 2010
// NOTE(review): presumably the pool identified by BKCorpusDataPoolName - confirm in the implementation.
67 BKDataPool *corpus;
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
68
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
// Same name as the read-only "pools" property declared below.
69 NSMutableDictionary *pools;
// NOTE(review): purpose is not visible in this header; presumably marks probabilities as stale - confirm.
70 BOOL dirty;
71
92c61da @smendes Probability combiner can be changed now.
smendes authored Apr 13, 2010
// Same name as the "probabilitiesCombinerInvocation" property declared below.
72 NSInvocation *probabilitiesCombinerInvocation;
73
28ea0f0 @smendes Classifier can use another tokenizer now
smendes authored Apr 13, 2010
// Same name as the "tokenizer" property declared below.
74 id<BKTokenizing> tokenizer;
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
75 }
76
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
77 //////////////////////////////////////////////////////////////////////////////////////////
78 /// @name Properties
79 //////////////////////////////////////////////////////////////////////////////////////////
80
81 /** Dictionary containing every data pool of the classifier. */
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
82 @property (readonly) NSMutableDictionary *pools;
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
83
84 /** Invocation to call for combining probabilities.
85
86 As an alternative you can use @c setProbabilitiesCombinerWithTarget:selector:userInfo:().
87
88 By default it uses @c robinsonFisherCombinerOn:userInfo:.
89 */
92c61da @smendes Probability combiner can be changed now.
smendes authored Apr 13, 2010
90 @property (readwrite, retain) NSInvocation *probabilitiesCombinerInvocation;
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
91
92 /** Tokenizer to use when training or guessing on a string.
93
94 By default it uses @c BKTokenizer.
95 */
28ea0f0 @smendes Classifier can use another tokenizer now
smendes authored Apr 13, 2010
96 @property (readwrite, retain) id<BKTokenizing> tokenizer;
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
97
199fe14 @smendes Reordering methods
smendes authored Mar 28, 2010
98
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
99 //////////////////////////////////////////////////////////////////////////////////////////
100 /// @name Creating a classifier
101 //////////////////////////////////////////////////////////////////////////////////////////
199fe14 @smendes Reordering methods
smendes authored Mar 28, 2010
102
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
103 /** Create a new classifier using a previous training saved in a file.
104
NOTE(review): this is declared as an instance method (-) although it is documented
as creating a new classifier; confirm whether a class method (+) was intended.

105 @param path The path to the file containing the classifier's save.
106 @returns A new bayesian classifier.
107 @see initWithContentsOfFile:
44ac5d5 @smendes Added a beginning of doxygen documentation
smendes authored Apr 14, 2010
108 */
a2bd13c @smendes Refactoring: extra "Bayesian" in the name removed
smendes authored Apr 14, 2010
109 - (BKClassifier*)classifierWithContentsOfFile:(NSString*)path;
199fe14 @smendes Reordering methods
smendes authored Mar 28, 2010
110
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
111
112 //////////////////////////////////////////////////////////////////////////////////////////
113 /// @name Initializing a classifier
114 //////////////////////////////////////////////////////////////////////////////////////////
115
116 /** Initialize a bayesian classifier using a previous training saved in a file.
117
118 @param path The path to the file containing the classifier's save.
119 @returns An initialized bayesian classifier.
120 @see classifierWithContentsOfFile:
121 */
122 - (id)initWithContentsOfFile:(NSString*)path;
123
124
125 //////////////////////////////////////////////////////////////////////////////////////////
126 /// @name Storing a classifier's training
127 //////////////////////////////////////////////////////////////////////////////////////////
128
129 /** Saves all training data in a file.
130
131 If path contains a tilde (~) character, you must expand it before invoking this method.
132 @param path The path at which to write the file.
133 @return YES if the file is written successfully, otherwise NO.
134 */
199fe14 @smendes Reordering methods
smendes authored Mar 28, 2010
135 - (BOOL)writeToFile:(NSString*)path;
136
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
137
138 //////////////////////////////////////////////////////////////////////////////////////////
139 /// @name Creating & Destroying pools
140 //////////////////////////////////////////////////////////////////////////////////////////
141
142 /** Returns the pool with a given name.
143
144 If the classifier does not hold a pool with this name, a new one is created.
145 @param poolName The name of the pool to look for.
146 @return The pool associated with the name.
147 */
a2bd13c @smendes Refactoring: extra "Bayesian" in the name removed
smendes authored Apr 14, 2010
148 - (BKDataPool*)poolNamed:(NSString*)poolName;
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
149
150 /** Destroy a pool with a given name.
151
152 @param poolName The name of the pool.
153 */
199fe14 @smendes Reordering methods
smendes authored Mar 28, 2010
154 - (void)removePoolNamed:(NSString*)poolName;
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
155
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
156
157 //////////////////////////////////////////////////////////////////////////////////////////
158 /// @name Updating probabilities
159 //////////////////////////////////////////////////////////////////////////////////////////
160
161 /** Compute the probability associated with every token in every pool. */
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
162 - (void)updatePoolsProbabilities;
163
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
164
165 //////////////////////////////////////////////////////////////////////////////////////////
166 /// @name Probabilities combining
167 //////////////////////////////////////////////////////////////////////////////////////////
168
169 /** Change the probabilities combiner.
170
171 @param target The object to which to send the message specified by selector when
172 the classifier needs to combine a series of probabilities.
173 The target object is @b not retained by the classifier.
174 @param selector The selector to send to the target when the classifier needs to
175 combine a series of probabilities. The selector must have the same signature as
176 @c robinsonCombinerOn:userInfo:(). The classifier passes an array
177 of @c NSNumber containing float values in @a probabilities.
178 @param userInfo Custom user info for the combiner.
179 The object you specify is @b not retained by the classifier.
180 This parameter may be nil.
181 @see robinsonCombinerOn:userInfo:
182 @see robinsonFisherCombinerOn:userInfo:
183 */
92c61da @smendes Probability combiner can be changed now.
smendes authored Apr 13, 2010
184 - (void)setProbabilitiesCombinerWithTarget:(id)target selector:(SEL)selector userInfo:(id)userInfo;
199fe14 @smendes Reordering methods
smendes authored Mar 28, 2010
185
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
186 /** Compute Robinson's combiner on a series of probabilities.
187
188 @param probabilities An array of @c NSNumber containing float numbers.
189 @param userInfo Custom user info for the combiner. Unused in this method.
190 @return A single probability representing the series.
191 @see robinsonFisherCombinerOn:userInfo:
192 */
193 - (float)robinsonCombinerOn:(NSArray*)probabilities userInfo:(id)userInfo;
194
195 /** Compute Robinson-Fisher's combiner on a series of probabilities.
196
197 @param probabilities An array of @c NSNumber containing float numbers.
198 @param userInfo Custom user info for the combiner. Unused in this method.
199 @return A single probability representing the series.
200 @see robinsonCombinerOn:userInfo:
201 */
202 - (float)robinsonFisherCombinerOn:(NSArray*)probabilities userInfo:(id)userInfo;
203
204
205 //////////////////////////////////////////////////////////////////////////////////////////
206 /// @name Training a classifier
207 //////////////////////////////////////////////////////////////////////////////////////////
208
209 /** Train the classifier on a file.
210
211 @param path The path to the file on which the classifier will train.
212 @param poolName The name of the pool to which the content of the file belongs.
213 @see trainWithString:forPoolNamed:
214 @see trainWithTokens:inPool:
215 */
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
216 - (void)trainWithFile:(NSString*)path forPoolNamed:(NSString*)poolName;
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
217
218 /** Train the classifier on a string.
219
220 @param trainString The string on which the classifier will train.
221 @param poolName The name of the pool to which the string belongs.
222 @see trainWithFile:forPoolNamed:
223 @see trainWithTokens:inPool:
224 */
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
225 - (void)trainWithString:(NSString*)trainString forPoolNamed:(NSString*)poolName;
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
226
227 /** Train the classifier on a group of tokens.
228
229 @param tokens Tokens to add to one of the classifier's pools.
230 @param pool The pool to which the tokens belong.
231 @see trainWithFile:forPoolNamed:
232 @see trainWithString:forPoolNamed:
233 */
a2bd13c @smendes Refactoring: extra "Bayesian" in the name removed
smendes authored Apr 14, 2010
234 - (void)trainWithTokens:(NSArray*)tokens inPool:(BKDataPool*)pool;
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
235
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
236
237 //////////////////////////////////////////////////////////////////////////////////////////
238 /// @name Guessing with the classifier
239 //////////////////////////////////////////////////////////////////////////////////////////
240
241 /** Ask the classifier to guess on a file.
242
243 @param path The path to the file on which the classifier will make a guess.
244 @return A dictionary with every pool's name as a key and its probability of
245 being associated with the file's content as the value.
246 @see guessWithString:
247 @see guessWithTokens:
248 */
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
249 - (NSDictionary*)guessWithFile:(NSString*)path;
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
250
251 /** Ask the classifier to guess on a string.
252
253 @param string The string on which the classifier will make a guess.
254 @return A dictionary with every pool's name as a key and its probability of
255 being associated with the string as the value.
256 @see guessWithFile:
257 @see guessWithTokens:
258 */
259 - (NSDictionary*)guessWithString:(NSString*)string;
260
261 /** Ask the classifier to guess on a group of tokens.
262
263 @param tokens Tokens on which the classifier will make a guess.
264 @return A dictionary with every pool's name as a key and its probability of
265 being associated with those tokens as the value.
266 @see guessWithFile:
267 @see guessWithString:
268 */
199fe14 @smendes Reordering methods
smendes authored Mar 28, 2010
269 - (NSDictionary*)guessWithTokens:(NSArray*)tokens;
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
270
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
271
272 //////////////////////////////////////////////////////////////////////////////////////////
273 /// @name Optimizing the classifier
274 //////////////////////////////////////////////////////////////////////////////////////////
275
276 /** Remove any token with a total count lower than a given level.
277
278 @param level The minimum total count a token needs in order not to be removed.
279 */
199fe14 @smendes Reordering methods
smendes authored Mar 28, 2010
280 - (void)stripToLevel:(NSUInteger)level;
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
281
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
282
283 //////////////////////////////////////////////////////////////////////////////////////////
284 /// @name Getting information
285 //////////////////////////////////////////////////////////////////////////////////////////
286
287 /** Print some basic statistics on the pools. */
2e9bd7b @smendes Initial Commit
smendes authored Mar 27, 2010
288 - (void)printInformations;
289
290 @end
9970412 @smendes API Documentation
smendes authored Apr 14, 2010
291
292
293 //////////////////////////////////////////////////////////////////////////////////////////
294 /// @name Constants
295 //////////////////////////////////////////////////////////////////////////////////////////
296
297 /** Name of the corpus' pool, exposed as a public string constant. */
298 extern NSString* const BKCorpusDataPoolName;
Something went wrong with that request. Please try again.