Permalink
Browse files

API Documentation

  • Loading branch information...
1 parent 303bc77 commit 9970412ddba61f08f2eb4f3d14d78482860fdb6b @smendes smendes committed Apr 14, 2010
Showing with 431 additions and 59 deletions.
  1. +2 −2 BayesianKit.xcodeproj/project.pbxproj
  2. +2 −2 Doxyfile
  3. +214 −27 src/BKClassifier.h
  4. +6 −20 src/BKClassifier.m
  5. +109 −3 src/BKDataPool.h
  6. +58 −2 src/BKTokenData.h
  7. +17 −2 src/BKTokenizer.h
  8. +1 −0 src/BKTokenizer.m
  9. +22 −1 src/BKTokenizing.h
@@ -118,7 +118,7 @@
D2F7E79907B2D74100F64583 /* CoreData.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreData.framework; path = /System/Library/Frameworks/CoreData.framework; sourceTree = "<absolute>"; };
E224A1D31163A4DC00AD8CA6 /* ParseKit.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = ParseKit.xcodeproj; path = "../todparsekit-read-only/ParseKit.xcodeproj"; sourceTree = SOURCE_ROOT; };
E26C1453115E324100CFCCF1 /* BayesianKit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BayesianKit.h; sourceTree = "<group>"; };
- E26C1454115E324100CFCCF1 /* BKClassifier.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BKClassifier.h; sourceTree = "<group>"; };
+ E26C1454115E324100CFCCF1 /* BKClassifier.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BKClassifier.h; sourceTree = "<group>"; wrapsLines = 0; };
E26C1455115E324100CFCCF1 /* BKClassifier.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = BKClassifier.m; sourceTree = "<group>"; };
E26C1456115E324100CFCCF1 /* BKDataPool.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BKDataPool.h; sourceTree = "<group>"; };
E26C1457115E324100CFCCF1 /* BKDataPool.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = BKDataPool.m; sourceTree = "<group>"; };
@@ -462,7 +462,7 @@
);
runOnlyForDeploymentPostprocessing = 0;
shellPath = /bin/sh;
- shellScript = "DOXYGEN=\"/usr/local/bin/doxygen\"\nAPPLEDOC_PATH=\"$SRCROOT/external/appledoc/\"\n\n\"$APPLEDOC_PATH/appledoc\" -p \"$PROJECT_NAME\" \\\n -i \"$SRCROOT\" \\\n -o \"$SRCROOT/docs\" \\\n -t \"$APPLEDOC_PATH/Templates\" \\\n -d \"$DOXYGEN\" \\\n -c Doxyfile \\\n --clean-temp-files \\\n --docset \\\n --xhtml\n\nexit 0";
+ shellScript = "DOXYGEN=\"/usr/local/bin/doxygen\"\nAPPLEDOC_PATH=\"$SRCROOT/external/appledoc/\"\n\n\"$APPLEDOC_PATH/appledoc\" -p \"$PROJECT_NAME\" \\\n -i \"$SRCROOT\" \\\n -o \"$SRCROOT/docs\" \\\n -t \"$APPLEDOC_PATH/Templates\" \\\n -d \"$DOXYGEN\" \\\n -c Doxyfile \\\n --docset \\\n --xhtml\n\nexit 0";
};
/* End PBXShellScriptBuildPhase section */
View
@@ -196,7 +196,7 @@ ALIASES =
# For instance, some of the names that are used will be different. The list
# of all members will be omitted, etc.
-OPTIMIZE_OUTPUT_FOR_C = YES
+OPTIMIZE_OUTPUT_FOR_C = NO
# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
# sources only. Doxygen will then generate output that is more tailored for
@@ -308,7 +308,7 @@ SYMBOL_CACHE_SIZE = 0
# Private class members and static file members will be hidden unless
# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
-EXTRACT_ALL = YES
+EXTRACT_ALL = NO
# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
# will be included in the documentation.
View
@@ -41,21 +41,31 @@
#import <BayesianKit/BKTokenizing.h>
-/**
- Pool name for the corpus' pool
- */
-extern NSString* const BKCorpusDataPoolName;
-
-/**
- Naive bayesian classifier class.
+/** Implementation of a naive bayesian classifier.
+
+ BKClassifier is provided with a default setup using the Robinson-Fisher
+ probabilities combiner and a ParseKit-based tokenizer.
+
+ Using methods @c initWithContentsOfFile:() and @c writeToFile:() the
+ classifier's training can be saved and reloaded. Note that if you change the
+ probabilities combiner or the tokenizer, those changes are not saved in the
+ file. You need to reapply those changes after reloading the classifier.
+
+ To train the classifier use @c trainWithFile:forPoolNamed:() or
+ @c trainWithString:forPoolNamed:(). At the end of those methods
+ @c updatePoolsProbabilities() will be automatically called and the probability
+ associated with each token will be re-computed.
- More blabla
+ Once trained, the classifier can be immediately used with @c guessWithFile:() or
+ @c guessWithString:(). Both return a dictionary containing the score, in
+ percent, for each pool.
+
+ To avoid unnecessarily big pools, @c stripToLevel:() will remove any token with a
+ total count lower than specified.
*/
-@interface BKClassifier : NSObject<NSCoding> {
+@interface BKClassifier : NSObject <NSCoding> {
BKDataPool *corpus;
- /**
- Data pools indexed by theirs names
- */
+
NSMutableDictionary *pools;
BOOL dirty;
@@ -64,48 +74,225 @@ extern NSString* const BKCorpusDataPoolName;
id<BKTokenizing> tokenizer;
}
+//////////////////////////////////////////////////////////////////////////////////////////
+/// @name Properties
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/** Dictionary containing every data pool of the classifier */
@property (readonly) NSMutableDictionary *pools;
+
+/** Invocation to call for combining probabilities.
+
+ As an alternative you can use @c setProbabilitiesCombinerWithTarget:selector:userInfo:().
+
+ By default it uses @c robinsonFisherCombinerOn:userInfo:.
+ */
@property (readwrite, retain) NSInvocation *probabilitiesCombinerInvocation;
+
+/** Tokenizer to use on string training or guessing.
+
+ By default it uses @c BKTokenizer
+ */
@property (readwrite, retain) id<BKTokenizing> tokenizer;
-/**
- Initialize a bayesian classifier using a previous training saved in a file
- @param path The path to the file containing the classifier's save
- @returns A bayesian classifier initialized
- */
-- (id)initWithContentsOfFile:(NSString*)path;
+//////////////////////////////////////////////////////////////////////////////////////////
+/// @name Creating a classifier
+//////////////////////////////////////////////////////////////////////////////////////////
-/**
- Create a new classifier using a previous training saved in a file
- @param path The path to the file containing the classifier's save
- @returns A new bayesian classifier
+/** Create a new classifier using a previous training saved in a file.
+
+ @param path The path to the file containing the classifier's save.
+ @returns A new bayesian classifier.
+ @see initWithContentsOfFile:
*/
- (BKClassifier*)classifierWithContentsOfFile:(NSString*)path;
+
+//////////////////////////////////////////////////////////////////////////////////////////
+/// @name Initializing a classifier
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/** Initialize a bayesian classifier using a previous training saved in a file.
+
+ @param path The path to the file containing the classifier's save.
+ @returns A bayesian classifier initialized.
+ @see classifierWithContentsOfFile:
+ */
+- (id)initWithContentsOfFile:(NSString*)path;
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+/// @name Storing a classifier's training
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/** Saves all training data in a file.
+
+ If path contains a tilde (~) character, you must expand it before invoking this method.
+ @param path The path at which to write the file.
+ @return YES if the file is written successfully, otherwise NO.
+ */
- (BOOL)writeToFile:(NSString*)path;
+
+//////////////////////////////////////////////////////////////////////////////////////////
+/// @name Creating & Destroying pools
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/** Returns the pool with a given name.
+
+ If the classifier does not hold a pool with this name, a new one is created.
+ @param poolName The name of the pool to look for.
+ @return The pool associated to the name.
+ */
- (BKDataPool*)poolNamed:(NSString*)poolName;
+
+/** Destroy a pool with a given name.
+
+ @param poolName The name of the pool.
+ */
- (void)removePoolNamed:(NSString*)poolName;
-- (void)mergePoolNamed:(NSString*)sourcePoolName withPoolNamed:(NSString*)destPoolName;
+
+//////////////////////////////////////////////////////////////////////////////////////////
+/// @name Updating probabilities
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/** Compute the probability associated with every token in every pool. */
- (void)updatePoolsProbabilities;
-- (void)buildProbabilityCache;
+
+//////////////////////////////////////////////////////////////////////////////////////////
+/// @name Probabilities combining
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/** Change the probabilities combiner.
+
+ @param target The object to which to send the message specified by selector when
+ the classifier needs to combine a series of probabilities.
+ The target object is @b not retained by the classifier.
+ @param selector The selector to send to the target when the classifier needs to
+ combine a series of probabilities. The selector must have the same signature as
+ @c robinsonCombinerOn:userInfo:(). The classifier passes an array
+ of @c NSNumber containing float values in @a probabilities.
+ @param userInfo Custom user info for the combiner.
+ The object you specify is @b not retained by the classifier.
+ This parameter may be nil.
+ @see robinsonCombinerOn:userInfo:
+ @see robinsonFisherCombinerOn:userInfo:
+ */
- (void)setProbabilitiesCombinerWithTarget:(id)target selector:(SEL)selector userInfo:(id)userInfo;
-- (float)robinsonCombinerOnProbabilities:(NSArray*)probabilities userInfo:(id)userInfo;
-- (float)robinsonFisherCombinerOnProbabilities:(NSArray*)probabilities userInfo:(id)userInfo;
+/** Compute Robinson's combiner on a series of probabilities.
+
+ @param probabilities An array of @c NSNumber containing float numbers.
+ @param userInfo Custom user info for the combiner. Unused in this method.
+ @return A single probability representing the series.
+ @see robinsonFisherCombinerOn:userInfo:
+ */
+- (float)robinsonCombinerOn:(NSArray*)probabilities userInfo:(id)userInfo;
+
+/** Compute Robinson-Fisher's combiner on a series of probabilities.
+
+ @param probabilities An array of @c NSNumber containing float numbers.
+ @param userInfo Custom user info for the combiner. Unused in this method.
+ @return A single probability representing the series.
+ @see robinsonCombinerOn:userInfo:
+ */
+- (float)robinsonFisherCombinerOn:(NSArray*)probabilities userInfo:(id)userInfo;
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+/// @name Training a classifier
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/** Train the classifier on a file.
+
+ @param path The path to the file on which the classifier will train.
+ @param poolName The name of the pool to which the content of the file belongs.
+ @see trainWithString:forPoolNamed:
+ @see trainWithTokens:inPool:
+ */
- (void)trainWithFile:(NSString*)path forPoolNamed:(NSString*)poolName;
+
+/** Train the classifier on a string.
+
+ @param trainString The string on which the classifier will train.
+ @param poolName The name of the pool to which the string belongs.
+ @see trainWithFile:forPoolNamed:
+ @see trainWithTokens:inPool:
+ */
- (void)trainWithString:(NSString*)trainString forPoolNamed:(NSString*)poolName;
+
+/** Train the classifier on a group of tokens.
+
+ @param tokens Tokens to add to one of the classifier's pools.
+ @param pool The pool to which the tokens belong.
+ @see trainWithFile:forPoolNamed:
+ @see trainWithString:forPoolNamed:
+ */
- (void)trainWithTokens:(NSArray*)tokens inPool:(BKDataPool*)pool;
-- (NSDictionary*)guessWithString:(NSString*)string;
+
+//////////////////////////////////////////////////////////////////////////////////////////
+/// @name Guessing with the classifier
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/** Ask the classifier to guess on a file.
+
+ @param path The path to the file on which the classifier will make a guess.
+ @return A dictionary with each pool's name as key and its probability of
+ being associated with the file's content as value.
+ @see guessWithString:
+ @see guessWithTokens:
+ */
- (NSDictionary*)guessWithFile:(NSString*)path;
+
+/** Ask the classifier to guess on a string.
+
+ @param string The string on which the classifier will make a guess.
+ @return A dictionary with each pool's name as key and its probability of
+ being associated with the string as value.
+ @see guessWithFile:
+ @see guessWithTokens:
+ */
+- (NSDictionary*)guessWithString:(NSString*)string;
+
+/** Ask the classifier to guess on a group of tokens.
+
+ @param tokens Tokens on which the classifier will make a guess.
+ @return A dictionary with each pool's name as key and its probability of
+ being associated with those tokens as value.
+ @see guessWithFile:
+ @see guessWithString:
+ */
- (NSDictionary*)guessWithTokens:(NSArray*)tokens;
+
+//////////////////////////////////////////////////////////////////////////////////////////
+/// @name Optimizing the classifier
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/** Remove any tokens with a total count lower than a given level.
+
+ @param level The minimum total count a token needs in order not to be removed.
+ */
- (void)stripToLevel:(NSUInteger)level;
+
+//////////////////////////////////////////////////////////////////////////////////////////
+/// @name Getting information
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/** Print some basic statistics about the pools */
- (void)printInformations;
@end
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+/// @name Constants
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/** Pool name for the corpus' pool */
+extern NSString* const BKCorpusDataPoolName;
View
@@ -42,6 +42,7 @@
@interface BKClassifier (Private)
+ (float)chiSquare:(float)chi withDegreeOfFreedom:(NSUInteger)df;
+- (void)buildProbabilityCache;
@end
@@ -61,7 +62,7 @@ - (id)init
dirty = YES;
[self setProbabilitiesCombinerWithTarget:self
- selector:@selector(robinsonFisherCombinerOnProbabilities:userInfo:)
+ selector:@selector(robinsonFisherCombinerOn:userInfo:)
userInfo:nil];
tokenizer = [[BKTokenizer alloc] init];
@@ -98,7 +99,7 @@ - (id)initWithCoder:(NSCoder*)coder
pools = [[coder decodeObjectForKey:@"Pools"] retain];
[self setProbabilitiesCombinerWithTarget:self
- selector:@selector(robinsonFisherCombinerOnProbabilities:userInfo:)
+ selector:@selector(robinsonFisherCombinerOn:userInfo:)
userInfo:nil];
}
return self;
@@ -145,21 +146,6 @@ - (void)removePoolNamed:(NSString*)poolName
dirty = YES;
}
-- (void)mergePoolNamed:(NSString*)sourcePoolName withPoolNamed:(NSString*)destPoolName
-{
- BKDataPool *sourcePool = [pools objectForKey:sourcePoolName];
- BKDataPool *destPool = [pools objectForKey:destPoolName];
-
- if (!sourcePool || !destPool) return;
-
- for (NSString *token in sourcePool) {
- NSUInteger count = [sourcePool countForToken:token];
- [destPool addCount:count forToken:token];
- }
-
- dirty = YES;
-}
-
#pragma mark -
#pragma mark Probabilities
- (void)updatePoolsProbabilities
@@ -201,7 +187,7 @@ - (void)buildProbabilityCache
#pragma mark Combiners
- (void)setProbabilitiesCombinerWithTarget:(id)target selector:(SEL)selector userInfo:(id)userInfo
{
- SEL signatureSelector = @selector(robinsonCombinerOnProbabilities:userInfo:);
+ SEL signatureSelector = @selector(robinsonCombinerOn:userInfo:);
NSMethodSignature *signature = [BKClassifier instanceMethodSignatureForSelector:signatureSelector];
NSInvocation *invocation = [NSInvocation invocationWithMethodSignature:signature];
@@ -213,7 +199,7 @@ - (void)setProbabilitiesCombinerWithTarget:(id)target selector:(SEL)selector use
[self setProbabilitiesCombinerInvocation:invocation];
}
-- (float)robinsonCombinerOnProbabilities:(NSArray*)probabilities userInfo:(id) __unused userInfo
+- (float)robinsonCombinerOn:(NSArray*)probabilities userInfo:(id) __unused userInfo
{
NSUInteger length = [probabilities count];
float nth = 1.0f / (uint32_t)length;
@@ -240,7 +226,7 @@ - (float)robinsonCombinerOnProbabilities:(NSArray*)probabilities userInfo:(id) _
return (1.0f + S) / 2.0f;
}
-- (float)robinsonFisherCombinerOnProbabilities:(NSArray*)probabilities userInfo:(id) __unused userInfo
+- (float)robinsonFisherCombinerOn:(NSArray*)probabilities userInfo:(id) __unused userInfo
{
NSUInteger length = [probabilities count];
float probs[length], inverseProbs[length];
Oops, something went wrong.

0 comments on commit 9970412

Please sign in to comment.