Skip to content

Commit

Permalink
API Documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
smendes committed Apr 14, 2010
1 parent 303bc77 commit 9970412
Show file tree
Hide file tree
Showing 9 changed files with 431 additions and 59 deletions.
4 changes: 2 additions & 2 deletions BayesianKit.xcodeproj/project.pbxproj
Expand Up @@ -118,7 +118,7 @@
D2F7E79907B2D74100F64583 /* CoreData.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreData.framework; path = /System/Library/Frameworks/CoreData.framework; sourceTree = "<absolute>"; };
E224A1D31163A4DC00AD8CA6 /* ParseKit.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = ParseKit.xcodeproj; path = "../todparsekit-read-only/ParseKit.xcodeproj"; sourceTree = SOURCE_ROOT; };
E26C1453115E324100CFCCF1 /* BayesianKit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BayesianKit.h; sourceTree = "<group>"; };
E26C1454115E324100CFCCF1 /* BKClassifier.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BKClassifier.h; sourceTree = "<group>"; };
E26C1454115E324100CFCCF1 /* BKClassifier.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BKClassifier.h; sourceTree = "<group>"; wrapsLines = 0; };
E26C1455115E324100CFCCF1 /* BKClassifier.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = BKClassifier.m; sourceTree = "<group>"; };
E26C1456115E324100CFCCF1 /* BKDataPool.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BKDataPool.h; sourceTree = "<group>"; };
E26C1457115E324100CFCCF1 /* BKDataPool.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = BKDataPool.m; sourceTree = "<group>"; };
Expand Down Expand Up @@ -462,7 +462,7 @@
);
runOnlyForDeploymentPostprocessing = 0;
shellPath = /bin/sh;
shellScript = "DOXYGEN=\"/usr/local/bin/doxygen\"\nAPPLEDOC_PATH=\"$SRCROOT/external/appledoc/\"\n\n\"$APPLEDOC_PATH/appledoc\" -p \"$PROJECT_NAME\" \\\n -i \"$SRCROOT\" \\\n -o \"$SRCROOT/docs\" \\\n -t \"$APPLEDOC_PATH/Templates\" \\\n -d \"$DOXYGEN\" \\\n -c Doxyfile \\\n --clean-temp-files \\\n --docset \\\n --xhtml\n\nexit 0";
shellScript = "DOXYGEN=\"/usr/local/bin/doxygen\"\nAPPLEDOC_PATH=\"$SRCROOT/external/appledoc/\"\n\n\"$APPLEDOC_PATH/appledoc\" -p \"$PROJECT_NAME\" \\\n -i \"$SRCROOT\" \\\n -o \"$SRCROOT/docs\" \\\n -t \"$APPLEDOC_PATH/Templates\" \\\n -d \"$DOXYGEN\" \\\n -c Doxyfile \\\n --docset \\\n --xhtml\n\nexit 0";
};
/* End PBXShellScriptBuildPhase section */

Expand Down
4 changes: 2 additions & 2 deletions Doxyfile
Expand Up @@ -196,7 +196,7 @@ ALIASES =
# For instance, some of the names that are used will be different. The list
# of all members will be omitted, etc.

OPTIMIZE_OUTPUT_FOR_C = YES
OPTIMIZE_OUTPUT_FOR_C = NO

# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
# sources only. Doxygen will then generate output that is more tailored for
Expand Down Expand Up @@ -308,7 +308,7 @@ SYMBOL_CACHE_SIZE = 0
# Private class members and static file members will be hidden unless
# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES

EXTRACT_ALL = YES
EXTRACT_ALL = NO

# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
# will be included in the documentation.
Expand Down
241 changes: 214 additions & 27 deletions src/BKClassifier.h
Expand Up @@ -41,21 +41,31 @@
#import <BayesianKit/BKTokenizing.h>


/**
Pool name for the corpus' pool
*/
extern NSString* const BKCorpusDataPoolName;

/**
Naive bayesian classifier class.
/** Implementation of a naive bayesian classifier.
BKClassifier is provided with a default setup using Robinson-Fisher
probabilities combiner and a ParseKit-based tokenizer.
Using methods @c initWithContentsOfFile:() and @c writeToFile:() the
classifier's training can be saved and reloaded. Note that if you change the
probabilities combiner or the tokenizer, those changes are not saved in the
file. You need to reapply those changes after reloading the classifier.
To train the classifier use @c trainWithFile:forPoolNamed:() or
@c trainWithString:forPoolNamed:(). At the end of those methods
@c updatePoolsProbabilities() will be automatically called and probabilities
associated with each token will be re-computed.
More blabla
Once trained the classifier can be immediately used with @c guessWithFile:() or
@c guessWithString:(). Both return a dictionary containing the score, in
percent, for each pool.
To avoid unnecessarily big pools, @c stripToLevel:() will remove any token with a
total count lower than specified.
*/
@interface BKClassifier : NSObject<NSCoding> {
@interface BKClassifier : NSObject <NSCoding> {
BKDataPool *corpus;
/**
Data pools indexed by their names
*/

NSMutableDictionary *pools;
BOOL dirty;

Expand All @@ -64,48 +74,225 @@ extern NSString* const BKCorpusDataPoolName;
id<BKTokenizing> tokenizer;
}

//////////////////////////////////////////////////////////////////////////////////////////
/// @name Properties
//////////////////////////////////////////////////////////////////////////////////////////

/** Dictionary containing every data pool of the classifier. */
@property (readonly) NSMutableDictionary *pools;

/** Invocation to call for combining probabilities.
As an alternative you can use @c setProbabilitiesCombinerWithTarget:selector:userInfo:().
By default it uses @c robinsonFisherCombinerOn:userInfo:.
*/
@property (readwrite, retain) NSInvocation *probabilitiesCombinerInvocation;

/** Tokenizer to use on string training or guessing.
By default it uses @c BKTokenizer
*/
@property (readwrite, retain) id<BKTokenizing> tokenizer;


/**
Initialize a bayesian classifier using a previous training saved in a file
@param path The path to the file containing the classifier's save
@returns A bayesian classifier initialized
*/
- (id)initWithContentsOfFile:(NSString*)path;
//////////////////////////////////////////////////////////////////////////////////////////
/// @name Creating a classifier
//////////////////////////////////////////////////////////////////////////////////////////

/**
Create a new classifier using a previous training saved in a file
@param path The path to the file containing the classifier's save
@returns A new bayesian classifier
/** Create a new classifier using a previous training saved in a file.
@param path The path to the file containing the classifier's save.
@returns A new bayesian classifier.
@see initWithContentsOfFile:
*/
- (BKClassifier*)classifierWithContentsOfFile:(NSString*)path;


//////////////////////////////////////////////////////////////////////////////////////////
/// @name Initializing a classifier
//////////////////////////////////////////////////////////////////////////////////////////

/** Initialize a bayesian classifier using a previous training saved in a file.
@param path The path to the file containing the classifier's save.
@returns A bayesian classifier initialized.
@see classifierWithContentsOfFile:
*/
- (id)initWithContentsOfFile:(NSString*)path;


//////////////////////////////////////////////////////////////////////////////////////////
/// @name Storing a classifier's training
//////////////////////////////////////////////////////////////////////////////////////////

/** Saves all training data in a file.
If path contains a tilde (~) character, you must expand it before invoking this method.
@param path The path at which to write the file.
@return YES if the file is written successfully, otherwise NO.
*/
- (BOOL)writeToFile:(NSString*)path;


//////////////////////////////////////////////////////////////////////////////////////////
/// @name Creating & Destroying pools
//////////////////////////////////////////////////////////////////////////////////////////

/** Returns the pool with a given name.
If the classifier does not hold a pool with this name, a new one is created.
@param poolName The name of the pool to look for.
@return The pool associated to the name.
*/
- (BKDataPool*)poolNamed:(NSString*)poolName;

/** Destroy a pool with a given name.
@param poolName The name of the pool.
*/
- (void)removePoolNamed:(NSString*)poolName;
- (void)mergePoolNamed:(NSString*)sourcePoolName withPoolNamed:(NSString*)destPoolName;


//////////////////////////////////////////////////////////////////////////////////////////
/// @name Updating probabilities
//////////////////////////////////////////////////////////////////////////////////////////

/** Compute the probability associated with every token in every pool. */
- (void)updatePoolsProbabilities;
- (void)buildProbabilityCache;


//////////////////////////////////////////////////////////////////////////////////////////
/// @name Probabilities combining
//////////////////////////////////////////////////////////////////////////////////////////

/** Change the probabilities combiner.
@param target The object to which to send the message specified by selector when
the classifier needs to combine a series of probabilities.
The target object is @b not retained by the classifier.
@param selector The selector to send to the target when the classifier needs to
combine a series of probabilities. The selector must have the same signature as
@c robinsonCombinerOn:userInfo:(). The classifier passes an array
of @c NSNumber containing float values in @a probabilities.
@param userInfo Custom user info for the combiner.
The object you specify is @b not retained by the classifier.
This parameter may be nil.
@see robinsonCombinerOn:userInfo:
@see robinsonFisherCombinerOn:userInfo:
*/
- (void)setProbabilitiesCombinerWithTarget:(id)target selector:(SEL)selector userInfo:(id)userInfo;
- (float)robinsonCombinerOnProbabilities:(NSArray*)probabilities userInfo:(id)userInfo;
- (float)robinsonFisherCombinerOnProbabilities:(NSArray*)probabilities userInfo:(id)userInfo;

/** Compute Robinson's combiner on a series of probabilities.
@param probabilities An array of @c NSNumber containing float numbers.
@param userInfo Custom user info for the combiner. Unused in this method.
@return A single probability representing the series.
@see robinsonFisherCombinerOn:userInfo:
*/
- (float)robinsonCombinerOn:(NSArray*)probabilities userInfo:(id)userInfo;

/** Compute Robinson-Fisher's combiner on a series of probabilities.
@param probabilities An array of @c NSNumber containing float numbers.
@param userInfo Custom user info for the combiner. Unused in this method.
@return A single probability representing the series.
@see robinsonCombinerOn:userInfo:
*/
- (float)robinsonFisherCombinerOn:(NSArray*)probabilities userInfo:(id)userInfo;


//////////////////////////////////////////////////////////////////////////////////////////
/// @name Training a classifier
//////////////////////////////////////////////////////////////////////////////////////////

/** Train the classifier on a file.
@param path The path to the file on which the classifier will train.
@param poolName The name of the pool to which the content of the file belongs.
@see trainWithString:forPoolNamed:
@see trainWithTokens:inPool:
*/
- (void)trainWithFile:(NSString*)path forPoolNamed:(NSString*)poolName;

/** Train the classifier on a string.
@param trainString The string on which the classifier will train.
@param poolName The name of the pool to which the string belongs.
@see trainWithFile:forPoolNamed:
@see trainWithTokens:inPool:
*/
- (void)trainWithString:(NSString*)trainString forPoolNamed:(NSString*)poolName;

/** Train the classifier on a group of tokens.
@param tokens Tokens to add to one of the classifier's pools.
@param pool The pool to which the tokens belong.
@see trainWithFile:forPoolNamed:
@see trainWithString:forPoolNamed:
*/
- (void)trainWithTokens:(NSArray*)tokens inPool:(BKDataPool*)pool;

- (NSDictionary*)guessWithString:(NSString*)string;

//////////////////////////////////////////////////////////////////////////////////////////
/// @name Guessing with the classifier
//////////////////////////////////////////////////////////////////////////////////////////

/** Ask the classifier to guess on a file.
@param path The path to the file on which the classifier will make a guess.
@return A dictionary with each pool's name as key and, as value, its probability
of being associated with the file's content.
@see guessWithString:
@see guessWithTokens:
*/
- (NSDictionary*)guessWithFile:(NSString*)path;

/** Ask the classifier to guess on a string.
@param string The string on which the classifier will make a guess.
@return A dictionary with each pool's name as key and, as value, its probability
of being associated with the string.
@see guessWithFile:
@see guessWithTokens:
*/
- (NSDictionary*)guessWithString:(NSString*)string;

/** Ask the classifier to guess on a group of tokens.
@param tokens Tokens on which the classifier will make a guess.
@return A dictionary with each pool's name as key and, as value, its probability
of being associated with those tokens.
@see guessWithFile:
@see guessWithString:
*/
- (NSDictionary*)guessWithTokens:(NSArray*)tokens;


//////////////////////////////////////////////////////////////////////////////////////////
/// @name Optimizing the classifier
//////////////////////////////////////////////////////////////////////////////////////////

/** Remove any tokens with a total count lower than a given level.
@param level The minimum total count a token needs in order not to be removed.
*/
- (void)stripToLevel:(NSUInteger)level;


//////////////////////////////////////////////////////////////////////////////////////////
/// @name Getting information
//////////////////////////////////////////////////////////////////////////////////////////

/** Print some basic statistics on the pools. */
- (void)printInformations;

@end


//////////////////////////////////////////////////////////////////////////////////////////
/// @name Constants
//////////////////////////////////////////////////////////////////////////////////////////

/** Pool name for the corpus' pool */
extern NSString* const BKCorpusDataPoolName;
26 changes: 6 additions & 20 deletions src/BKClassifier.m
Expand Up @@ -42,6 +42,7 @@

// Category declaring BKClassifier methods in the implementation file rather
// than in the public header section shown for BKClassifier.h.
@interface BKClassifier (Private)
// Class-level helper taking a chi value and a degree of freedom and returning
// a float. NOTE(review): presumably the chi-square probability used by the
// Robinson-Fisher combiner -- the body is not visible here, confirm.
+ (float)chiSquare:(float)chi withDegreeOfFreedom:(NSUInteger)df;
// NOTE(review): presumably rebuilds cached per-token probabilities -- the body
// is not visible here, confirm against the implementation.
- (void)buildProbabilityCache;
@end


Expand All @@ -61,7 +62,7 @@ - (id)init
dirty = YES;

[self setProbabilitiesCombinerWithTarget:self
selector:@selector(robinsonFisherCombinerOnProbabilities:userInfo:)
selector:@selector(robinsonFisherCombinerOn:userInfo:)
userInfo:nil];

tokenizer = [[BKTokenizer alloc] init];
Expand Down Expand Up @@ -98,7 +99,7 @@ - (id)initWithCoder:(NSCoder*)coder
pools = [[coder decodeObjectForKey:@"Pools"] retain];

[self setProbabilitiesCombinerWithTarget:self
selector:@selector(robinsonFisherCombinerOnProbabilities:userInfo:)
selector:@selector(robinsonFisherCombinerOn:userInfo:)
userInfo:nil];
}
return self;
Expand Down Expand Up @@ -145,21 +146,6 @@ - (void)removePoolNamed:(NSString*)poolName
dirty = YES;
}

/**
 Adds every token count of one pool to another pool.
 Both pools are looked up by name in the classifier's pools dictionary. If
 either lookup returns nil, the method returns without doing anything. The
 source pool is not removed from the classifier by this method.
 @param sourcePoolName The name of the pool whose token counts are read.
 @param destPoolName The name of the pool that receives the counts.
 */
- (void)mergePoolNamed:(NSString*)sourcePoolName withPoolNamed:(NSString*)destPoolName
{
BKDataPool *sourcePool = [pools objectForKey:sourcePoolName];
BKDataPool *destPool = [pools objectForKey:destPoolName];

// Nothing to merge unless both pools already exist; no pool is created here.
if (!sourcePool || !destPool) return;

// Copy each token's count from the source pool into the destination pool.
for (NSString *token in sourcePool) {
NSUInteger count = [sourcePool countForToken:token];
[destPool addCount:count forToken:token];
}

// NOTE(review): presumably marks cached probabilities as stale so they get
// recomputed later -- confirm against updatePoolsProbabilities.
dirty = YES;
}

#pragma mark -
#pragma mark Probabilities
- (void)updatePoolsProbabilities
Expand Down Expand Up @@ -201,7 +187,7 @@ - (void)buildProbabilityCache
#pragma mark Combiners
- (void)setProbabilitiesCombinerWithTarget:(id)target selector:(SEL)selector userInfo:(id)userInfo
{
SEL signatureSelector = @selector(robinsonCombinerOnProbabilities:userInfo:);
SEL signatureSelector = @selector(robinsonCombinerOn:userInfo:);
NSMethodSignature *signature = [BKClassifier instanceMethodSignatureForSelector:signatureSelector];
NSInvocation *invocation = [NSInvocation invocationWithMethodSignature:signature];

Expand All @@ -213,7 +199,7 @@ - (void)setProbabilitiesCombinerWithTarget:(id)target selector:(SEL)selector use
[self setProbabilitiesCombinerInvocation:invocation];
}

- (float)robinsonCombinerOnProbabilities:(NSArray*)probabilities userInfo:(id) __unused userInfo
- (float)robinsonCombinerOn:(NSArray*)probabilities userInfo:(id) __unused userInfo
{
NSUInteger length = [probabilities count];
float nth = 1.0f / (uint32_t)length;
Expand All @@ -240,7 +226,7 @@ - (float)robinsonCombinerOnProbabilities:(NSArray*)probabilities userInfo:(id) _
return (1.0f + S) / 2.0f;
}

- (float)robinsonFisherCombinerOnProbabilities:(NSArray*)probabilities userInfo:(id) __unused userInfo
- (float)robinsonFisherCombinerOn:(NSArray*)probabilities userInfo:(id) __unused userInfo
{
NSUInteger length = [probabilities count];
float probs[length], inverseProbs[length];
Expand Down

0 comments on commit 9970412

Please sign in to comment.