Skip to content

Commit

Permalink
classifier tester works on single topic matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
sbenthall committed May 8, 2012
1 parent 5e7bc72 commit 2497ca7
Showing 1 changed file with 5 additions and 10 deletions.
15 changes: 5 additions & 10 deletions classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,26 +11,22 @@

class LdaSpamClassifierTester:

# ttm, htm, and sm are numpy arrays
# ttm - tweet topic matrix
# htm - html topic matrix
# tm and sm are numpy arrays
# tm - topic matrix
# sm - spam matrix (i.e., sm[i] is 1 iff tweet i is spam)
def __init__(self,ttm,htm,sm):
self.ttm = ttm
self.htm = htm
def __init__(self,tm,sm):
self.tm = tm
self.sm = sm

def features(self,i):
return dict([t for t in enumerate(self.ttm[i,:])])
return dict([t for t in enumerate(self.tm[i,:])])

def training_example(self,i):
label = "spam" if self.sm[i] else "ham"
return (self.features(i), label)

def sample_indices(self,training_ratio=TRAINING_RATIO):
n_training = int(self.sm.size * training_ratio)
print "Training on %s, Testing on %s" % (n_training, self.sm.size)
print "Selecting training and testing indices"
permuted_indices = numpy.random.permutation(self.sm.size)
training_indices = numpy.arange(self.sm.size)[permuted_indices[n_training:]]
testing_indices = numpy.arange(self.sm.size)[permuted_indices[:n_training]]
Expand Down Expand Up @@ -62,7 +58,6 @@ def compute_classifier_success(self,n_for_validation=N):
classifier = self.train(train_i)
results, accuracy = self.test(classifier, test_i)

print self.ratio(accuracy)
ratios.append(self.ratio(accuracy))

mean_success = numpy.mean(ratios)
Expand Down

0 comments on commit 2497ca7

Please sign in to comment.