classifier tester works on single topic matrix

sbenthall · May 8, 2012 · 2497ca7 · 2497ca7
1 parent 5e7bc72
commit 2497ca7
Showing 1 changed file with 5 additions and 10 deletions.
diff --git a/classify.py b/classify.py
@@ -11,26 +11,22 @@
 
 class LdaSpamClassifierTester:
 
-    # ttm, htm, and sm are numpy arrays
-    # ttm - tweet topic matrix
-    # htm - html topic matrix
+    # tm and sm are numpy arrays
+    # tm - topic matrix
     # sm - spam matrix (i.e., sm[i] is 1 iff tweet i is spam)
-    def __init__(self,ttm,htm,sm):
-        self.ttm = ttm
-        self.htm = htm
+    def __init__(self,tm,sm):
+        self.tm = tm
         self.sm = sm
 
     def features(self,i):
-        return dict([t for t in enumerate(self.ttm[i,:])])
+        return dict([t for t in enumerate(self.tm[i,:])])
 
     def training_example(self,i):
         label = "spam" if self.sm[i] else "ham"
         return (self.features(i), label)
 
     def sample_indices(self,training_ratio=TRAINING_RATIO):
         n_training = int(self.sm.size * training_ratio)
-        print "Training on %s, Testing on %s" % (n_training, self.sm.size)
-        print "Selecting training and testing indices"
         permuted_indices = numpy.random.permutation(self.sm.size)
         training_indices = numpy.arange(self.sm.size)[permuted_indices[n_training:]]
         testing_indices = numpy.arange(self.sm.size)[permuted_indices[:n_training]]
@@ -62,7 +58,6 @@ def compute_classifier_success(self,n_for_validation=N):
             classifier = self.train(train_i)
             results, accuracy = self.test(classifier, test_i)
 
-            print self.ratio(accuracy)
             ratios.append(self.ratio(accuracy))
 
         mean_success = numpy.mean(ratios)