Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Changes from vendored gem

  • Loading branch information...
commit e68106c61ec4d5a3bb3d3e7b13d9fee51980e0a6 1 parent a229360
@bpot bpot authored
View
33 lib/classifier/bayes.rb
@@ -28,6 +28,7 @@ def train(category, text)
@categories[category][word] += count
@total_words += count
end
+ reset_correct_counts!
end
#
@@ -55,6 +56,7 @@ def untrain(category, text)
@total_words -= count
end
end
+ reset_correct_counts!
end
#
@@ -80,24 +82,28 @@ def myclassify(text)
myclassify_with_word_hash(text.word_hash)
end
- # http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html
- def myclassify_with_word_hash(word_hash)
+ def myclassify_with_word_hash(word_hash, debugging_info = nil)
member_term_count = @categories[:Member].size
nonmember_term_count = @categories[:"Not member"].size
+
term_count = member_term_count + nonmember_term_count
score = 0
word_hash.each do |word, count|
# count of words in each category
- member_count = @categories[:Member][word].to_i + 1
- nonmember_count = @categories[:"Not member"][word].to_i + 1
- next if member_count.to_i == 1 && nonmember_count.to_i == 1
+ member_count = @categories[:Member][word].to_i + 0.1
+ nonmember_count = @categories[:"Not member"][word].to_i + 0.1
+ next if member_count == 0.1 && nonmember_count == 0.1
# find relative prob word is in class -- p(w|c)
- word_member_p = (member_count) / (total_member_count + term_count).to_f
- word_nonmember_p = (nonmember_count) / (total_nonmember_count + term_count).to_f
+ word_member_p = (member_count) / (total_member_count_correct + term_count).to_f
+ word_nonmember_p = (nonmember_count) / (total_nonmember_count_correct + term_count).to_f
word_pr = Math.log(word_member_p / word_nonmember_p)
score += word_pr * count
+ if debugging_info
+ debugging_info[word] = word_pr * count
+ end
+ #print "#{word_pr * count}: #{word}\n"
end
if score > 0
return "Member", score
@@ -159,6 +165,19 @@ def add_category(category)
alias append_category add_category
private
+ def reset_correct_counts!
+ @total_member_count_correct = nil
+ @total_nonmember_count_correct = nil
+ end
+
+ def total_member_count_correct
+ @total_member_count_correct ||= @categories[:Member].values.inject(0) {|sum, element| sum+element}
+ end
+
+ def total_nonmember_count_correct
+ @total_nonmember_count_correct ||= @categories[:"Not member"].values.inject(0) {|sum, element| sum+element}
+ end
+
def total_member_count
@total_member_count ||= @categories[:Member].values.inject(0) {|sum, element| sum+element}
end
View
10 lib/classifier/extensions/vector.rb
@@ -6,11 +6,11 @@
require 'matrix'
require 'mathn'
-#class Array
-# def sum
-# inject(0) { |sum,term| sum += term }.to_f
-# end
-#end
+class Array
+ def sum
+ inject(0) { |sum,term| sum += term }.to_f
+ end
+end
class Vector
def magnitude
View
2  lib/classifier/extensions/word_hash.rb
@@ -94,6 +94,7 @@ def word_hash_for_words(words)
"dont",
"ever",
"first",
+ "for",
"from",
"have",
"her",
@@ -150,5 +151,6 @@ def word_hash_for_words(words)
"yes",
"you",
"youll",
+ "your"
].to_set
end
Please sign in to comment.
Something went wrong with that request. Please try again.