Permalink
Browse files

Mann-Whitney / Kendall Tau in place

  • Loading branch information...
1 parent abd8758 commit 180fc478461f54063d4fc9c118f6fd36f02b90b9 @marktabler committed May 27, 2013
Showing with 110 additions and 81 deletions.
  1. +15 −2 lib/analyzer.rb
  2. +43 −40 lib/analyzer/diagnostics.rb
  3. +2 −2 lib/analyzer/kendall_tau.rb
  4. +44 −36 lib/analyzer/mann_whitney.rb
  5. +6 −1 lib/analyzer/phi_coefficient.rb
View
@@ -7,16 +7,29 @@
module Analyzer
def self.build(set_x, set_y)
- if both_sets_dichotomous?
+ if both_sets_dichotomous?(set_x, set_y)
PhiCoefficient.new(set_x, set_y)
- elsif one_set_ordinal?
+ elsif one_set_dichotomous?(set_x, set_y)
MannWhitney.new(set_x, set_y)
else
KendallTau.new(set_x, set_y)
end
end
+ def self.both_sets_dichotomous?(set_x, set_y)
+ set_x.uniq.size == 2 && set_y.uniq.size == 2
+ end
+
+ def self.one_set_dichotomous?(set_x, set_y)
+ set_x.uniq.size == 2 || set_y.uniq.size == 2
+ end
+
def self.correlation(set_x, set_y)
self.build(set_x, set_y).correlation
end
+
+ def self.benchmarks
+ KendallTau.benchmark
+ MannWhitney.benchmark
+ end
end
@@ -3,36 +3,45 @@ module Analyzer
MW_TEST_A = [[true, true, false, false, true, false, true, false],
[1, 2, 3, 4, 5, 6, 7, 8]]
- MW_TEST_B = [[2, 2, 3, 4, 5, 0, 7, 1],
- [true, true, true, true, true, false, true, false]]
+ MW_TEST_B = [[1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8],
+ [true, true, false, false, true, false, true, false, true, true, false, false, true, false, true, false]]
- MW_TEST_C = [[true, true, false, false, true, false, true, false, true, true, false, false, true, false, true, false],
- [1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]]
-
- MW_TEST_D = [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+ MW_TEST_C = [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 7, 8, 9, 7, 8, 9, 7, 8, 9, 1]]
- MW_TEST_E = [[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+ MW_TEST_D = [[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 7, 8, 9, 7, 8, 9, 7, 8, 9, 1]]
- MW_TEST_F = [[1, 2, 3],
+ MW_TEST_E = [[1, 2, 3],
[4, 5, 6]]
+ MW_TEST_F = [[1, 0, 1, 0, 1, 1, 1, 0],
+ [0, 1, 0, 1, 1, 1, 0, 1]]
- TEST_KA = [[4, 2, 5, 0.5, 1.5, 2, 0, 1, 0, 1.5, 0],
+ KT_TEST_A = [[4, 2, 5, 0.5, 1.5, 2, 0, 1, 0, 1.5, 0],
[7, 8, 4, 5.5, 4.5, 4, 5, 3, 2, 0.5, 1]]
- TEST_KB = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+ KT_TEST_B = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
[12, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 1]]
- TEST_KC = [[5, 2, 5],
+ KT_TEST_C = [[5, 2, 5],
[3, 4, 4]]
- TEST_KD = [[1, 2],
+ KT_TEST_D = [[1, 2],
[3, 4]]
- TEST_KE = [[5, 10, 15, 20],
+ KT_TEST_E = [[5, 10, 15, 20],
[4, 20, 16, 12]]
- TEST_KF = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+ KT_TEST_F = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
[2, 3, 4, 5, 6, 7, 6, 9, 10, 11]]
- TEST_KG = [[16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+ KT_TEST_G = [[16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
[45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 55]]
+ def self.test
+ kt_sets = [KT_TEST_A, KT_TEST_B, KT_TEST_C, KT_TEST_D, KT_TEST_E, KT_TEST_F, KT_TEST_G]
+ mw_sets = [MW_TEST_A, MW_TEST_B, MW_TEST_C, MW_TEST_D, MW_TEST_E, MW_TEST_F]
+ (kt_sets + mw_sets).each do |set|
+ a = self.build(*set)
+ a.display_diagnostics
+ end
+ puts "Analyzer self test complete"
+ end
+
class KendallTau
def self.test
@@ -50,7 +59,7 @@ def self.test
taub = self.new(*set)
taub.send(:display_diagnostics)
end
- puts "\nDiagnostics complete."
+ puts "\nKendall Tau Test Complete."
end
def self.benchmark
@@ -71,46 +80,38 @@ def self.benchmark
end
def display_diagnostics
- puts "\n\n"
+ puts "\n---------------------"
+ puts "Analyzer: #{self.class}"
+ puts "---------------------"
puts "Correlation: #{correlation} "
puts "Relationship: #{relationship}"
puts "---------------------"
puts "Set X: #{@x}"
puts "Set Y: #{@y}"
- puts "Pairs: #{pairs}"
puts "---------------------"
puts "Concordant: #{@nc}"
puts "Discordant: #{@na}"
puts "Tied X: #{@nx}"
puts "Tied Y: #{@ny}"
- puts "---------------------"
+ puts "---------------------\n"
end
end
class MannWhitney
def self.test
- invert_c = [MW_TEST_C.first.map(&:!), MW_TEST_C.last]
- u_test = self.new(*MW_TEST_A)
- u_test.diagnostics
- u_test = self.new(*MW_TEST_B)
- u_test.diagnostics
- u_test = self.new(*MW_TEST_C)
- u_test.diagnostics
- u_test = self.new(*invert_c)
- u_test.diagnostics
- u_test = self.new(*MW_TEST_D)
- u_test.diagnostics
- u_test = self.new(*MW_TEST_E)
- u_test.diagnostics
- begin
- u_test = self.new(*MW_TEST_F)
- u_test.diagnostics
- rescue Exception => e
- puts e.message
+ invert_b = [MW_TEST_B.last.map(&:!), MW_TEST_B.first]
+ [MW_TEST_A, MW_TEST_B, invert_b, MW_TEST_C, MW_TEST_D, MW_TEST_E, MW_TEST_F].each do |set|
+ begin
+ mw_test = self.new(*set)
+ mw_test.display_diagnostics
+ rescue Exception => e
+ puts "Error caught in test set:"
+ puts e.message
+ end
end
- puts "Diagnostics complete"
+ puts "\nMann-Whitney Test Complete"
end
def self.benchmark
@@ -131,7 +132,9 @@ def self.benchmark
end
def display_diagnostics
- puts "\n--------------------------------"
+ puts "\n---------------------"
+ puts "Analyzer: #{self.class}"
+ puts "---------------------"
puts "Correlation (z): #{correlation}"
puts "Ordinal set: #{@ordinal}"
puts "Dichotomous set: #{@dichotomous}"
@@ -142,7 +145,7 @@ def display_diagnostics
puts "u: #{@u}"
puts "z: #{@z}"
puts "relationship: #{relationship}"
- puts "--------------------------------"
+ puts "--------------------------------\n\n"
end
end
end
@@ -46,7 +46,7 @@ def to_f
def p1?
return false unless sufficient_data?
if @x.size <= 30
- correlation.abs >= SIG_P1[pairs.size]
+ correlation.abs >= SIG_P1[@x.size]
else
correlation.abs >= SIG_P1_40
end
@@ -55,7 +55,7 @@ def p1?
def p5?
return false unless sufficient_data?
if @x.size <= 30
- correlation.abs >= SIG_P5[pairs.size]
+ correlation.abs >= SIG_P5[@x.size]
else
correlation.abs >= SIG_P5_40
end
@@ -1,42 +1,12 @@
module Analyzer
class MannWhitney
include Math
+
# Builds the analyzer from two data sets.
#
# identify_dichotomous_set decides which of the two inputs is stored as
# @dichotomous and which as @ordinal. @keys holds the first and last of
# the distinct values found in @dichotomous.
def initialize(set_x, set_y)
  @dichotomous, @ordinal = identify_dichotomous_set(set_x, set_y)
  distinct_values = @dichotomous.uniq
  @keys = [distinct_values.first, distinct_values.last]
end
- def identify_dichotomous_set(set_x, set_y)
- if set_x.uniq.size == 2
- [set_x, set_y]
- elsif set_y.uniq.size == 2
- [set_y, set_x]
- else
- raise "Exactly one set must contain 2 discrete terms. X: #{set_x.uniq.size} terms; Y: #{set_y.uniq.size} terms. "
- end
- end
-
- def examine_rankings
- @ranks = Hash.new(0)
- pairs = @dichotomous.zip(@ordinal)
- pairs.each do |pair|
- pairs.each do |comp_pair|
- compare(pair, comp_pair)
- end
- end
- end
-
- def compare(pair, comp_pair)
- return if pair.first == comp_pair.first
- if pair.last > comp_pair.last
- @ranks[pair.first] += 1
- elsif pair.last == comp_pair.last
- @ranks.keys.each do |key|
- @ranks[key] += 0.5
- end
- end
- end
-
def correlation
return @z if @z
examine_rankings
@@ -52,6 +22,14 @@ def sufficient_data?
(n1 >= 4 || n2 >= 4) && n1 + n2 >= 12
end
+ def to_f
+ correlation.round(4)
+ end
+
+ def to_s
+ relationship
+ end
+
def relationship
if significant?
if @ranks[@keys.first] > @ranks[@keys.last]
@@ -66,6 +44,41 @@ def relationship
end
end
+ private
+
+ def identify_dichotomous_set(set_x, set_y)
+ set_x_dichotomous = set_x.uniq.size == 2
+ set_y_dichotomous = set_y.uniq.size == 2
+ if set_x_dichotomous == set_y_dichotomous
+ raise "Exactly one set must contain 2 discrete terms. X: #{set_x.uniq.size} terms; Y: #{set_y.uniq.size} terms. "
+ elsif set_x_dichotomous
+ [set_x, set_y]
+ else
+ [set_y, set_x]
+ end
+ end
+
+ def examine_rankings
+ @ranks = Hash.new(0)
+ pairs = @dichotomous.zip(@ordinal)
+ pairs.each do |pair|
+ pairs.each do |comp_pair|
+ compare(pair, comp_pair)
+ end
+ end
+ end
+
+ def compare(pair, comp_pair)
+ return if pair.first == comp_pair.first
+ if pair.last.to_f > comp_pair.last.to_f
+ @ranks[pair.first] += 1
+ elsif pair.last == comp_pair.last
+ @ranks.keys.each do |key|
+ @ranks[key] += 0.5
+ end
+ end
+ end
+
# Returns half of the product n1 * n2, as a Float.
# NOTE(review): presumably the expected value of U used when computing z;
# confirm against the correlation method.
def mu
  product = n1 * n2
  product / 2.0
end
@@ -81,10 +94,5 @@ def n1
# Returns the number of entries in @dichotomous that are not counted by n1,
# i.e. the total size of @dichotomous minus n1.
def n2
  total = @dichotomous.size
  total - n1
end
-
- def to_s
- relationship
- end
-
end
end
@@ -1,6 +1,11 @@
module Analyzer
class PhiCoefficient
include Math
-
+ def initialize(set_x, set_y)
+ puts "Not Yet Implemented"
+ end
+ def display_diagnostics
+ puts "#{self.class}: Not yet Implemented"
+ end
end
end

0 comments on commit 180fc47

Please sign in to comment.