Merge aa6f1d9 into fafd7df

montanaflynn · Jan 17, 2019 · 5258353 · 5258353
2 parents fafd7df + aa6f1d9
commit 5258353
Show file tree

Hide file tree

Showing 10 changed files with 254 additions and 20 deletions.
diff --git a/Makefile b/Makefile
@@ -1,17 +1,24 @@
-.PHONY: all
+default: lint test
+
+deps:
+	go get github.com/alecthomas/gometalinter
+	gometalinter --install
 
 doc:
 	godoc `pwd`
 
 webdoc:
 	godoc -http=:44444
 
-format: 
+format:
 	go fmt
 
+# lint formats the code and then runs gometalinter with the gocyclo and
+# deadcode linters switched off. --disable accepts a single linter per
+# flag, so each one is given its own flag; a bare trailing word would be
+# read as a path to lint instead.
+lint: format
+	gometalinter --disable=gocyclo --disable=deadcode
+
 test:
-	go test -race 
-	
+	go test -race
+
 check: format test
 
 benchmark:
@@ -20,10 +27,3 @@ benchmark:
 coverage:
 	go test -coverprofile=coverage.out
 	go tool cover -html="coverage.out"
-
-lint: format
-	go get github.com/alecthomas/gometalinter
-	gometalinter --install
-	gometalinter 
-
-default: lint test
diff --git a/README.md b/README.md
@@ -65,13 +65,16 @@ var (
 type Float64Data []float64
 
 func LoadRawData(raw interface{}) (f Float64Data) {}
+
 func AutoCorrelation(data Float64Data, lags int) (float64, error) {}
 func ChebyshevDistance(dataPointX, dataPointY []float64) (distance float64, err error) {}
 func Correlation(data1, data2 Float64Data) (float64, error) {}
+func Counts(input Float64Data) map[float64]int {}
 func Covariance(data1, data2 Float64Data) (float64, error) {}
 func CovariancePopulation(data1, data2 Float64Data) (float64, error) {}
 func CumulativeSum(input Float64Data) ([]float64, error) {}
 func EuclideanDistance(dataPointX, dataPointY []float64) (distance float64, err error) {}
+func FindUniques(input []float64) []float64 {}
 func GeometricMean(input Float64Data) (float64, error) {}
 func HarmonicMean(input Float64Data) (float64, error) {}
 func InterQuartileRange(input Float64Data) (float64, error) {}
@@ -89,7 +92,7 @@ func Pearson(data1, data2 Float64Data) (float64, error) {}
 func Percentile(input Float64Data, percent float64) (percentile float64, err error) {}
 func PercentileNearestRank(input Float64Data, percent float64) (percentile float64, err error) {}
 func PopulationVariance(input Float64Data) (pvar float64, err error) {}
-func Round(input float64, places int) (rounded float64, err error) {}
+func RemoveDuplicates(input []float64) []float64 {}
 func Sample(input Float64Data, takenum int, replacement bool) ([]float64, error) {}
 func SampleVariance(input Float64Data) (svar float64, err error) {}
 func Sigmoid(input Float64Data) ([]float64, error) {}
@@ -128,6 +131,9 @@ type Quartiles struct {
 
 func Quartile(input Float64Data) (Quartiles, error) {}
 func QuartileOutliers(input Float64Data) (Outliers, error) {}
+
+func Round(input float64, places int) (rounded float64, err error) {}
+func Float64ToInt(input float64) (output int) {}
 ```
 
 ## Contributing

diff --git a/data.go b/data.go
@@ -146,3 +146,9 @@ func (f Float64Data) Covariance(d Float64Data) (float64, error) {
 func (f Float64Data) CovariancePopulation(d Float64Data) (float64, error) {
 	return CovariancePopulation(f, d)
 }
+
+// FindUniques returns the values that occur exactly once in the data,
+// in their original order, ignoring NaN. It delegates to the
+// package-level FindUniques.
+func (f Float64Data) FindUniques() []float64 { return FindUniques(f) }
+
+// RemoveDuplicates returns the data with repeated numbers and NaN
+// removed, keeping the first occurrence of each value. It delegates
+// to the package-level RemoveDuplicates.
+func (f Float64Data) RemoveDuplicates() []float64 { return RemoveDuplicates(f) }
diff --git a/data_test.go b/data_test.go
@@ -9,6 +9,7 @@ import (
 
 var data1 = Float64Data{-10, -10.001, 5, 1.1, 2, 3, 4.20, 5}
 var data2 = Float64Data{-9, -9.001, 4, .1, 1, 2, 3.20, 5}
+var data3 = Float64Data{1, 2, 2, 3, 4, 5, 4, 6, 6, math.NaN(), math.Inf(-1)}
 
 func getFunctionName(i interface{}) string {
 	return runtime.FuncForPC(reflect.ValueOf(i).Pointer()).Name()
@@ -106,6 +107,20 @@ func TestHelperMethods(t *testing.T) {
 		t.Errorf("Mode() => %.1f != %.1f", mo, []float64{5.0})
 	}
 
+	// Test FindUniques
+	uv := data3.FindUniques()
+	uvs := []float64{1, 3, 5, math.Inf(-1)}
+	if !reflect.DeepEqual(uv, uvs) {
+		t.Errorf("FindUniques() => %f != %f", uv, uvs)
+	}
+
+	// Test RemoveDuplicates
+	rd := data3.RemoveDuplicates()
+	rds := []float64{1, 2, 3, 4, 5, 6, math.Inf(-1)}
+	if !reflect.DeepEqual(rd, rds) {
+		t.Errorf("RemoveDuplicates() => %f != %f", rd, rds)
+	}
+
 	// Test InterQuartileRange
 	iqr, _ := data1.InterQuartileRange()
 	if iqr != 9.05 {

diff --git a/examples/main.go b/examples/main.go
@@ -159,10 +159,22 @@ func main() {
 	// Output: 0.4
 
 	sig, _ := stats.Sigmoid([]float64{3.0, 1.0, 0.2})
-	fmt.Println(s)
+	fmt.Println(sig)
 	// Output: [0.9525741268224334 0.7310585786300049 0.549833997312478]
 
 	sm, _ := stats.SoftMax([]float64{3.0, 1.0, 0.2})
 	fmt.Println(sm)
 	// Output: [0.8360188027814407 0.11314284146556013 0.05083835575299916]
+
+	u := stats.RemoveDuplicates([]float64{3.0, 1.0, 3.0, 0.2, 1.0})
+	fmt.Println(u)
+	// Output: [3 1 0.2]
+
+	u = stats.FindUniques([]float64{3.0, 1.0, 3.0, 0.2, 1.0})
+	fmt.Println(u)
+	// Output: [0.2]
+
+	i := stats.Float64ToInt(4.8213)
+	fmt.Println(i)
+	// Output: 5
 }
diff --git a/load_test.go b/load_test.go
@@ -143,7 +143,7 @@ var allTestData = []struct {
 	},
 }
 
-func equal(actual, expected Float64Data) bool {
+func equalData(actual, expected Float64Data) bool {
 	if len(actual) != len(expected) {
 		return false
 	}
@@ -160,7 +160,7 @@ func equal(actual, expected Float64Data) bool {
 func TestLoadRawData(t *testing.T) {
 	for _, data := range allTestData {
 		actual := LoadRawData(data.actual)
-		if !equal(actual, data.expected) {
+		if !equalData(actual, data.expected) {
 			t.Fatalf("Transform(%v). Expected [%v], Actual [%v]", data.actual, data.expected, actual)
 		}
 	}

diff --git a/unique.go b/unique.go
@@ -0,0 +1,55 @@
+package stats
+
+import "math"
+
+// RemoveDuplicates returns a new slice holding the first occurrence of
+// every number in input, in the original order. NaN values are dropped
+// because NaN never compares equal to anything, itself included, so it
+// cannot be deduplicated. The input slice is left untouched.
+func RemoveDuplicates(input []float64) []float64 {
+	seen := make(map[float64]struct{}, len(input))
+	// Build the result in fresh storage so the caller's data is not
+	// overwritten while the values are being compacted.
+	output := make([]float64, 0, len(input))
+	for _, f := range input {
+		if math.IsNaN(f) {
+			continue
+		}
+		if _, ok := seen[f]; ok {
+			continue
+		}
+		seen[f] = struct{}{}
+		output = append(output, f)
+	}
+	return output
+}
+
+// FindUniques returns the numbers that occur exactly once in input, in
+// the order they appear. NaN values are skipped: NaN never compares
+// equal to itself, so every NaN would otherwise count as unique. The
+// result is always non-nil and input is not modified.
+func FindUniques(input []float64) []float64 {
+	// Tally occurrences in one pass instead of comparing every pair.
+	counts := make(map[float64]int, len(input))
+	for _, f := range input {
+		if math.IsNaN(f) {
+			continue
+		}
+		counts[f]++
+	}
+	output := []float64{}
+	for _, f := range input {
+		// A NaN lookup never matches a key, so it yields 0 and is skipped.
+		if counts[f] == 1 {
+			output = append(output, f)
+		}
+	}
+	return output
+}
diff --git a/unique_test.go b/unique_test.go
@@ -0,0 +1,97 @@
+package stats
+
+import (
+	"math"
+	"testing"
+)
+
+// equalSlice reports whether a and b have the same length and hold
+// matching values at every index, treating two NaNs as a match.
+func equalSlice(a, b []float64) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for idx := range a {
+		left, right := a[idx], b[idx]
+		switch {
+		case left == right:
+			continue
+		case math.IsNaN(left) && math.IsNaN(right):
+			continue
+		default:
+			return false
+		}
+	}
+	return true
+}
+
+// TestRemoveDuplicates runs RemoveDuplicates over slices with repeated
+// values (the second case also contains NaN and both infinities) and
+// compares each result with the expected slice using equalSlice.
+func TestRemoveDuplicates(t *testing.T) {
+	var testCases = []struct {
+		in  []float64
+		out []float64
+	}{
+		{
+			[]float64{-4, 2, -4, -2, 2, 4, 4},
+			[]float64{-4, 2, -2, 4},
+		},
+		{
+			[]float64{59, math.Inf(-1), -959.3, math.NaN(), -784, 59, 74.3, math.NaN(), 38.2, math.Inf(-1), 905, math.Inf(1)},
+			[]float64{59, math.Inf(-1), -959.3, -784, 74.3, 38.2, 905, math.Inf(1)},
+		},
+	}
+	for _, tc := range testCases {
+		output := RemoveDuplicates(tc.in)
+		if !equalSlice(output, tc.out) {
+			t.Errorf("got %v, want %v", output, tc.out)
+		}
+	}
+}
+
+// BenchmarkRemoveDuplicatesSmallFloatSlice times RemoveDuplicates on the
+// slice returned by makeFloatSlice(5). The slice is created inside the
+// loop, so building it is part of every measured iteration.
+func BenchmarkRemoveDuplicatesSmallFloatSlice(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		RemoveDuplicates(makeFloatSlice(5))
+	}
+}
+
+// BenchmarkRemoveDuplicatesLargeFloatSlice times RemoveDuplicates on the
+// slice returned by makeFloatSlice(100000). The slice is built once and
+// the timer is reset afterwards, so setup is excluded from the result;
+// the same slice is passed to every iteration.
+func BenchmarkRemoveDuplicatesLargeFloatSlice(b *testing.B) {
+	lf := makeFloatSlice(100000)
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		RemoveDuplicates(lf)
+	}
+}
+
+// TestFindUniques runs FindUniques over slices mixing repeated values,
+// infinities and NaN, and checks with equalSlice that only the values
+// occurring once are returned, in input order and without any NaN.
+func TestFindUniques(t *testing.T) {
+	var testCases = []struct {
+		in  []float64
+		out []float64
+	}{
+		{
+			[]float64{-4, -42, 2, math.Inf(-1), -4, -2, 2, math.Inf(1), 4, math.Inf(1), 4, 5},
+			[]float64{-42, math.Inf(-1), -2, 5},
+		},
+		{
+			[]float64{59, math.Inf(-1), math.NaN(), -959.7485, -784, 59, 74.3, 238.2, math.Inf(-1), 905, math.Inf(1)},
+			[]float64{-959.7485, -784, 74.3, 238.2, 905, math.Inf(1)},
+		},
+		{
+			[]float64{1, math.NaN(), 2, 3, 4, 1, 2, 3, math.NaN()},
+			[]float64{4},
+		},
+		{
+			[]float64{1, math.NaN(), 2, 3, 4, 1, 2, 3},
+			[]float64{4},
+		},
+	}
+	for _, tc := range testCases {
+		output := FindUniques(tc.in)
+		if !equalSlice(output, tc.out) {
+			t.Errorf("got %v, want %v", output, tc.out)
+		}
+	}
+}
+// BenchmarkFindUniquesSmallFloatSlice times FindUniques on the slice
+// returned by makeFloatSlice(5). The slice is created inside the loop,
+// so building it is part of every measured iteration.
+func BenchmarkFindUniquesSmallFloatSlice(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		FindUniques(makeFloatSlice(5))
+	}
+}
+
+// BenchmarkFindUniquesLargeFloatSlice times FindUniques on the slice
+// returned by makeFloatSlice(100000). The slice is built once and the
+// timer is reset afterwards, so setup is excluded from the result.
+func BenchmarkFindUniquesLargeFloatSlice(b *testing.B) {
+	lf := makeFloatSlice(100000)
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		FindUniques(lf)
+	}
+}
diff --git a/util.go b/util.go
@@ -1,16 +1,30 @@
 package stats
 
 import (
+	"math"
 	"sort"
 	"time"
 )
 
-// float64ToInt rounds a float64 to an int
-func float64ToInt(input float64) (output int) {
+// Float64ToInt rounds a float64 to an int
+func Float64ToInt(input float64) (output int) {
 	r, _ := Round(input, 0)
 	return int(r)
 }
 
+// Counts tallies how many times each value occurs in input and returns
+// the tallies keyed by value. NaN entries are left out of the result.
+func Counts(input Float64Data) map[float64]int {
+	tally := make(map[float64]int, len(input))
+	for i := 0; i < len(input); i++ {
+		if v := input[i]; !math.IsNaN(v) {
+			tally[v]++
+		}
+	}
+	return tally
+}
+
 // unixnano returns nanoseconds from UTC epoch
 func unixnano() int64 {
 	return time.Now().UTC().UnixNano()

diff --git a/util_test.go b/util_test.go
@@ -1,6 +1,8 @@
 package stats
 
 import (
+	"fmt"
+	"math"
 	"math/rand"
 	"testing"
 )
@@ -26,16 +28,43 @@ func makeRandFloatSlice(c int) []float64 {
 }
 
 func TestFloat64ToInt(t *testing.T) {
-	m := float64ToInt(234.0234)
+	m := Float64ToInt(234.0234)
 	if m != 234 {
 		t.Errorf("%x != %x", m, 234)
 	}
-	m = float64ToInt(-234.0234)
+	m = Float64ToInt(-234.0234)
 	if m != -234 {
 		t.Errorf("%x != %x", m, -234)
 	}
-	m = float64ToInt(1)
+	m = Float64ToInt(1)
 	if m != 1 {
 		t.Errorf("%x != %x", m, 1)
 	}
 }
+
+// ExampleCounts prints the counts that Counts reports for the values
+// 1 through 4 of a small slice; the Output comment holds the expected
+// line.
+func ExampleCounts() {
+	d := []float64{1, 1, 2, 3, 4, 4, 4, 4}
+	c := Counts(d)
+	fmt.Println(c[1], c[2], c[3], c[4])
+	// Output: 2 1 1 4
+}
+
+// TestCounts checks the tallies Counts reports for repeated values and
+// for both infinities when the input also contains NaN entries, and
+// that NaN contributes no key to the result.
+func TestCounts(t *testing.T) {
+	d := []float64{1, math.NaN(), math.NaN(), 1, 2, 3, 4, 4, 4, 4, math.Inf(-1), math.Inf(1)}
+	c := Counts(d)
+	if c[1] != 2 {
+		t.Errorf("%d != %d", c[1], 2)
+	}
+	if c[2] != 1 {
+		t.Errorf("%d != %d", c[2], 1)
+	}
+	if c[4] != 4 {
+		t.Errorf("%d != %d", c[4], 4)
+	}
+	// Report the value that was actually checked, not c[1].
+	if c[math.Inf(-1)] != 1 {
+		t.Errorf("%d != %d", c[math.Inf(-1)], 1)
+	}
+	if c[math.Inf(1)] != 1 {
+		t.Errorf("%d != %d", c[math.Inf(1)], 1)
+	}
+	// Distinct non-NaN values in d: 1, 2, 3, 4, -Inf, +Inf.
+	if len(c) != 6 {
+		t.Errorf("%d != %d", len(c), 6)
+	}
+}