# milosgajdos83/gosom

Switch branches/tags
Nothing to show
Fetching contributors…
Cannot retrieve contributors at this time
189 lines (169 sloc) 5.42 KB
 package som import ( "container/heap" "fmt" "math" "gonum.org/v1/gonum/mat" ) // Distance calculates metric distance between vectors a and b. // If unsupported metric is requested Distance returns euclidean distance. // It returns error if the supplied vectors are either nil or have different dimensions func Distance(metric string, a, b []float64) (float64, error) { if a == nil || b == nil { return 0.0, fmt.Errorf("invalid vectors supplied. a: %v, b: %v", a, b) } if len(a) != len(b) { return 0.0, fmt.Errorf("Incorrect vector dims. a: %d, b: %d", len(a), len(b)) } switch metric { case "euclidean": return euclideanVec(a, b), nil default: return euclideanVec(a, b), nil } } // DistanceMx calculates metric distance matrix for the supplied matrix. // Distance matrix is also known in literature as dissimilarity matrix. // DistanceMx returns a hollow symmetric matrix where an item x_ij contains the distance between // vectors stored in rows i and j. If an unknown metric is supplied Euclidean distance is computed. // It returns error if the supplied matrix is nil. func DistanceMx(metric string, m *mat.Dense) (*mat.Dense, error) { if m == nil { return nil, fmt.Errorf("invalid matrix supplied: %v", m) } switch metric { case "euclidean": return euclideanMx(m), nil default: return euclideanMx(m), nil } } // ClosestVec finds the closest vector to v in the list of vectors stored in m rows // using the supplied distance metric. It returns an index to matrix m rows. // If unsupported metric is requested, ClosestVec falls over to euclidean metric. // If several vectors of the same distance are found, it returns the index of the first one found. // ClosestVec returns error if either v or m are nil or if the v dimension is different from // the number of m columns. When the ClosestVec fails with error returned index is set to -1. func ClosestVec(metric string, v []float64, m *mat.Dense) (int, error) { // vector can't be nil if v == nil || len(v) == 0 { return -1, fmt.Errorf("invalid vector: %v", v) } // matrix cant be nil if m == nil { return -1, fmt.Errorf("invalid matrix: %v", m) } // check if the dimensions are ok rows, _ := m.Dims() closest := 0 dist := math.MaxFloat64 for i := 0; i < rows; i++ { d, err := Distance(metric, v, m.RawRowView(i)) if err != nil { return -1, err } if d < dist { dist = d closest = i } } return closest, nil } // ClosestNVec finds the N closest vectors to v in the list of vectors stored in m rows // using the supplied distance metric. It returns a slice which contains indices to the m // rows. The length of the slice is the same as number of requested closest vectors - n. // ClosestNVec fails in the same way as ClosestVec. If n is higher than the number of // rows in m, or if it is not a positive integer, it fails with error too. func ClosestNVec(metric string, n int, v []float64, m *mat.Dense) ([]int, error) { // vector can't be nil if v == nil || len(v) == 0 { return nil, fmt.Errorf("invalid vector: %v", v) } // matrix cant be nil if m == nil { return nil, fmt.Errorf("invalid matrix: %v", m) } rows, _ := m.Dims() // n must be positive integer and smaller than number of rows in m if n <= 0 || n > rows { return nil, fmt.Errorf("invalid number of closest vectors requested: %d", n) } // we return slice of indices closest := make([]int, n) switch { case n == 1: idx, err := ClosestVec(metric, v, m) if err != nil { return nil, err } closest[0] = idx default: // no need to check for error h, _ := newFloat64Heap(n) rows, _ := m.Dims() for i := 0; i < rows; i++ { d, err := Distance(metric, v, m.RawRowView(i)) if err != nil { return nil, err } f := &float64Item{val: d, index: i} heap.Push(h, f) } // add max vals to closest slide for j := 0; j < n; j++ { closest[j] = (heap.Pop(h).(*float64Item)).index } } return closest, nil } // BMUs returns a slice which contains indices of Best Match Unit (BMU) codebook vectors for each // vector stored in data rows. Each item in the returned slice correspnds to index of BMU for // a particular data sample. If some data row has more than one BMU the index of the first one found is used. // It returns error if either the data or codebook are nil or if their dimensions are mismatched. func BMUs(data, codebook *mat.Dense) ([]int, error) { // data can't be nil if data == nil { return nil, fmt.Errorf("invalid data supplied: %v", data) } // codebook cant be nil if codebook == nil { return nil, fmt.Errorf("invalid codebook supplied: %v", codebook) } rows, _ := data.Dims() bmus := make([]int, rows) // loop through all data for i := 0; i < rows; i++ { idx, err := ClosestVec("euclidean", data.RawRowView(i), codebook) if err != nil { return nil, err } bmus[i] = idx } return bmus, nil } // euclideanVec computes euclidean distance between vectors a and b. func euclideanVec(a, b []float64) float64 { d := 0.0 for i := 0; i < len(a); i++ { d += (a[i] - b[i]) * (a[i] - b[i]) } return math.Sqrt(d) } // euclideanMx computes a matrix of euclidean distances between each row in m func euclideanMx(m *mat.Dense) *mat.Dense { rows, _ := m.Dims() out := mat.NewDense(rows, rows, nil) for row := 0; row < rows-1; row++ { dist := 0.0 a := m.RawRowView(row) for i := row + 1; i < rows; i++ { if i != row { b := m.RawRowView(i) dist = euclideanVec(a, b) out.Set(row, i, dist) out.Set(i, row, dist) } } } return out }