Skip to content

Commit

Permalink
feat(k-means): add meansLatLon centroid strategy, docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
postspectacular committed Apr 20, 2021
1 parent 2491668 commit 269c11c
Showing 1 changed file with 64 additions and 1 deletion.
65 changes: 64 additions & 1 deletion packages/k-means/src/kmeans.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,18 @@ import { SYSTEM, uniqueIndices, weightedRandom } from "@thi.ng/random";
import { add, median, mulN, ReadonlyVec, Vec, zeroes } from "@thi.ng/vectors";
import type { CentroidStrategy, Cluster, KMeansOpts } from "./api";

/**
* Takes an array of n-dimensional `samples` and attempts to assign them to `k`
* clusters, using the behavior defined by (optionally) given `opts`.
*
* @remarks
* https://en.wikipedia.org/wiki/K-means_clustering
*
* @param k
* @param samples
* @param opts
* @returns
*/
export const kmeans = <T extends ReadonlyVec>(
k: number,
samples: T[],
Expand Down Expand Up @@ -45,7 +57,8 @@ export const kmeans = <T extends ReadonlyVec>(
};

/**
* k-means++ initialization / selection of initial cluster centroids.
* k-means++ initialization / selection of initial cluster centroids. Default
* centroid initialization method for {@link kmeans}.
*
* @remarks
* References:
Expand Down Expand Up @@ -113,6 +126,12 @@ const buildClusters = (centroids: ReadonlyVec[], clusters: number[]) => {
return indices;
};

/**
* Default centroid strategy forming new centroids by averaging the position of
* participating samples.
*
* @param dim
*/
export const means: CentroidStrategy = (dim) => {
const acc = zeroes(dim);
let n = 0;
Expand All @@ -125,10 +144,54 @@ export const means: CentroidStrategy = (dim) => {
};
};

/**
 * Centroid strategy which derives each new centroid from the componentwise
 * median of all samples assigned to a cluster.
 *
 * @remarks
 * All samples passed to `update` are collected (by reference) and only
 * evaluated once `finish` is called. If no samples were collected, `finish`
 * returns `undefined`.
 *
 * https://en.wikipedia.org/wiki/K-medians_clustering
 */
export const medians: CentroidStrategy = () => {
    const collected: ReadonlyVec[] = [];
    return {
        update: (sample) => collected.push(sample),
        finish: () => {
            if (collected.length === 0) {
                return undefined;
            }
            return median([], collected);
        },
    };
};

/**
 * Means centroid strategy for decimal degree lat/lon positions (e.g. WGS84).
 * Samples are read as `[lat, lon]` tuples. Unlike the default {@link means}
 * strategy, this one treats longitude values correctly in terms of the ±180
 * deg boundary and ensures samples on either side of the Pacific are forming
 * correct centroids.
 *
 * @remarks
 * Latitudes are averaged arithmetically. Longitudes are accumulated as signed
 * offsets relative to the longitude of the first sample (each offset wrapped
 * into the `(-180, 180]` interval); the mean offset is then added back to that
 * reference and wrapped into `(-180, 180]` again. For clusters spanning less
 * than 180° of longitude this equals the plain arithmetic mean, regardless of
 * whether the cluster straddles the antimeridian or the prime meridian.
 *
 * `finish` does not modify the accumulated state, so it can be called
 * repeatedly. It returns `undefined` if no samples have been added.
 *
 * When using this strategy, you should also use the
 * {@link @thi.ng/distance#HAVERSINE_LATLON} distance metric for
 * {@link KMeansOpts.distance}.
 *
 * @example
 * ```ts
 * kmeans(3, [...], { strategy: meansLatLon, dist: HAVERSINE_LATLON })
 * ```
 *
 * https://en.wikipedia.org/wiki/World_Geodetic_System
 */
export const meansLatLon: CentroidStrategy = () => {
    let latSum = 0;
    // longitude of the first sample, used as origin for all longitude offsets
    let lonRef = 0;
    let lonOffsetSum = 0;
    let n = 0;
    // wraps an angle (in degrees) into the (-180, 180] interval
    const wrap = (deg: number) => deg - 360 * Math.ceil((deg - 180) / 360);
    return {
        update: ([$lat, $lon]) => {
            if (!n) lonRef = $lon;
            latSum += $lat;
            // shortest signed angular distance from the reference longitude,
            // so that e.g. 179 and -179 are treated as 2 degrees apart
            lonOffsetSum += wrap($lon - lonRef);
            n++;
        },
        finish: () =>
            n ? [latSum / n, wrap(lonRef + lonOffsetSum / n)] : undefined,
    };
};

0 comments on commit 269c11c

Please sign in to comment.