Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

projector: fix broken KNN #5063

Merged
merged 3 commits into from
Jun 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 22 additions & 1 deletion tensorboard/plugins/projector/vz_projector/BUILD
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
load("//tensorboard/defs:defs.bzl", "tf_js_binary", "tf_ts_library")
load("//tensorboard/defs:defs.bzl", "tf_js_binary", "tf_ng_web_test_suite", "tf_ts_library")
load("//tensorboard/defs:web.bzl", "tb_combine_html", "tf_web_library")

package(default_visibility = ["//tensorboard/plugins/projector:__subpackages__"])
Expand Down Expand Up @@ -122,3 +122,24 @@ tb_combine_html(
":standalone_lib",
],
)

################# Test ##################

# Test-only TypeScript library holding the projector unit tests. It compiles
# knn_test.ts against the local :vz_projector target and the Jasmine typings.
tf_ts_library(
name = "vz_projector_test_lib",
testonly = True,
srcs = [
"knn_test.ts",
],
deps = [
":vz_projector",
"@npm//@types/jasmine",
],
)

# Test target built from :vz_projector_test_lib.
# NOTE(review): presumably tf_ng_web_test_suite runs the Jasmine specs in a
# browser-based runner -- confirm against //tensorboard/defs:defs.bzl.
tf_ng_web_test_suite(
name = "vz_projector_test",
deps = [
":vz_projector_test_lib",
],
)
20 changes: 17 additions & 3 deletions tensorboard/plugins/projector/vz_projector/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -454,14 +454,28 @@ export class DataSet {
// Handle the case where we've previously found the nearest neighbors.
const previouslyComputedNNeighbors =
this.nearest && this.nearest.length ? this.nearest[0].length : 0;
if (this.nearest != null && previouslyComputedNNeighbors >= nNeighbors) {
if (
this.nearest != null &&
this.nearest.length >= data.length &&
previouslyComputedNNeighbors >= nNeighbors
) {
return Promise.resolve(
this.nearest.map((neighbors) => neighbors.slice(0, nNeighbors))
this.nearest
// `this.points` is only set in the constructor and `data` is a subset of
// it. If `nearest` was calculated with N = 1000 sampled points before
// and we are asked to calculate KNN of N = 50, pretend like we
// recalculated the KNN for N = 50 by taking the first 50 results from
// N = 1000.
.slice(0, data.length)
// NearestEntry has a list of K-nearest vector indices at a given index.
// Hence, if we already precomputed K = 100 before and later seek
// K = 10, we just have to take the first ten.
.map((neighbors) => neighbors.slice(0, nNeighbors))
);
} else {
const knnGpuEnabled = (await util.hasWebGLSupport()) && !IS_FIREFOX;
const result = await (knnGpuEnabled
? knn.findKNNGPUCosine(data, nNeighbors, (d) => d.vector)
? knn.findKNNGPUCosDistNorm(data, nNeighbors, (d) => d.vector)
: knn.findKNN(
data,
nNeighbors,
Expand Down
55 changes: 43 additions & 12 deletions tensorboard/plugins/projector/vz_projector/knn.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,18 @@ export type NearestEntry = {
const OPTIMAL_GPU_BLOCK_SIZE = 256;
/** Id of message box used for knn gpu progress bar. */
const KNN_GPU_MSG_ID = 'knn-gpu';

/**
* Returns the K nearest neighbors for each vector where the distance
* computation is done on the GPU (WebGL) using cosine distance.
*
* @param dataPoints List of data points, where each data point holds an
* n-dimensional vector.
* n-dimensional vector. Assumes that the vector is already normalized to unit
* norm.
* @param k Number of nearest neighbors to find.
* @param accessor A method that returns the vector, given the data point.
*/
export function findKNNGPUCosine<T>(
export function findKNNGPUCosDistNorm<T>(
dataPoints: T[],
k: number,
accessor: (dataPoint: T) => Float32Array
Expand All @@ -60,20 +62,38 @@ export function findKNNGPUCosine<T>(
// pair of points, which we sort using KMin data structure to obtain the
// K nearest neighbors for each point.
const nearest: NearestEntry[][] = new Array(N);
const numPieces = Math.ceil(N / OPTIMAL_GPU_BLOCK_SIZE);
let M = Math.floor(N / numPieces);
let modulo = N % numPieces;
let numPieces = Math.ceil(N / OPTIMAL_GPU_BLOCK_SIZE);
const actualPieceSize = Math.floor(N / numPieces);
const modulo = N % actualPieceSize;
numPieces += modulo ? 1 : 0;
let offset = 0;
let progress = 0;
let progressDiff = 1 / (2 * numPieces);
let piece = 0;

const typedArray = vector.toTypedArray(dataPoints, accessor);
const bigMatrix = tf.tensor(typedArray, [N, dim]);
const bigMatrixTransposed = bigMatrix.transpose();
const bigMatrixTransposed = tf.transpose(bigMatrix);
// 1 - A * A^T.
const bigMatrixSquared = tf.matMul(bigMatrix, bigMatrixTransposed);
const cosDistMatrix = tf.sub(1, bigMatrixSquared);
const splits = tf.split(cosDistMatrix, numPieces, 1);

let maybePaddedCosDistMatrix = cosDistMatrix;
if (actualPieceSize * numPieces > N) {
// Expect the input to be rank 2 (though it is not typed that way), so we
// pad the first dimension so that it splits evenly (all split tensors
// have exactly the same dimension).
const padding: Array<[number, number]> = [
[0, actualPieceSize * numPieces - N],
[0, 0],
];
maybePaddedCosDistMatrix = tf.pad(cosDistMatrix, padding);
}
const splits = tf.split(
maybePaddedCosDistMatrix,
new Array(numPieces).fill(actualPieceSize),
0
);

function step(resolve: (result: NearestEntry[][]) => void) {
let progressMsg =
Expand All @@ -82,25 +102,34 @@ export function findKNNGPUCosine<T>(
.runAsyncTask(
progressMsg,
async () => {
const B = piece < modulo ? M + 1 : M;
// `.data()` returns flattened Float32Array of B * N dimension.
// For matrix of
// [ 1 2 ]
// [ 3 4 ],
// `.data()` returns [1, 2, 3, 4].
const partial = await splits[piece].data();
progress += progressDiff;
for (let i = 0; i < B; i++) {
for (let i = 0; i < actualPieceSize; i++) {
let kMin = new KMin<NearestEntry>(k);
let iReal = offset + i;
if (iReal >= N) break;
for (let j = 0; j < N; j++) {
// Skip diagonal entries.
if (j === iReal) {
continue;
}
const cosDist = partial[j * B + i];
kMin.add(cosDist, {index: j, dist: cosDist});
// Access the i-th row of this piece at column `j`.
// Each row has N entries and the j-th entry holds the cosine distance
// between the iReal-th and j-th vectors.
const cosDist = partial[i * N + j];
if (cosDist >= 0) {
kMin.add(cosDist, {index: j, dist: cosDist});
}
}
nearest[iReal] = kMin.getMinKItems();
}
progress += progressDiff;
offset += B;
offset += actualPieceSize;
piece++;
},
KNN_GPU_MSG_ID
Expand Down Expand Up @@ -253,3 +282,5 @@ export function findKNNofPoint<T>(
}
return kMin.getMinKItems();
}

/**
 * Module internals exposed for tests only, so that specs can size their
 * inputs relative to `OPTIMAL_GPU_BLOCK_SIZE` without hard-coding its value.
 */
export const TEST_ONLY = {OPTIMAL_GPU_BLOCK_SIZE};
129 changes: 129 additions & 0 deletions tensorboard/plugins/projector/vz_projector/knn_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
import {findKNNGPUCosDistNorm, findKNN, NearestEntry, TEST_ONLY} from './knn';
import {cosDistNorm, unit} from './vector';

describe('projector knn test', () => {
  /** Reduces every neighbor list to just the indices of its entries. */
  const getIndices = (nearest: NearestEntry[][]): number[][] =>
    nearest.map((entries) => entries.map((entry) => entry.index));

  /**
   * Normalizes `vector` and returns that same instance. `unit` replaces the
   * vector in place, so this wrapper exists to make it usable in expressions.
   */
  const unitVector = (vector: Float32Array): Float32Array => {
    unit(vector);
    return vector;
  };

  describe('#findKNNGPUCosDistNorm', () => {
    it('finds n-nearest neighbor for each item', async () => {
      const points = [
        [1, 2, 0],
        [1, 1, 3],
        [100, 30, 0],
        [95, 23, 3],
        [100, 10, 0],
        [95, 23, 100],
      ].map((coords) => ({a: unitVector(new Float32Array(coords))}));

      const values = await findKNNGPUCosDistNorm(points, 4, (point) => point.a);

      const expectedIndices = [
        [2, 3, 4, 5],
        [5, 0, 3, 2],
        [3, 4, 5, 0],
        [2, 4, 5, 0],
        [3, 2, 5, 0],
        [1, 3, 2, 4],
      ];
      expect(getIndices(values)).toEqual(expectedIndices);
    });

    it('returns less than N when number of item is lower', async () => {
      const first = unitVector(new Float32Array([1, 2, 0]));
      const second = unitVector(new Float32Array([1, 1, 3]));

      const values = await findKNNGPUCosDistNorm([first, second], 4, (a) => a);

      expect(getIndices(values)).toEqual([[1], [0]]);
    });

    it('splits a large data into one that would fit into GPU memory', async () => {
      // A few items more than one GPU block, so the input must be split.
      const size = TEST_ONLY.OPTIMAL_GPU_BLOCK_SIZE + 5;
      // Every slot deliberately shares one and the same normalized vector.
      const sharedVector = unitVector(new Float32Array([1, 1, 1]));
      const data = new Array(size).fill(sharedVector);

      const values = await findKNNGPUCosDistNorm(data, 1, (a) => a);

      // The diagonal (distance to self) is meaningless and therefore ignored,
      // so the nearest neighbor of the first item is the second item
      // (index 1). Every other item resolves to index 0.
      const expectedIndices = [[1], ...new Array(size - 1).fill([0])];
      expect(getIndices(values)).toEqual(expectedIndices);
    });
  });

  describe('#findKNN', () => {
    // Covered by equality tests below (#findKNNGPUCosDistNorm == #findKNN).
  });

  describe('#findKNNGPUCosDistNorm and #findKNN', () => {
    /**
     * Runs the GPU and the CPU implementation with k = 2 on `data` and expects
     * both to pick the same neighbor indices. Floating point precision makes
     * the distances themselves hard to compare, so only indices are asserted.
     */
    const expectSameNeighborIndices = async (
      data: Float32Array[]
    ): Promise<void> => {
      const gpuNeighbors = await findKNNGPUCosDistNorm(data, 2, (a) => a);
      const cpuNeighbors = await findKNN(
        data,
        2,
        (a) => a,
        (a, b, _limit) => cosDistNorm(a, b)
      );

      expect(getIndices(gpuNeighbors)).toEqual(getIndices(cpuNeighbors));
    };

    it('returns same value when dist metrics are cosine', async () => {
      const data = [
        [1, 2, 0],
        [1, 1, 3],
        [100, 30, 0],
        [95, 23, 3],
        [100, 10, 0],
        [95, 23, 100],
      ].map((coords) => unitVector(new Float32Array(coords)));

      await expectSameNeighborIndices(data);
    });

    it('splits a large data without the result being wrong', async () => {
      const size = TEST_ONLY.OPTIMAL_GPU_BLOCK_SIZE + 5;
      const data = Array.from({length: size}, (_, index) =>
        unitVector(new Float32Array([index + 1, index + 1]))
      );

      await expectSameNeighborIndices(data);
    });
  });
});