forked from scikit-learn/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added accuracy tests for c and n_trees variation.
- Loading branch information
1 parent
a7a5788
commit 6a0366a
Showing
2 changed files
with
91 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
""" | ||
Testing for the Locality Sensitive Hashing Forest | ||
module (sklearn.neighbors.LSHForest). | ||
""" | ||
|
||
# Author: Gilles Louppe | ||
|
||
import numpy as np | ||
|
||
from sklearn.utils.testing import assert_array_equal | ||
from sklearn.utils.testing import assert_array_almost_equal | ||
from sklearn.utils.testing import assert_equal | ||
from sklearn.utils.testing import assert_raises | ||
from sklearn.utils.testing import assert_greater | ||
from sklearn.utils.testing import assert_less | ||
from sklearn.utils.testing import assert_true | ||
from sklearn.utils.testing import assert_warns | ||
|
||
from sklearn.metrics import euclidean_distances | ||
from sklearn.neighbors import LSHForest | ||
|
||
|
||
def test_neighbors_accuracy_with_c(): | ||
"""Accuracy increases as `c` increases.""" | ||
c_values = np.array([10, 50, 250]) | ||
samples = 1000 | ||
dim = 50 | ||
n_iter = 10 | ||
n_points = 20 | ||
accuracies = np.zeros(c_values.shape[0], dtype=float) | ||
X = np.random.rand(samples, dim) | ||
|
||
for i in range(c_values.shape[0]): | ||
lshf = LSHForest(c=c_values[i]) | ||
lshf.fit(X) | ||
for j in range(n_iter): | ||
point = X[np.random.randint(0, samples)] | ||
neighbors = lshf.kneighbors(point, n_neighbors=n_points, | ||
return_distance=False) | ||
distances = euclidean_distances(point, X) | ||
ranks = np.argsort(distances)[0, :n_points] | ||
|
||
intersection = np.intersect1d(ranks, neighbors).shape[0] | ||
ratio = intersection/float(n_points) | ||
accuracies[i] = accuracies[i] + ratio | ||
|
||
accuracies[i] = accuracies[i]/float(n_iter) | ||
|
||
# Sorted accuracies should be equal to original accuracies | ||
assert_array_equal(accuracies, np.sort(accuracies), | ||
err_msg="Accuracies are not non-decreasing.") | ||
|
||
|
||
def test_neighbors_accuracy_with_n_trees(): | ||
"""Accuracy increases as `n_trees` increases.""" | ||
n_trees = np.array([1, 10, 100]) | ||
samples = 1000 | ||
dim = 50 | ||
n_iter = 10 | ||
n_points = 20 | ||
accuracies = np.zeros(n_trees.shape[0], dtype=float) | ||
X = np.random.rand(samples, dim) | ||
|
||
for i in range(n_trees.shape[0]): | ||
lshf = LSHForest(c=500, n_trees=n_trees[i]) | ||
lshf.fit(X) | ||
for j in range(n_iter): | ||
point = X[np.random.randint(0, samples)] | ||
neighbors = lshf.kneighbors(point, n_neighbors=n_points, | ||
return_distance=False) | ||
distances = euclidean_distances(point, X) | ||
ranks = np.argsort(distances)[0, :n_points] | ||
|
||
intersection = np.intersect1d(ranks, neighbors).shape[0] | ||
ratio = intersection/float(n_points) | ||
accuracies[i] = accuracies[i] + ratio | ||
|
||
accuracies[i] = accuracies[i]/float(n_iter) | ||
|
||
# Sorted accuracies should be equal to original accuracies | ||
assert_array_equal(accuracies, np.sort(accuracies), | ||
err_msg="Accuracies are not non-decreasing.") | ||
|
||
|
||
if __name__ == "__main__": | ||
import nose | ||
nose.runmodule() |