-
Notifications
You must be signed in to change notification settings - Fork 449
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
1) Adding an older, slower version of NNDescent. 2) Adding a script to download all VLDB 2015 datasets. 3) Modifying references for NNDescent.
- Loading branch information
1 parent
dc0d301
commit eb0d0ae
Showing
12 changed files
with
546 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#!/bin/bash | ||
. ./lib.sh | ||
|
||
# Downloads one VLDB 2015 data file and verifies its checksum.
#   $1 - expected MD5 digest of the file
#   $2 - file name; used both as the remote object name and as the local target
# Download and CheckMD5 are not defined in this script; presumably they are
# provided by lib.sh.
function get_item() {
  # Keep the arguments local so that repeated calls neither leak into nor
  # clobber the script's global namespace. Bash locals are dynamically scoped,
  # so the helpers called below can still see them if they need to.
  local md5="$1"
  local fn="$2"

  Download "https://s3.amazonaws.com/RemoteDisk/TextCollections/VLDB2015/$fn" "$fn"
  CheckMD5 "$fn" "$md5"
}
|
||
|
||
# SQFD | ||
get_item c12453480d0d161563f8f84fbe317834 in_10_10k.txt.bz2 | ||
|
||
cat <<EOF | ||
The Signature Quadratic Form Distance (SQFD) distance was proposed by Christian Beecks: | ||
@inproceedings{beecks2010signature, | ||
title={Signature quadratic form distance}, | ||
author={Beecks, Christian and Uysal, Merih Seran and Seidl, Thomas}, | ||
booktitle={Proceedings of the ACM International Conference on Image and Video Retrieval}, | ||
pages={438--445}, | ||
year={2010}, | ||
organization={ACM} | ||
} | ||
The data set to be used with SQFD was created using ImageNet. | ||
@article{russakovsky2014imagenet, | ||
title={Imagenet large scale visual recognition challenge}, | ||
author={Russakovsky, Olga and Deng, Jia and Su, Hao and Krause, Jonathan and Satheesh, Sanjeev and Ma, Sean and Huang, Zhiheng and Karpathy, Andrej and Khosla, Aditya and Bernstein, Michael and others}, | ||
journal={International Journal of Computer Vision}, | ||
pages={1--42}, | ||
year={2014}, | ||
publisher={Springer} | ||
} | ||
EOF | ||
|
||
|
||
# DNA | ||
get_item 47833a114cc6eb67c10c129c65221a5f dna5M_32_4.txt.bz2 | ||
|
||
cat <<EOF | ||
DNA sequences were sampled from the human genome, see: | ||
http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/ | ||
EOF | ||
|
||
# SIFT | ||
get_item 334e871baf3338b2ae889bc9f8d5f7dc sift_texmex_learn5m.txt.bz2 | ||
|
||
cat <<EOF | ||
The SIFT 1B data sets is a part of the TexMex collection http://corpus-texmex.irisa.fr/ | ||
If you use them, please, consider citing: | ||
@inproceedings{jegou2011searching, | ||
title={Searching in one billion vectors: re-rank with source coding}, | ||
author={J{\'e}gou, Herv{\'e} and Tavenard, Romain and Douze, Matthijs and Amsaleg, Laurent}, | ||
booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2011 IEEE International Conference on}, | ||
pages={861--864}, | ||
year={2011}, | ||
organization={IEEE} | ||
} | ||
EOF | ||
|
||
get_item c471e23616fab3a4ab7d18b85af9be26 wikipedia_lda128.txt.bz2 | ||
get_item 07afa2abe6e4cddd117ae4750dc59c62 wikipedia_lda8.txt.bz2 | ||
|
||
|
||
cat <<EOF | ||
All Wikipedia based data sets used in the VLDB 2015 work were created using the gensim library. | ||
If you use it, please, consider citing: | ||
@inproceedings{rehurek_lrec, | ||
title = {{Software Framework for Topic Modelling | ||
with Large Corpora}}, | ||
author = {Radim {\v R}eh{\r u}{\v r}ek and Petr Sojka}, | ||
booktitle = {{Proceedings of the LREC 2010 Workshop on New | ||
Challenges for NLP Frameworks}}, | ||
pages = {45--50}, | ||
year = 2010, | ||
month = May, | ||
day = 22, | ||
publisher = {ELRA}, | ||
address = {Valletta, Malta}, | ||
note={\url{http://is.muni.cz/publication/884893/en}}, | ||
language={English} | ||
} | ||
EOF | ||
|
||
# The sparse Wikipedia download is currently disabled.
# The failure check is attached directly to the command: a detached
# `if [ "$?" != "0" ]` would test the exit status of whatever command happened
# to run last (the `cat` above), not the download itself.
# To re-enable the download, uncomment the whole group below.
#./download_wikipedia_sparse.sh || {
#  echo "Download of Wikipedia-sparse failed"
#  exit 1
#}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
/** | ||
* Non-metric Space Library | ||
* | ||
* Authors: Bilegsaikhan Naidan (https://github.com/bileg), Leonid Boytsov (http://boytsov.info). | ||
* With contributions from Lawrence Cayton (http://lcayton.com/) and others. | ||
* | ||
* For the complete list of contributors and further details see: | ||
* https://github.com/searchivarius/NonMetricSpaceLib | ||
* | ||
* Copyright (c) 2014 | ||
* | ||
* This code is released under the | ||
* Apache License Version 2.0 http://www.apache.org/licenses/. | ||
* | ||
*/ | ||
|
||
#ifndef _FACTORY_NNDES_OLD_H_
#define _FACTORY_NNDES_OLD_H_

#include <method/nndes_old.h>

namespace similarity {

/*
 * Creating functions.
 */

/**
 * Factory function that builds the old NN-descent index.
 *
 * @param PrintProgress  forwarded unchanged to the NNDescentMethodOld constructor.
 * @param SpaceType      not used by this factory; kept so the signature stays
 *                       the same for existing callers.
 * @param space          space whose distance the index uses (forwarded).
 * @param DataObjects    data set to be indexed (forwarded by reference).
 * @param AllParams      method parameters (forwarded).
 * @return a pointer to a NNDescentMethodOld<dist_t> allocated with `new`;
 *         NOTE(review): presumably the caller takes ownership and must
 *         delete it -- confirm against the factory registry.
 */
template <typename dist_t>
Index<dist_t>* CreateNNDescentOld(bool PrintProgress,
                                  const string& SpaceType,
                                  const Space<dist_t>* space,
                                  const ObjectVector& DataObjects,
                                  const AnyParams& AllParams) {
  return new NNDescentMethodOld<dist_t>(PrintProgress,
                                        space, DataObjects, AllParams);
}

/*
 * End of creating functions.
 */

}  // namespace similarity

#endif  // _FACTORY_NNDES_OLD_H_
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
/** | ||
* Non-metric Space Library | ||
* | ||
* Authors: Bilegsaikhan Naidan (https://github.com/bileg), Leonid Boytsov (http://boytsov.info). | ||
* With contributions from Lawrence Cayton (http://lcayton.com/) and others. | ||
* | ||
* For the complete list of contributors and further details see: | ||
* https://github.com/searchivarius/NonMetricSpaceLib | ||
* | ||
* This is a wrapper class for the Wei Dong implementation of https://code.google.com/p/nndes/, | ||
* which also contains some of the original code from Wei Dong's repository. | ||
* Wei Dong, Charikar Moses, and Kai Li. 2011. Efficient k-nearest neighbor graph construction for generic similarity measures. | ||
* In Proceedings of the 20th international conference on World wide web (WWW '11). ACM, New York, NY, USA, 577-586. | ||
* | ||
* Wei Dong's code can be redistributed provided that the license (see below) is retained in the source code. | ||
*/ | ||
/* | ||
Copyright (C) 2010,2011 Wei Dong <wdong@wdong.org> | ||
All rights reserved. | ||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: | ||
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. | ||
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. | ||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
#ifndef NNDES_METHOD_OLD_H_ | ||
#define NNDES_METHOD_OLD_H_ | ||
|
||
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
|
||
#include "index.h" | ||
#include "space.h" | ||
|
||
#include "nndes/nndes-common.h" | ||
#include "nndes/nndes.h" | ||
|
||
#define METH_NNDES_OLD "nndes_old" | ||
|
||
namespace similarity { | ||
|
||
using std::string; | ||
|
||
template <typename dist_t> | ||
class NNDescentMethodOld : public Index<dist_t> { | ||
public: | ||
NNDescentMethodOld(bool PrintProgress, | ||
const Space<dist_t>* space, | ||
const ObjectVector& data, | ||
const AnyParams& AllParams); | ||
~NNDescentMethodOld(){}; | ||
|
||
/* | ||
* Just the name of the method, consider printing crucial parameter values | ||
*/ | ||
const std::string ToString() const { | ||
stringstream str; | ||
str << "NNDescentMethodOld method: "; | ||
return str.str(); | ||
} | ||
|
||
void Search(RangeQuery<dist_t>* query); | ||
void Search(KNNQuery<dist_t>* query); | ||
|
||
virtual vector<string> GetQueryTimeParamNames() const; | ||
|
||
class SpaceOracle { | ||
public: | ||
SpaceOracle(const Space<dist_t>* space, const ObjectVector& data) : | ||
space_(space), data_(data) {} | ||
inline dist_t operator()(IdType id1, IdType id2) const { | ||
return space_->IndexTimeDistance(data_.at(id1), data_.at(id2)); | ||
} | ||
private: | ||
const Space<dist_t>* space_; | ||
const ObjectVector& data_; | ||
}; | ||
private: | ||
void SearchGreedy(KNNQuery<dist_t>* query); | ||
void SearchSmallWorld(KNNQuery<dist_t>* query); | ||
|
||
typedef pair<dist_t, IdType> EvaluatedNode; | ||
|
||
virtual void SetQueryTimeParamsInternal(AnyParamManager& ); | ||
|
||
const Space<dist_t>* space_; | ||
const ObjectVector& data_; | ||
size_t NN_; // K in the original Wei Dong's code nndes.cpp | ||
size_t searchNN_; | ||
size_t controlQty_; // control in the original Wei Dong's code nndes.cpp | ||
size_t iterationQty_; // iteration in the original Wei Dong's code nndes.cpp | ||
float rho_; | ||
float delta_; | ||
|
||
SpaceOracle nndesOracle_; | ||
unique_ptr<NNDescent<SpaceOracle>> nndesObj_; | ||
|
||
size_t initSearchAttempts_; | ||
bool greedy_; | ||
// disable copy and assign | ||
DISABLE_COPY_AND_ASSIGN(NNDescentMethodOld); | ||
}; | ||
|
||
} // namespace similarity | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.