Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

New tool for building index from a text file

  • Loading branch information...
commit 85b481b7299926dcb4736d951faec08bcabd09cf 1 parent 454583f
@lalinsky authored
View
1  .gitignore
@@ -8,6 +8,7 @@ CMakeCache.txt
/fpsearch
/tests
/fpi-add
+/fpi-import
doc/tree_structure.png
/fpstats
/fpsearch2
View
3  CMakeLists.txt
@@ -74,6 +74,9 @@ target_link_libraries(fpdumpindex ${QT_LIBRARIES} fpindexlib)
add_executable(fpi-add src/tools/fpi-add.cpp)
target_link_libraries(fpi-add ${QT_LIBRARIES} fpindexlib)
+add_executable(fpi-import src/tools/fpi-import.cpp)
+target_link_libraries(fpi-import ${QT_LIBRARIES} fpindexlib)
+
set(tests_SOURCES
src/index/segment_data_writer_test.cpp
src/index/segment_index_test.cpp
View
10 src/common.h
@@ -21,4 +21,14 @@
#include "util/exceptions.h"
#include "util/scoped_ptr.h"
+namespace Acoustid {
+
+// Some default configuration options
+
+// Default value of IndexWriter's maximum segment buffer size (5 * 1024 * 1024).
+// NOTE(review): the unit (bytes vs. buffered items) is not visible here -- confirm.
+static const int MAX_SEGMENT_BUFFER_SIZE = 1024 * 1024 * 5;
+
+// Block size shared by the index reader and writer; replaces the per-file
+// "#define BLOCK_SIZE 512" they used to carry.
+static const int BLOCK_SIZE = 512;
+
+// Default arguments of the SegmentMergePolicy constructor.
+static const int MAX_MERGE_AT_ONCE = 10;
+static const int MAX_SEGMENTS_PER_TIER = 10;
+
+}
+
#endif
View
2  src/index/index_reader.cpp
@@ -10,8 +10,6 @@
#include "segment_searcher.h"
#include "index_reader.h"
-#define BLOCK_SIZE 512
-
using namespace Acoustid;
IndexReader::IndexReader(Directory *dir)
View
47 src/index/index_reader_test.cpp
@@ -0,0 +1,47 @@
+// Copyright (C) 2011 Lukas Lalinsky
+// Distributed under the MIT license, see the LICENSE file for details.
+
+#include <gtest/gtest.h>
+#include "util/test_utils.h"
+#include "store/ram_directory.h"
+#include "store/input_stream.h"
+#include "store/output_stream.h"
+#include "top_hits_collector.h"
+#include "index_writer.h"
+#include "index_reader.h"
+
+using namespace Acoustid;
+
+// Opening a reader on a directory that has no "segments_0" file must throw
+// IOException.
+TEST(IndexReaderTest, OpenEmpty)
+{
+    RAMDirectory emptyDirectory;
+    IndexReader indexReader(&emptyDirectory);
+
+    // Precondition: nothing has written a segments file yet.
+    ASSERT_FALSE(emptyDirectory.fileExists("segments_0"));
+    ASSERT_THROW(indexReader.open(), IOException);
+}
+
+// Two documents with the same fingerprint, each followed by its own commit,
+// must both be returned by a search for that fingerprint with a score of 1.0.
+TEST(IndexReaderTest, Search)
+{
+    RAMDirectory directory;
+    uint32_t fingerprint[] = { 7, 9, 12 };
+    const int fingerprintLength = 3;
+
+    // Index the fingerprint under ids 1 and 2, committing after each document.
+    IndexWriter indexWriter(&directory);
+    indexWriter.open(true);
+    indexWriter.addDocument(1, fingerprint, fingerprintLength);
+    indexWriter.commit();
+    indexWriter.addDocument(2, fingerprint, fingerprintLength);
+    indexWriter.commit();
+
+    // Search through a separate reader opened on the same directory.
+    IndexReader indexReader(&directory);
+    indexReader.open();
+
+    TopHitsCollector hits(100);
+    indexReader.search(fingerprint, fingerprintLength, &hits);
+
+    ASSERT_EQ(2, hits.topResults().size());
+    ASSERT_EQ(1, hits.topResults().at(0).id());
+    ASSERT_EQ(1.0, hits.topResults().at(0).score());
+    ASSERT_EQ(2, hits.topResults().at(1).id());
+    ASSERT_EQ(1.0, hits.topResults().at(1).score());
+}
+
View
10 src/index/index_writer.cpp
@@ -12,12 +12,10 @@
#include "segment_merger.h"
#include "index_writer.h"
-#define BLOCK_SIZE 512
-
using namespace Acoustid;
IndexWriter::IndexWriter(Directory *dir)
- : IndexReader(dir), m_numDocsInBuffer(0), m_maxSegmentBufferSize(1024 * 1024 * 10)
+ : IndexReader(dir), m_numDocsInBuffer(0), m_maxSegmentBufferSize(MAX_SEGMENT_BUFFER_SIZE)
{
m_mergePolicy = new SegmentMergePolicy();
}
@@ -80,7 +78,8 @@ void IndexWriter::maybeMerge()
if (merge.isEmpty()) {
return;
}
-
+ //qDebug() << "Merging segments" << merge;
+
SegmentInfo info(m_infos.incLastSegmentId());
SegmentMerger merger(segmentDataWriter(info));
for (size_t i = 0; i < merge.size(); i++) {
@@ -110,7 +109,8 @@ void IndexWriter::flush()
if (m_segmentBuffer.empty()) {
return;
}
- std::sort(m_segmentBuffer.begin(), m_segmentBuffer.end());
+ //qDebug() << "Writing new segment" << (m_segmentBuffer.size() * 8.0 / 1024 / 1024);
+ qSort(m_segmentBuffer.begin(), m_segmentBuffer.end());
SegmentInfo info(m_infos.incLastSegmentId(), m_numDocsInBuffer);
ScopedPtr<SegmentDataWriter> writer(segmentDataWriter(info));
View
10 src/index/index_writer.h
@@ -19,6 +19,16 @@ class IndexWriter : public IndexReader
IndexWriter(Directory *dir);
virtual ~IndexWriter();
+ // Returns the currently configured maximum segment buffer size.
+ size_t maxSegmentBufferSize() const { return m_maxSegmentBufferSize; }
+
+ // Replaces the maximum segment buffer size with the given value.
+ void setMaxSegmentBufferSize(size_t maxSegmentBufferSize) { m_maxSegmentBufferSize = maxSegmentBufferSize; }
+
void open(bool create = false);
SegmentMergePolicy *segmentMergePolicy()
View
2  src/index/segment_merge_policy.h
@@ -13,7 +13,7 @@ namespace Acoustid {
class SegmentMergePolicy
{
public:
- SegmentMergePolicy(int maxMergeAtOnce = 10, int maxSegmentsPerTier = 10);
+ SegmentMergePolicy(int maxMergeAtOnce = MAX_MERGE_AT_ONCE, int maxSegmentsPerTier = MAX_SEGMENTS_PER_TIER);
virtual ~SegmentMergePolicy();
void setMaxMergeAtOnce(int maxMergeAtOnce)
View
64 src/tools/fpi-import.cpp
@@ -0,0 +1,64 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <vector>
+#include <QTextStream>
+#include "index/index_writer.h"
+#include "store/fs_directory.h"
+#include "util/options.h"
+
+using namespace Acoustid;
+
+// Entry point of fpi-import: reads "id|fingerprint" lines from standard input
+// and adds each of them as a document to the index in the chosen directory.
+//
+// Options:
+//   directory ('d', takes DIR)  index directory; "." is used when absent
+//   create    ('c')             forwarded to IndexWriter::open() as its flag
+//
+// The fingerprint part is a comma-separated list of integers, optionally
+// wrapped in '{' and '}'. Lines that do not consist of exactly two
+// '|'-separated fields are reported with a warning and skipped.
+//
+// Returns 0 after committing, or 1 if opening the index throws IOException.
+int main(int argc, char **argv)
+{
+    OptionParser parser("%prog [options]");
+    parser.addOption("directory", 'd')
+        .setArgument()
+        .setHelp("index directory")
+        .setMetaVar("DIR");
+    parser.addOption("create", 'c')
+        .setHelp("create an index in the directory");
+    Options *opts = parser.parse(argc, argv);
+
+    QString path = ".";
+    if (opts->contains("directory")) {
+        path = opts->option("directory");
+    }
+
+    FSDirectory dir(path);
+    IndexWriter writer(&dir);
+    try {
+        writer.open(opts->contains("create"));
+    }
+    catch (IOException &ex) {
+        qCritical() << "ERROR:" << ex.what();
+        return 1;
+    }
+
+    QTextStream in(stdin);
+    size_t counter = 0;
+    // Declared outside the loop so its storage is reused from line to line.
+    std::vector<uint32_t> fp;
+    while (!in.atEnd()) {
+        QStringList line = in.readLine().split('|');
+        if (line.size() != 2) {
+            qWarning() << "Invalid line";
+            continue;
+        }
+        int id = line.at(0).toInt();
+        QString fpstr = line.at(1);
+        if (fpstr.startsWith('{') && fpstr.endsWith('}')) {
+            fpstr = fpstr.mid(1, fpstr.size() - 2);
+        }
+        QStringList fparr = fpstr.split(',');
+        // Size the buffer from the input: a fixed-size stack array would be
+        // overrun by any line carrying more items than its capacity.
+        fp.resize(fparr.size());
+        for (int i = 0; i < fparr.size(); i++) {
+            // Parsed as a signed int on purpose and stored as its 32-bit
+            // pattern, so negative items in the input keep working.
+            fp[i] = fparr.at(i).toInt();
+        }
+        writer.addDocument(id, fp.data(), fparr.size());
+        // Count first so that the message reports the lines imported so far.
+        counter++;
+        if (counter % 1000 == 0) {
+            qDebug() << "Imported" << counter << "lines";
+        }
+    }
+    writer.commit();
+
+    return 0;
+}
+
Please sign in to comment.
Something went wrong with that request. Please try again.