Skip to content

Commit

Permalink
deflate index flag
Browse files: browse the repository at this point in the history
  • Loading branch information
Marcus Lager committed May 1, 2017
1 parent 618ab93 commit 56de02e
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 28 deletions.
17 changes: 10 additions & 7 deletions rn-write.bat
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
rem Indexes one source file into the d:\resin\wikipedia directory in consecutive slices,
rem invoking rn write once per slice with a different --skip offset and a fixed --take size.
rem NOTE(review): this listing is a diff shown without +/- markers. Per the hunk header
rem (7 old lines, 10 new lines) the seven 10Mwikipedia.json calls directly below are the
rem removed version of the script.
call rn write --file c:\temp\10Mwikipedia.json --dir d:\resin\wikipedia --skip 0 --take 1000000
call rn write --file c:\temp\10Mwikipedia.json --dir d:\resin\wikipedia --skip 1000000 --take 1000000
call rn write --file c:\temp\10Mwikipedia.json --dir d:\resin\wikipedia --skip 2000000 --take 1000000
call rn write --file c:\temp\10Mwikipedia.json --dir d:\resin\wikipedia --skip 3000000 --take 1000000
call rn write --file c:\temp\10Mwikipedia.json --dir d:\resin\wikipedia --skip 4000000 --take 1000000
call rn write --file c:\temp\10Mwikipedia.json --dir d:\resin\wikipedia --skip 5000000 --take 1000000
call rn write --file c:\temp\10Mwikipedia.json --dir d:\resin\wikipedia --skip 6000000 --take 1000000
rem The ten lines below are the replacement version: slices of 100000 taken from the
rem enwiki lines file, each written with --compress. The first slice (no --skip) is
rem left commented out.
rem call rn write --file "C:\Users\Marcus\Downloads\enwiki-20120502-lines-1k.txt" --dir D:\resin\wikipedia --take 100000 --compress
call rn write --file "C:\Users\Marcus\Downloads\enwiki-20120502-lines-1k.txt" --dir D:\resin\wikipedia --skip 100000 --take 100000 --compress
call rn write --file "C:\Users\Marcus\Downloads\enwiki-20120502-lines-1k.txt" --dir D:\resin\wikipedia --skip 200000 --take 100000 --compress
call rn write --file "C:\Users\Marcus\Downloads\enwiki-20120502-lines-1k.txt" --dir D:\resin\wikipedia --skip 300000 --take 100000 --compress
call rn write --file "C:\Users\Marcus\Downloads\enwiki-20120502-lines-1k.txt" --dir D:\resin\wikipedia --skip 400000 --take 100000 --compress
call rn write --file "C:\Users\Marcus\Downloads\enwiki-20120502-lines-1k.txt" --dir D:\resin\wikipedia --skip 500000 --take 100000 --compress
call rn write --file "C:\Users\Marcus\Downloads\enwiki-20120502-lines-1k.txt" --dir D:\resin\wikipedia --skip 600000 --take 100000 --compress
call rn write --file "C:\Users\Marcus\Downloads\enwiki-20120502-lines-1k.txt" --dir D:\resin\wikipedia --skip 700000 --take 100000 --compress
call rn write --file "C:\Users\Marcus\Downloads\enwiki-20120502-lines-1k.txt" --dir D:\resin\wikipedia --skip 800000 --take 100000 --compress
call rn write --file "C:\Users\Marcus\Downloads\enwiki-20120502-lines-1k.txt" --dir D:\resin\wikipedia --skip 900000 --take 100000 --compress
4 changes: 1 addition & 3 deletions src/Resin.Cli/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,7 @@ static void Query(string[] args)
{
string dir = null;
string indexName = null;
bool deflate = false;

if (Array.IndexOf(args, "--deflate") > 0) deflate = true;
if (Array.IndexOf(args, "--dir") > 0) dir = args[Array.IndexOf(args, "--dir") + 1];
if (Array.IndexOf(args, "--name") > 0) indexName = args[Array.IndexOf(args, "--name") + 1];

Expand All @@ -106,7 +104,7 @@ static void Query(string[] args)
{
var timer = new Stopwatch();
timer.Start();
using (var s = new Searcher(dir, new QueryParser(new Analyzer()), new Tfidf(), deflate))
using (var s = new Searcher(dir, new QueryParser(new Analyzer()), new Tfidf()))
{
result = s.Search(q, page, size);

Expand Down
2 changes: 2 additions & 0 deletions src/Resin/IO/IxInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ public class IxInfo

public int DocumentCount { get; set; }

public bool Compressed { get; set; }

public static IxInfo Load(string fileName)
{
var time = new Stopwatch();
Expand Down
8 changes: 4 additions & 4 deletions src/Resin/IO/Read/DocumentReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@ namespace Resin.IO.Read
{
// BlockReader<Document> specialization: its Deserialize override turns a raw byte[]
// into a Document by calling Serializer.DeserializeDocument, passing along a flag that
// says whether the stored values must be decompressed.
//
// NOTE(review): this listing is a diff rendered without +/- markers (4 additions and
// 4 deletions in this file). In each adjacent pair below, the first line is the removed
// version (withCompression / _withCompression) and the second is its replacement
// (deflate / _deflate). The change is a rename only.
public class DocumentReader : BlockReader<Document>
{
// Removed field, followed by its renamed replacement.
private readonly bool _withCompression;
private readonly bool _deflate;

// Removed constructor header, followed by its replacement. Both forward the stream to
// the BlockReader<Document> base constructor and store the flag for Deserialize.
public DocumentReader(Stream stream, bool withCompression) : base(stream)
public DocumentReader(Stream stream, bool deflate) : base(stream)
{
_withCompression = withCompression;
_deflate = deflate;
}

// Converts one serialized document into a Document; the stored flag is handed to
// Serializer.DeserializeDocument unchanged.
protected override Document Deserialize(byte[] data)
{
return Serializer.DeserializeDocument(data, _withCompression);
return Serializer.DeserializeDocument(data, _deflate);
}
}
}
18 changes: 11 additions & 7 deletions src/Resin/IO/Serializer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ public static byte[] Serialize(this IxInfo ix)

stream.Write(versionBytes, 0, sizeof(long));
stream.Write(docCountBytes, 0, sizeof(int));
stream.WriteByte(EncodedBoolean[ix.Compressed]);

return stream.ToArray();
}
Expand All @@ -198,6 +199,8 @@ public static IxInfo DeserializeIxInfo(Stream stream)

stream.Read(docCountBytes, 0, sizeof(int));

var compressed = stream.ReadByte();

if (!BitConverter.IsLittleEndian)
{
Array.Reverse(versionBytes);
Expand All @@ -207,7 +210,8 @@ public static IxInfo DeserializeIxInfo(Stream stream)
return new IxInfo
{
VersionId= BitConverter.ToInt64(versionBytes, 0),
DocumentCount = BitConverter.ToInt32(docCountBytes, 0)
DocumentCount = BitConverter.ToInt32(docCountBytes, 0),
Compressed = compressed==1
};
}

Expand Down Expand Up @@ -419,7 +423,7 @@ public static LcrsNode DeserializeNode(Stream stream)
}
}

public static Document DeserializeDocument(byte[] data, bool wasCompressed)
public static Document DeserializeDocument(byte[] data, bool deflate)
{
var idBytes = new byte[sizeof(int)];
Array.Copy(data, 0, idBytes, 0, sizeof(int));
Expand All @@ -434,7 +438,7 @@ public static Document DeserializeDocument(byte[] data, bool wasCompressed)
}

var id = BitConverter.ToInt32(idBytes, 0);
var dic = DeserializeStringStringDic(dicBytes, wasCompressed).ToDictionary(x=>x.Key, y=>y.Value);
var dic = DeserializeStringStringDic(dicBytes, deflate).ToDictionary(x=>x.Key, y=>y.Value);

return new Document(dic) {Id = id};
}
Expand Down Expand Up @@ -536,15 +540,15 @@ public static IEnumerable<DocumentPosting> DeserializePostings(byte[] data)
}
}

// Byte-array convenience overload: wraps data in a MemoryStream and delegates to the
// Stream overload of DeserializeStringStringDic, returning the key/value pairs it yields.
//
// NOTE(review): diff rendered without +/- markers. Of each adjacent pair below, the
// first line is the removed version (parameter wasCompressed) and the second is its
// replacement (parameter deflate).
public static IEnumerable<KeyValuePair<string, string>> DeserializeStringStringDic(byte[] data, bool wasCompressed)
public static IEnumerable<KeyValuePair<string, string>> DeserializeStringStringDic(byte[] data, bool deflate)
{
using (var stream = new MemoryStream(data))
{
// The Stream overload is an iterator (it uses yield return), so ToList() forces the
// whole sequence to be read here, before the using block disposes the stream.
return DeserializeStringStringDic(stream, wasCompressed).ToList();
return DeserializeStringStringDic(stream, deflate).ToList();
}
}

public static IEnumerable<KeyValuePair<string, string>> DeserializeStringStringDic(Stream stream, bool wasCompressed)
public static IEnumerable<KeyValuePair<string, string>> DeserializeStringStringDic(Stream stream, bool deflate)
{
while (true)
{
Expand Down Expand Up @@ -592,7 +596,7 @@ public static IEnumerable<DocumentPosting> DeserializePostings(byte[] data)
Array.Reverse(valBytes);
}

string value = wasCompressed ? Compressor.DecompressText(valBytes) : Encoding.GetString(valBytes);
string value = deflate ? Compressor.DecompressText(valBytes) : Encoding.GetString(valBytes);

yield return new KeyValuePair<string, string>(key, value);
}
Expand Down
12 changes: 6 additions & 6 deletions src/Resin/Searcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,20 @@ public class Searcher : IDisposable
private readonly string _directory;
private readonly QueryParser _parser;
private readonly IScoringScheme _scorerFactory;
private readonly bool _compression;
private readonly IList<IxInfo> _ixs;
private readonly int _blockSize;
private readonly int _documentCount;

// Convenience constructor: chains to the (directory, parser, scorerFactory) constructor
// with a QueryParser built on a new Analyzer and a new Tfidf scoring scheme.
//
// NOTE(review): diff rendered without +/- markers. The first header/initializer pair is
// the removed version, which took an optional compression flag; the second pair is its
// replacement, which takes only the directory.
public Searcher(string directory, bool compression = false)
:this(directory, new QueryParser(new Analyzer()), new Tfidf(), compression)
public Searcher(string directory)
:this(directory, new QueryParser(new Analyzer()), new Tfidf())
{
}

public Searcher(string directory, QueryParser parser, IScoringScheme scorerFactory, bool compression = false)
public Searcher(string directory, QueryParser parser, IScoringScheme scorerFactory)
{
_directory = directory;
_parser = parser;
_scorerFactory = scorerFactory;
_compression = compression;

_ixs = Util.GetIndexFileNamesInChronologicalOrder(directory).Select(IxInfo.Load).ToList();

Expand Down Expand Up @@ -123,7 +121,9 @@ private IEnumerable<ScoredDocument> GetDocs(IList<DocumentScore> scores, IxInfo

var docFileName = Path.Combine(_directory, ix.VersionId + ".doc");

using (var docReader = new DocumentReader(new FileStream(docFileName, FileMode.Open, FileAccess.Read, FileShare.Read, 4096*4, FileOptions.SequentialScan), _compression))
using (var docReader = new DocumentReader(
new FileStream(docFileName, FileMode.Open, FileAccess.Read, FileShare.Read, 4096*4, FileOptions.SequentialScan),
ix.Compressed))
{
var dic = scores.ToDictionary(x => x.DocumentId, y => y.Score);

Expand Down
3 changes: 2 additions & 1 deletion src/Resin/UpsertOperation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,8 @@ private IxInfo CreateIxInfo()
return new IxInfo
{
VersionId = _indexVersionId,
DocumentCount = _docId
DocumentCount = _docId,
Compressed = _compression
};
}
}
Expand Down

0 comments on commit 56de02e

Please sign in to comment.