Permalink
Browse files

Convert indentation from spaces to tabs

  • Loading branch information...
1 parent 913603a commit 0ac75ef1f65a4aaeadba4fb57d58803bd3f3ea54 @ayende ayende committed Dec 1, 2011
Showing with 1,024 additions and 1,023 deletions.
  1. +936 −936 Bundles/Raven.Bundles.MoreLikeThis/MoreLikeThis.cs
  2. +88 −87 Bundles/Raven.Bundles.MoreLikeThis/RavenMoreLikeThis.cs
@@ -8,100 +8,101 @@
namespace Raven.Bundles.MoreLikeThis
{
- class RavenMoreLikeThis : Similarity.Net.MoreLikeThis
- {
- private readonly IndexReader _ir;
+ class RavenMoreLikeThis : Similarity.Net.MoreLikeThis
+ {
+ private readonly IndexReader _ir;
- public Dictionary<string, Analyzer> Analyzers { get; set; }
+ public Dictionary<string, Analyzer> Analyzers { get; set; }
- public RavenMoreLikeThis(IndexReader ir) : base(ir)
- {
- _ir = ir;
- }
+ public RavenMoreLikeThis(IndexReader ir)
+ : base(ir)
+ {
+ _ir = ir;
+ }
- protected override PriorityQueue RetrieveTerms(int docNum)
- {
- var fieldNames = GetFieldNames();
-
- var termFreqMap = new System.Collections.Hashtable();
- var d = _ir.Document(docNum);
- foreach (var fieldName in fieldNames)
- {
- var vector = _ir.GetTermFreqVector(docNum, fieldName);
+ protected override PriorityQueue RetrieveTerms(int docNum)
+ {
+ var fieldNames = GetFieldNames();
- // field does not store term vector info
- if (vector == null)
- {
- var text = d.GetValues(fieldName);
- if (text != null)
- {
- foreach (var t in text)
- {
- AddTermFrequencies(new StringReader(t), termFreqMap, fieldName);
- }
- }
- }
- else
- {
- AddTermFrequencies(termFreqMap, vector);
- }
- }
+ var termFreqMap = new System.Collections.Hashtable();
+ var d = _ir.Document(docNum);
+ foreach (var fieldName in fieldNames)
+ {
+ var vector = _ir.GetTermFreqVector(docNum, fieldName);
- return CreateQueue(termFreqMap);
- }
+ // field does not store term vector info
+ if (vector == null)
+ {
+ var text = d.GetValues(fieldName);
+ if (text != null)
+ {
+ foreach (var t in text)
+ {
+ AddTermFrequencies(new StringReader(t), termFreqMap, fieldName);
+ }
+ }
+ }
+ else
+ {
+ AddTermFrequencies(termFreqMap, vector);
+ }
+ }
- protected new void AddTermFrequencies(System.IO.StreamReader r, System.Collections.IDictionary termFreqMap, System.String fieldName)
- {
- var analyzer = Analyzers[fieldName];
- TokenStream ts = analyzer.TokenStream(fieldName, r);
+ return CreateQueue(termFreqMap);
+ }
+
+ protected new void AddTermFrequencies(System.IO.StreamReader r, System.Collections.IDictionary termFreqMap, System.String fieldName)
+ {
+ var analyzer = Analyzers[fieldName];
+ TokenStream ts = analyzer.TokenStream(fieldName, r);
TermAttribute termAtt = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));
- int tokenCount = 0;
- while (ts.IncrementToken())
- {
- // for every token
- System.String word = termAtt.Term();
- tokenCount++;
- if (tokenCount > GetMaxNumTokensParsed())
- {
- break;
- }
- if (IsNoiseWord(word))
- {
- continue;
- }
+ int tokenCount = 0;
+ while (ts.IncrementToken())
+ {
+ // for every token
+ System.String word = termAtt.Term();
+ tokenCount++;
+ if (tokenCount > GetMaxNumTokensParsed())
+ {
+ break;
+ }
+ if (IsNoiseWord(word))
+ {
+ continue;
+ }
- // increment frequency
- var cnt = (Int)termFreqMap[word];
- if (cnt == null)
- {
- termFreqMap[word] = new Int();
- }
- else
- {
- cnt.x++;
- }
- }
- }
+ // increment frequency
+ var cnt = (Int)termFreqMap[word];
+ if (cnt == null)
+ {
+ termFreqMap[word] = new Int();
+ }
+ else
+ {
+ cnt.x++;
+ }
+ }
+ }
- protected new bool IsNoiseWord(System.String term)
- {
- int len = term.Length;
- var minWordLen = GetMinWordLen();
- var maxWordLen = GetMaxWordLen();
- var stopWords = GetStopWords();
- if (minWordLen > 0 && len < minWordLen)
- {
- return true;
- }
- if (maxWordLen > 0 && len > maxWordLen)
- {
- return true;
- }
- if (stopWords != null && stopWords.Contains(term.ToLower()))
- {
- return true;
- }
- return false;
- }
- }
+ protected new bool IsNoiseWord(System.String term)
+ {
+ int len = term.Length;
+ var minWordLen = GetMinWordLen();
+ var maxWordLen = GetMaxWordLen();
+ var stopWords = GetStopWords();
+ if (minWordLen > 0 && len < minWordLen)
+ {
+ return true;
+ }
+ if (maxWordLen > 0 && len > maxWordLen)
+ {
+ return true;
+ }
+ if (stopWords != null && stopWords.Contains(term.ToLower()))
+ {
+ return true;
+ }
+ return false;
+ }
+ }
}

0 comments on commit 0ac75ef

Please sign in to comment.