Skip to content

Commit

Permalink
#7 Added basic XML documentation on classes that were lacking these c…
Browse files Browse the repository at this point in the history
…omments. Updated project to include XML documentation on build to make sure this gets included with the NuGet package.
  • Loading branch information
Ryan McCoy committed Feb 22, 2018
1 parent 192a726 commit a37dcff
Show file tree
Hide file tree
Showing 13 changed files with 180 additions and 3 deletions.
13 changes: 12 additions & 1 deletion Maybe/BloomFilter/BloomFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,33 @@

namespace Maybe.BloomFilter
{
/// <summary>
/// Basic bloom filter collection
/// </summary>
/// <typeparam name="T">Type of data that will be contained in the bloom filter.</typeparam>
[Serializable]
public class BloomFilter<T> : BloomFilterBase<T>
{
private readonly BitArray _collectionState;

/// <summary>
/// Initializes the filter with an all-clear bit array. Intended for derived types -- regular callers should use BloomFilter.Create.
/// </summary>
/// <param name="bitArraySize">Number of bits allocated for the filter's internal bit array.</param>
/// <param name="numHashes">Number of hashes; forwarded unchanged to the base class constructor.</param>
protected BloomFilter(int bitArraySize, int numHashes)
    : base(bitArraySize, numHashes)
{
    // BitArray(int) starts with every bit cleared, which matches passing false explicitly.
    _collectionState = new BitArray(bitArraySize);
}

/// <summary>
/// Gets the fraction of bits in the filter that are currently set (set bits divided by total bits). The higher this ratio, the greater the chance of a false positive.
/// </summary>
public override double FillRatio
{
    get
    {
        var setBits = 0;
        foreach (bool bit in _collectionState)
        {
            if (bit)
            {
                setBits++;
            }
        }

        // Cast the divisor so the division is performed in floating point.
        return setBits / (double)_collectionState.Length;
    }
}

/// <summary>
/// Creates a new bloom filter with appropriate bit width and hash functions for your expected size and error rate.
/// </summary>
/// <typeparam name="T">The type of item to be held in the bloom filter</typeparam>
/// <param name="expectedItems">The maximum number of items you expect to be in the bloom filter</param>
/// <param name="acceptableErrorRate">The maximum rate of false positives you can accept. Must be a value between 0.00-1.00</param>
/// <returns>A new bloom filter configured appropriately for number of items and error rate</returns>
Expand Down
18 changes: 18 additions & 0 deletions Maybe/BloomFilter/BloomFilterBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,24 @@

namespace Maybe.BloomFilter
{
/// <summary>
/// Base class for bloom filter to contain some common member variables and hashing helper functions.
/// </summary>
/// <typeparam name="T">Type of data that will be contained in the bloom filter.</typeparam>
[Serializable]
public abstract class BloomFilterBase<T> : IBloomFilter<T>
{
/// <summary>
/// The number of times an item should be hashed when being added to or checked for membership in the collection
/// </summary>
protected int NumberHashes;
private readonly int _collectionLength;

/// <summary>
/// Protected constructor to create a new bloom filter
/// </summary>
/// <param name="bitArraySize">The number of bits that should be used internally to store items.</param>
/// <param name="numberHashes">The number of times an input should be hashed before working against the internal bit array.</param>
protected BloomFilterBase(int bitArraySize, int numberHashes)
{
NumberHashes = numberHashes;
Expand Down Expand Up @@ -39,6 +52,11 @@ protected BloomFilterBase(int bitArraySize, int numberHashes)
/// </summary>
public abstract double FillRatio { get; }

/// <summary>
/// Hashes the <paramref name="item"/> provided and passes the hashed result to an action for processing (typically setting bits in the bit array or checking if those bits are set)
/// </summary>
/// <param name="item">The item to hash.</param>
/// <param name="hashAction">The action that receives the hashed result.</param>
protected void DoHashAction(T item, Action<int> hashAction)
{
var hashes = MurmurHash3.GetHashes(item, NumberHashes, _collectionLength);
Expand Down
9 changes: 8 additions & 1 deletion Maybe/BloomFilter/CountingBloomFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ public class CountingBloomFilter<T> : BloomFilterBase<T>
{
private readonly byte[] _collectionState;

/// <summary>
/// Creates a new counting bloom filter -- a bloom filter capable of tracking how many times a bit has been set
/// </summary>
/// <param name="arraySize">Size of the internal bit array to track items</param>
/// <param name="numHashes">Number of times the input should be hashed before working with the bit array.</param>
protected CountingBloomFilter(int arraySize, int numHashes) : base(arraySize, numHashes)
{
_collectionState = new byte[arraySize];
Expand All @@ -21,12 +26,14 @@ protected CountingBloomFilter(int arraySize, int numHashes) : base(arraySize, nu
}
}

/// <summary>
/// Gets the fraction of positions in the filter whose counter is non-zero (occupied positions divided by total positions). The higher this ratio, the greater the chance of a false positive.
/// </summary>
public override double FillRatio
{
    get
    {
        var occupied = 0;
        foreach (var counter in _collectionState)
        {
            if (counter > 0)
            {
                occupied++;
            }
        }

        // Cast the divisor so the division is performed in floating point.
        return occupied / (double)_collectionState.Length;
    }
}

/// <summary>
/// Creates a new counting bloom filter with appropriate bit width and hash functions for your expected size and error rate.
/// </summary>
/// <typeparam name="T">The type of item to be held in the counting bloom filter</typeparam>
/// <param name="expectedItems">The maximum number of items you expect to be in the counting bloom filter</param>
/// <param name="acceptableErrorRate">The maximum rate of false positives you can accept. Must be a value between 0.00-1.00</param>
/// <returns>A new bloom filter configured appropriately for number of items and error rate</returns>
Expand Down
4 changes: 4 additions & 0 deletions Maybe/BloomFilter/IBloomFilter.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
namespace Maybe.BloomFilter
{
/// <summary>
/// Generic bloom filter interface to describe basic operations for any type of bloom filter.
/// </summary>
/// <typeparam name="T">Type of item held in the bloom filter.</typeparam>
public interface IBloomFilter<T>
{
/// <summary>
Expand Down
19 changes: 19 additions & 0 deletions Maybe/BloomFilter/ScalableBloomFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,29 @@ namespace Maybe.BloomFilter
[Serializable]
public class ScalableBloomFilter<T> : IBloomFilter<T>
{
/// <summary>
/// The minimum number of items that this scalable bloom filter will handle.
/// </summary>
public const int MinimumCapacity = 50;

// Underlying filters. Has no initializer, so it is null until assigned elsewhere in the class;
// Contains null-checks it, and Add writes to the last filter in the sequence.
private IEnumerable<BloomFilterBase<T>> _filters;
// Maximum error rate supplied to the constructor.
private readonly double _maxErrorRate;
// Add compares these two (_activeItemCount >= _capacity) before inserting.
// NOTE(review): presumably reaching capacity triggers adding a new filter -- confirm against the full Add body.
private int _activeItemCount;
private int _capacity;

/// <summary>
/// Creates a new scalable bloom filter whose error rate is capped at the given ratio.
/// </summary>
/// <param name="maximumErrorRate">Highest error rate to tolerate. A lower rate is paid for with more memory.</param>
public ScalableBloomFilter(double maximumErrorRate)
{
    // Only the target rate is recorded here; this constructor allocates no filter.
    this._maxErrorRate = maximumErrorRate;
}

/// <summary>
/// Adds a new item to the bloom filter and scales the bloom filter as needed.
/// </summary>
/// <param name="item">The item to add to the bloom filter.</param>
public void Add(T item)
{
if (_activeItemCount >= _capacity)
Expand All @@ -34,8 +45,16 @@ public void Add(T item)
_filters.Last().Add(item);
}

/// <summary>
/// Tests whether an item may have been added to this <see cref="ScalableBloomFilter{T}"/>.
/// </summary>
/// <param name="item">The item to look up.</param>
/// <returns>True if any underlying filter reports that the item MIGHT be present. False if no filter reports it, or if no filter has been created yet.</returns>
public bool Contains(T item)
{
    // No filter has been created yet, so nothing can match.
    if (_filters == null)
    {
        return false;
    }

    return _filters.Any(candidate => candidate.Contains(item));
}

/// <summary>
/// Gets the number of filters that are currently being used internally to hold items without exceeding the error rate.
/// </summary>
/// <remarks>
/// Returns 0 when no internal filter has been created yet, instead of throwing on the unassigned filter collection. This mirrors the null check performed by Contains.
/// </remarks>
public int NumberFilters => _filters?.Count() ?? 0;

private static IEnumerable<BloomFilterBase<T>> AddNewFilter(double maxError, int capacity, IEnumerable<BloomFilterBase<T>> currentFilters)
Expand Down
56 changes: 56 additions & 0 deletions Maybe/CountMinSketch/CountMinSketch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

namespace Maybe.CountMinSketch
{
/// <summary>
/// Count min sketch is a data structure that allows you to track the frequency of an item occurring within a large set. The count min sketch will never undercount items, but it can overcount by a controllable confidence interval.
/// </summary>
/// <typeparam name="T">Type of item whose frequency is tracked.</typeparam>
[Serializable]
public class CountMinSketch<T> : CountMinSketchBase<T>
{
Expand All @@ -13,6 +17,12 @@ public class CountMinSketch<T> : CountMinSketchBase<T>
private long[] _hashA;
private long _totalCount;

/// <summary>
/// Creates a new instance of <see cref="CountMinSketch{T}"/>
/// </summary>
/// <param name="depth">The number of buckets to be used for counting. More buckets will increase the probability of computing the value correctly.</param>
/// <param name="width">The size of buckets for counting items. Wider buckets will increase accuracy but use more memory.</param>
/// <param name="seed">Some seed for random number generation. This is passed in to allow multiple sketches to be sync'ed with the same seed.</param>
public CountMinSketch(int depth, int width, int seed)
{
if (depth <= 0) { throw new ArgumentException("Depth must be a positive integer.", nameof(depth)); }
Expand All @@ -26,6 +36,12 @@ public CountMinSketch(int depth, int width, int seed)
InitTablesWith(depth, width, seed);
}

/// <summary>
/// Creates a new instance of <see cref="CountMinSketch{T}"/>
/// </summary>
/// <param name="epsilon">The accuracy of the counts produced by this data structure.</param>
/// <param name="confidence">The probability of computing the value correctly.</param>
/// <param name="seed">Some seed for random number generation. This is passed in to allow multiple sketches to be sync'ed with the same seed.</param>
public CountMinSketch(double epsilon, double confidence, int seed)
{
if (epsilon <= 0d) { throw new ArgumentException("Relative error must be positive.", nameof(epsilon)); }
Expand All @@ -49,22 +65,52 @@ private void InitTablesWith(int depth, int width, int seed)
}
}

/// <summary>
/// Gets the seed that was used to initialize this CountMinSketch.
/// </summary>
/// <remarks>
/// Read-only override of the base class member. Per the constructor documentation, the seed is exposed so that multiple sketches can be created with the same seed.
/// </remarks>
public override int Seed { get; }

/// <summary>
/// Gets the epsilon setting used to initialize this <see cref="CountMinSketch{T}"/>.
/// </summary>
/// <remarks>
/// Read-only override of the base class member. Corresponds to the <c>epsilon</c> constructor argument, which the constructor requires to be positive.
/// </remarks>
public override double RelativeError { get; }

/// <summary>
/// Gets the confidence setting used to initialize this <see cref="CountMinSketch{T}"/>
/// </summary>
/// <remarks>
/// Read-only override of the base class member. The constructor documents the matching argument as the probability of computing the value correctly.
/// </remarks>
public override double Confidence { get; }

/// <summary>
/// Gets the depth of the sketch: the number of buckets used for tracking items. Add passes this value as the hash count when computing an item's buckets.
/// </summary>
public override int Depth
{
    get { return _depth; }
}

/// <summary>
/// Gets the width of the sketch: the size of each bucket used for tracking item frequency. Add passes this value as the maximum when computing an item's buckets.
/// </summary>
public override int Width
{
    get { return _width; }
}

/// <summary>
/// Gets the running total of item counts recorded by this collection. Each call to Add increases it by the count that was added.
/// </summary>
public override long TotalCount
{
    get { return _totalCount; }
}

/// <summary>
/// Gets the two-dimensional table that is currently being used to track frequency of items. The property itself has no setter.
/// </summary>
public override long[,] Table
{
    get { return _table; }
}

/// <summary>
/// Records a single occurrence of an item.
/// </summary>
/// <param name="item">The item to be added to the collection</param>
public override void Add(T item)
{
    // Delegate to the counted overload with a count of one.
    Add(item, 1L);
}

/// <summary>
/// Adds an item to the collection a specified number of times
/// </summary>
/// <param name="item">The item to be added</param>
/// <param name="count">The number of times the item should be added</param>
public void Add(T item, long count)
{
var buckets = GetHashBuckets(item, Depth, Width);
Expand All @@ -75,6 +121,11 @@ public void Add(T item, long count)
_totalCount += count;
}

/// <summary>
/// Estimates the number of times an item has been added to this <see cref="CountMinSketch{T}"/>
/// </summary>
/// <param name="item">The item to check</param>
/// <returns>An estimated number of times that the item has been added to the collection. This will never be low but could be higher than the actual result.</returns>
public override long EstimateCount(T item)
{
var res = long.MaxValue;
Expand All @@ -98,6 +149,11 @@ private static int[] GetHashBuckets(T item, int hashCount, int max)
return result;
}

/// <summary>
/// Merges another instance of <see cref="CountMinSketch{T}"/> with this collection. The results will be an aggregate of both collections after merging.
/// </summary>
/// <param name="other">The <see cref="CountMinSketchBase{T}"/> that should be merged into the current collection.</param>
/// <returns>This <see cref="CountMinSketch{T}"/> with the results from the other collection included.</returns>
public override CountMinSketchBase<T> MergeInPlace(CountMinSketchBase<T> other)
{
if (other == null) { throw new IncompatibleMergeException("Cannot merge null estimator"); }
Expand Down
4 changes: 4 additions & 0 deletions Maybe/CountMinSketch/CountMinSketchBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

namespace Maybe.CountMinSketch
{
/// <summary>
/// An abstract class representing a general Count-Min Sketch data structure.
/// </summary>
/// <typeparam name="T">Type of item whose frequency is tracked.</typeparam>
[Serializable]
public abstract class CountMinSketchBase<T>
{
Expand Down
15 changes: 15 additions & 0 deletions Maybe/CountMinSketch/IncompatibleMergeException.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,27 @@

namespace Maybe.CountMinSketch
{
/// <summary>
/// Represents an error encountered when merging <see cref="CountMinSketchBase{T}"/> instances.
/// </summary>
/// <remarks>
/// Marked serializable and given the protected serialization constructor, following the standard exception pattern.
/// </remarks>
[Serializable]
public class IncompatibleMergeException : Exception
{
    /// <summary>
    /// Creates a new instance of this exception.
    /// </summary>
    public IncompatibleMergeException() { }

    /// <summary>
    /// Creates a new instance of this exception with a custom error message.
    /// </summary>
    /// <param name="message">The message to be set on the exception.</param>
    public IncompatibleMergeException(string message) : base(message) { }

    /// <summary>
    /// Creates a new instance of this exception with a custom error message and an inner exception for context.
    /// </summary>
    /// <param name="message">The message to be set on the exception.</param>
    /// <param name="inner">The inner exception to be included on the exception.</param>
    public IncompatibleMergeException(string message, Exception inner) : base(message, inner) { }

    /// <summary>
    /// Creates a new instance of this exception from serialized data.
    /// </summary>
    /// <param name="info">The serialized object data for the exception.</param>
    /// <param name="context">The streaming context supplied by the serializer.</param>
    // Types are fully qualified so this block does not depend on an extra using directive.
    protected IncompatibleMergeException(
        System.Runtime.Serialization.SerializationInfo info,
        System.Runtime.Serialization.StreamingContext context)
        : base(info, context) { }
}
}
5 changes: 5 additions & 0 deletions Maybe/Maybe.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Debug|netstandard2.0|AnyCPU'">
<!-- Debug builds of the netstandard2.0 target treat every compiler warning as an error. -->
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<WarningsAsErrors />
<!-- Emit the XML documentation file for the Debug build at this fixed path. -->
<DocumentationFile>bin\Debug\netstandard2.0\Maybe.xml</DocumentationFile>
</PropertyGroup>

<!--
  Generate the XML documentation file for every configuration and target framework,
  not only Release|netstandard2.0, so each packaged target ships with its documentation.
  An explicit DocumentationFile set in another PropertyGroup still takes precedence for that combination.
-->
<PropertyGroup>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
</PropertyGroup>

<ItemGroup Condition="'$(TargetFramework)' == 'net20'">
Expand Down
16 changes: 16 additions & 0 deletions Maybe/SkipList/Node.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,28 @@

namespace Maybe.SkipList
{
/// <summary>
/// Represents a single node on a SkipList -- Contains a value and a set of follow up nodes at various levels.
/// </summary>
/// <typeparam name="T">Type of the value stored in the node.</typeparam>
[Serializable]
public class Node<T>
{
/// <summary>
/// Nodes that follow this current node at a given level (where the array index is the level)
/// </summary>
/// <remarks>
/// The property has no setter, so the array reference can only be assigned during construction. The array's elements remain individually writable.
/// </remarks>
public Node<T>[] Next { get; }

/// <summary>
/// The value of this node.
/// </summary>
/// <remarks>
/// The property has no setter, so the value can only be assigned during construction.
/// </remarks>
public T Value { get; }

/// <summary>
/// Creates a new instance of this node.
/// </summary>
/// <param name="value">The value of the node.</param>
/// <param name="level">The level where the node is stored in the <see cref="SkipList{T}"/> tree</param>
public Node(T value, int level)
{
if(level < 0) { throw new ArgumentException("Level must be >= 0!", nameof(level)); }
Expand Down
14 changes: 14 additions & 0 deletions Maybe/SkipList/SkipList.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ public SkipList(IComparer<T> comparer=null)
}
}

/// <summary>
/// Protected constructor used to deserialize an instance of <see cref="SkipList{T}"/>
/// </summary>
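/// <param name="info">Serialization data from which the list state (such as the "headNode" entry) is read.</param>
/// <param name="context">Streaming context supplied by the serializer.</param>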
protected SkipList(SerializationInfo info, StreamingContext context)
{
_headNode = (Node<T>) info.GetValue("headNode", typeof(Node<T>));
Expand Down Expand Up @@ -141,6 +146,10 @@ IEnumerator IEnumerable.GetEnumerator()
return GetEnumerator();
}

/// <summary>
/// Enumerates all nodes of this collection
/// </summary>
/// <returns>An enumerator over the values stored in this collection.</returns>
public IEnumerator<T> GetEnumerator()
{
var currentNode = _headNode.Next[0];
Expand All @@ -156,6 +165,11 @@ public IEnumerator<T> GetEnumerator()
}
}

/// <summary>
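/// Helper method for serialization of this class.
/// </summary>
/// <param name="info">Serialization data to which the list state (such as the "headNode" entry) is written.</param>
/// <param name="context">Streaming context supplied by the serializer.</param>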
public virtual void GetObjectData(SerializationInfo info, StreamingContext context)
{
info.AddValue("headNode", _headNode);
Expand Down

0 comments on commit a37dcff

Please sign in to comment.