Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add misra-gries streaming algorithm #765

Merged
merged 8 commits into from
Mar 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ If you want to uninstall algorithms, it is as simple as:
- [is_sorted](algorithms/stack/is_sorted.py)
- [streaming](algorithms/streaming)
- [1-sparse-recovery](algorithms/streaming/one_sparse_recovery.py)
- [misra-gries](algorithms/streaming/misra_gries.py)
- [strings](algorithms/strings)
- [fizzbuzz](algorithms/strings/fizzbuzz.py)
- [delete_reoccurring](algorithms/strings/delete_reoccurring.py)
Expand Down
3 changes: 2 additions & 1 deletion algorithms/streaming/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .one_sparse_recovery import *
from .one_sparse_recovery import *
from .misra_gries import *
49 changes: 49 additions & 0 deletions algorithms/streaming/misra_gries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@

"""
Implementation of the Misra-Gries algorithm.
Given a list of items and a value k, it returns every item in the list that appears more than n/k times, where n is the length of the list. An item that appears exactly n/k times may also be returned, but this is not guaranteed.
By default, k is set to 2, solving the majority problem.
For the majority problem, this algorithm only guarantees that if there is an element that appears more than n/2 times, it will be output. If there
is no such element, the first pass may still leave an arbitrary element as a candidate. Therefore, we need to iterate through the list again at the end. But since we have filtered
out the suspects, the memory complexity is significantly lower than it would be to create a counter for every element in the list.

For example:
Input misras_gries([1,4,4,4,5,4,4])
Output {'4':5}
Input misras_gries([0,0,0,1,1,1,1])
Output {'1':4}
Input misras_gries([0,0,0,0,1,1,1,2,2],3)
Output {'0':4,'1':3}
Input misras_gries([0,0,0,1,1,1])
Output None
"""
def misras_gries(array, k=2):
    """Find the frequent items of ``array`` with the Misra-Gries algorithm.

    A first pass keeps at most ``k - 1`` counters and leaves a small set of
    candidate items. A second pass counts the exact number of occurrences of
    each candidate and keeps those that occur at least ``len(array) / k``
    times.

    Items are identified by their ``str()`` representation in both passes,
    so items with the same ``str()`` value are counted together and the keys
    of the returned dictionary are strings.

    Args:
        array: Sequence of items to examine. It is iterated twice and its
            length is taken with ``len``.
        k: Frequency divisor; defaults to 2 (the majority problem).

    Returns:
        A dict mapping ``str(item)`` to its exact number of occurrences for
        every candidate that meets the threshold, or ``None`` when no
        candidate does (including when ``array`` is empty).
    """
    # First pass: maintain at most k - 1 counters keyed by str(item).
    counters = {}
    for item in array:
        label = str(item)
        if label in counters:
            counters[label] += 1
        elif len(counters) < k - 1:
            counters[label] = 1
        else:
            # No free counter: decrement every counter and drop the ones
            # that reach zero. The current item itself is not stored.
            # Iterate over a copy of the keys because entries are deleted.
            for candidate in list(counters):
                counters[candidate] -= 1
                if counters[candidate] == 0:
                    del counters[candidate]

    if not counters:
        return None

    # Second pass: the counters above are only estimates, so count the
    # surviving candidates exactly. Matching on the same str() label used in
    # the first pass avoids converting labels back with int(), which failed
    # for non-integer items and miscounted numeric strings.
    exact = dict.fromkeys(counters, 0)
    for item in array:
        label = str(item)
        if label in exact:
            exact[label] += 1

    threshold = len(array) / k
    frequencies = {
        label: count for label, count in exact.items() if count >= threshold
    }
    return frequencies if frequencies else None


def _count_frequency(array,element):
return array.count(element)


19 changes: 17 additions & 2 deletions tests/test_streaming.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,22 @@
from algorithms.streaming.misra_gries import (
misras_gries,
)
from algorithms.streaming import (
one_sparse
)
import unittest
import unittest


class TestMisraGreis(unittest.TestCase):
    """Checks for ``misras_gries`` on small integer lists."""

    def test_misra_correct(self):
        # Each case pairs the expected result with the positional arguments
        # passed to misras_gries.
        cases = (
            ({'4':5}, ([1,4,4,4,5,4,4],)),
            ({'1':4}, ([0,0,0,1,1,1,1],)),
            ({'0':4,'1':3}, ([0,0,0,0,1,1,1,2,2], 3)),
        )
        for expected, arguments in cases:
            self.assertEqual(expected, misras_gries(*arguments))

    def test_misra_incorrect(self):
        # Inputs for which misras_gries is expected to return None.
        streams = (
            [1,2,5,4,5,4,4,5,4,4,5],
            [0,0,0,2,1,1,1],
            [0,0,0,1,1,1],
        )
        for stream in streams:
            self.assertEqual(None, misras_gries(stream))

class TestOneSparse(unittest.TestCase):
def test_one_sparse_correct(self):
Expand All @@ -12,4 +27,4 @@ def test_one_sparse_correct(self):
def test_one_sparse_incorrect(self):
self.assertEqual(None,one_sparse([(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(1,'+')])) #Two values remaining
self.assertEqual(None,one_sparse([(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'-'),(2,'-'),(2,'-'),(2,'-')])) # No values remaining
self.assertEqual(None,one_sparse([(2,'+'),(2,'+'),(4,'+'),(4,'+')])) # Bitsum sum of sign is inccorect
self.assertEqual(None,one_sparse([(2,'+'),(2,'+'),(4,'+'),(4,'+')])) # Bitsum sum of sign is inccorect