From 84a7e325e805ef9ba125b72ed54261359d4b6161 Mon Sep 17 00:00:00 2001 From: callmeGoldenboy Date: Tue, 2 Mar 2021 18:43:59 +0100 Subject: [PATCH 1/9] feat:(first draft for the misra gries algorithm) #1 --- algorithms/streaming/misra_greis.py | 47 +++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 algorithms/streaming/misra_greis.py diff --git a/algorithms/streaming/misra_greis.py b/algorithms/streaming/misra_greis.py new file mode 100644 index 000000000..43534b4f8 --- /dev/null +++ b/algorithms/streaming/misra_greis.py @@ -0,0 +1,47 @@ + +""" +Implementation of the Misra-Gries algorithm. +Given a list of items and a value k, it returns the every item in the list that appears at least n/k times, where n is the length of the array +By default, k is set to 2, solving the majority problem. +For the majority problem, this algorithm only guarantees that if there is an element that appears more than n/2 times, it will be outputed. If there +is no such element, any arbitrary element is returned by the algorithm. Therefore, we need to iterate through again at the end. But since we have filtred +out the suspects, the memory complexity is significantly lower than it would be to create counter for every element in the list. + +""" +def misra_gries(array,k=2): + keys = {} + for i in range(len(array)): + val = str(array[i]) + if val in keys: + keys[val] = keys[val] + 1 + + elif len(keys) < k - 1: + keys[val] = 1 + + else: + for key in list(keys): + keys[key] = keys[key] - 1 + if keys[key] == 0: + del keys[key] + + suspects = keys.keys() + frequencies = {} + for suspect in suspects: + freq = _count_frequency(array,int(suspect)) + if freq >= len(array) / k: + frequencies[suspect] = freq + + return frequencies if len(frequencies) > 0 else None + + +def _count_frequency(array,element): + return array.count(element) + +if __name__ == "__main__": + test_true = [1,4,5,4,4,5,4,4] + test_true2 = [1,1,3,1,3,3,4,2,6,1,3] + test_false = [1,1,3,3,2,3,4,5,6,7,8] + print(misra_gries(test_true,3)) + print(misra_gries(test_true))) + print(misra_gries(test_false)) + From 54f94988fd4d5c34c3dadc07e5f93520ee2c793c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20Renstr=C3=B6m?= Date: Wed, 3 Mar 2021 09:16:42 +0100 Subject: [PATCH 2/9] feat:(Added examples and changed to correct name) #1 --- .../{misra_greis.py => misra_gries.py} | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) rename algorithms/streaming/{misra_greis.py => misra_gries.py} (83%) diff --git a/algorithms/streaming/misra_greis.py b/algorithms/streaming/misra_gries.py similarity index 83% rename from algorithms/streaming/misra_greis.py rename to algorithms/streaming/misra_gries.py index 43534b4f8..31339094d 100644 --- a/algorithms/streaming/misra_greis.py +++ b/algorithms/streaming/misra_gries.py @@ -7,8 +7,17 @@ is no such element, any arbitrary element is returned by the algorithm. Therefore, we need to iterate through again at the end. But since we have filtred out the suspects, the memory complexity is significantly lower than it would be to create counter for every element in the list. +For example: +Input misras_gries([1,4,4,4,5,4,4]) +Output {'4':5} +Input misras_gries([0,0,0,1,1,1,1]) +Output {'1':4} +Input misras_gries([0,0,0,0,1,1,1,2,2],3) +Output {'0':4,'1':3} +Input misras_gries([0,0,0,1,1,1] +Output None """ -def misra_gries(array,k=2): +def misras_gries(array,k=2): keys = {} for i in range(len(array)): val = str(array[i]) @@ -37,11 +46,4 @@ def misra_gries(array,k=2): def _count_frequency(array,element): return array.count(element) -if __name__ == "__main__": - test_true = [1,4,5,4,4,5,4,4] - test_true2 = [1,1,3,1,3,3,4,2,6,1,3] - test_false = [1,1,3,3,2,3,4,5,6,7,8] - print(misra_gries(test_true,3)) - print(misra_gries(test_true))) - print(misra_gries(test_false)) From a2bff6719a74293716a1ddbad3137431e44e9743 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20Renstr=C3=B6m?= Date: Wed, 3 Mar 2021 09:21:27 +0100 Subject: [PATCH 3/9] feat:(Added init file for testing) #2 --- algorithms/streaming/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 algorithms/streaming/__init__.py diff --git a/algorithms/streaming/__init__.py b/algorithms/streaming/__init__.py new file mode 100644 index 000000000..e69de29bb From bcb586055b51b46bd5d6c562b711206ace964ed3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20Renstr=C3=B6m?= Date: Wed, 3 Mar 2021 09:22:26 +0100 Subject: [PATCH 4/9] test:(Added tests for misras_gries function) #2 --- tests/test_streaming.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tests/test_streaming.py diff --git a/tests/test_streaming.py b/tests/test_streaming.py new file mode 100644 index 000000000..d6bedb2c9 --- /dev/null +++ b/tests/test_streaming.py @@ -0,0 +1,16 @@ +from algorithms.streaming.misra_gries import ( + misras_gries, +) +import unittest + +class TestMisraGreis(unittest.TestCase): + def test_misra_correct(self): + self.assertEqual({'4':5},misras_gries([1,4,4,4,5,4,4])) + self.assertEqual({'1':4},misras_gries([0,0,0,1,1,1,1])) + self.assertEqual({'0':4,'1':3},misras_gries([0,0,0,0,1,1,1,2,2],3)) + + def test_misra_incorrect(self): + self.assertEqual(None,misras_gries([1,2,5,4,5,4,4,5,4,4,5])) + self.assertEqual(None,misras_gries([0,0,0,2,1,1,1])) + self.assertEqual(None,misras_gries([0,0,0,1,1,1])) + \ No newline at end of file From 780ca2374aa2c6180d7d0d6a6f10c67032abdb40 Mon Sep 17 00:00:00 2001 From: callmeGoldenboy Date: Wed, 3 Mar 2021 13:23:55 +0100 Subject: [PATCH 5/9] feat:(add 1-sparse recovery algorithm) #7 --- algorithms/streaming/1_sparse_recovery.py | 64 +++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 algorithms/streaming/1_sparse_recovery.py diff --git a/algorithms/streaming/1_sparse_recovery.py b/algorithms/streaming/1_sparse_recovery.py new file mode 100644 index 000000000..058942b16 --- /dev/null +++ b/algorithms/streaming/1_sparse_recovery.py @@ -0,0 +1,64 @@ + +""" +1-sparse recovery problem. This algorithm assumes we have a non negative dynamic stream. +Given a stream of tuples, where each tuple contains a number and a sign (+/-), it check if the stream is 1-sparse, meaning if the elements +in the stream cancel eacheother out in such a way that ther is only a unique number at the end. + +Examples: +if the stream consists of [(4,'+'), (2,'+'),(2,'-'),(4,'+'),(3,'+'),(3,'-')], the algorithm will return 4, since the 2s and 3s will cancel eachother out +and there will only be 4s left + +if the stream consists of [(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+')], the algorithm returns 2, since the stream only consists of the same number and sign + +if the stream consists of [(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(1,'+')], the algorithm returns None, since there will be 2 different number remaining + +""" + +def one_sparse(array): + sum_signs = 0 + bitsum = [0]*32 + sum_values = 0 + for element in array: + val,sign = element + if sign == "+": + sum_signs += 1 + sum_values += val + else: + sum_signs -= 1 + sum_values -= val + + bitsum = _get_bit_sum(bitsum,val,sign) + + if sum_signs > 0 and _check_every_number_in_bitsum(bitsum,sum_signs): + return int(sum_values/sum_signs) + else: + return None + + +#Helper function to check that every entry in the list is either 0 or the same as the +#sum of signs +def _check_every_number_in_bitsum(bitsum,sum_signs): + for val in bitsum: + if val == sum_signs or val == 0: + continue + else: + return False + return True + + +#Given 2 lists, representing 2 binary numbers, return either the sum or difference at each entry in the list +def _get_bit_sum(sum_bits,new_value,sign): + bit_repr = [int(x) for x in bin(new_value)[2:]] + while len(bit_repr) < 32: + bit_repr.insert(0,0) + assert len(bit_repr) == len(sum_bits) + if sign == "+": + for i in range(len(bit_repr)-1,-1,-1): + sum_bits[i] += bit_repr[i] + + if sign == "-": + for i in range(len(bit_repr)-1,-1,-1): + sum_bits[i] -= bit_repr[i] + + return sum_bits + From d7cdaa39f4c0ac282c860c268d6d25efb507cd45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20Renstr=C3=B6m?= Date: Fri, 5 Mar 2021 22:09:03 +0100 Subject: [PATCH 6/9] Add finalized 1-sparse-recovery algorithm --- algorithms/streaming/1_sparse_recovery.py | 75 +++++++++++------------ algorithms/streaming/misra_gries.py | 49 --------------- tests/test_streaming.py | 16 ----- 3 files changed, 36 insertions(+), 104 deletions(-) delete mode 100644 algorithms/streaming/misra_gries.py delete mode 100644 tests/test_streaming.py diff --git a/algorithms/streaming/1_sparse_recovery.py b/algorithms/streaming/1_sparse_recovery.py index 058942b16..084a9f7b8 100644 --- a/algorithms/streaming/1_sparse_recovery.py +++ b/algorithms/streaming/1_sparse_recovery.py @@ -1,64 +1,61 @@ - -""" -1-sparse recovery problem. This algorithm assumes we have a non negative dynamic stream. -Given a stream of tuples, where each tuple contains a number and a sign (+/-), it check if the stream is 1-sparse, meaning if the elements +""" Non-negative 1-sparse recovery problem. This algorithm assumes we have a non negative dynamic stream. +Given a stream of tuples, where each tuple contains a number and a sign (+/-), it check if the stream is 1-sparse, meaning if the elements in the stream cancel eacheother out in such a way that ther is only a unique number at the end. Examples: -if the stream consists of [(4,'+'), (2,'+'),(2,'-'),(4,'+'),(3,'+'),(3,'-')], the algorithm will return 4, since the 2s and 3s will cancel eachother out -and there will only be 4s left - -if the stream consists of [(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+')], the algorithm returns 2, since the stream only consists of the same number and sign - -if the stream consists of [(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(1,'+')], the algorithm returns None, since there will be 2 different number remaining - +#1 +Input: [(4,'+'), (2,'+'),(2,'-'),(4,'+'),(3,'+'),(3,'-')], +Output: 4 +Comment: Since 2 and 3 gets removed. +#2 +Input: [(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+')] +Output: 2 +Comment: No other numbers present +#3 +Input: [(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(1,'+')] +Output: None +Comment: Not 1-sparse """ def one_sparse(array): - sum_signs = 0 - bitsum = [0]*32 - sum_values = 0 - for element in array: - val,sign = element - if sign == "+": + sum_signs = 0 + bitsum = [0]*32 + sum_values = 0 + for val,sign in array: + if sign == "+": sum_signs += 1 sum_values += val else: - sum_signs -= 1 + sum_signs -= 1 sum_values -= val - - bitsum = _get_bit_sum(bitsum,val,sign) + + _get_bit_sum(bitsum,val,sign) if sum_signs > 0 and _check_every_number_in_bitsum(bitsum,sum_signs): return int(sum_values/sum_signs) else: return None - #Helper function to check that every entry in the list is either 0 or the same as the #sum of signs def _check_every_number_in_bitsum(bitsum,sum_signs): - for val in bitsum: - if val == sum_signs or val == 0: - continue - else: + for val in bitsum: + if val != 0 and val != sum_signs : return False return True - -#Given 2 lists, representing 2 binary numbers, return either the sum or difference at each entry in the list -def _get_bit_sum(sum_bits,new_value,sign): - bit_repr = [int(x) for x in bin(new_value)[2:]] - while len(bit_repr) < 32: - bit_repr.insert(0,0) - assert len(bit_repr) == len(sum_bits) +# Adds bit representation value to bitsum array +def _get_bit_sum(bitsum,val,sign): + i = 0 if sign == "+": - for i in range(len(bit_repr)-1,-1,-1): - sum_bits[i] += bit_repr[i] + while(val): + bitsum[i] += val & 1 + i +=1 + val >>=1 + else : + while(val): + bitsum[i] -= val & 1 + i +=1 + val >>=1 - if sign == "-": - for i in range(len(bit_repr)-1,-1,-1): - sum_bits[i] -= bit_repr[i] - return sum_bits - diff --git a/algorithms/streaming/misra_gries.py b/algorithms/streaming/misra_gries.py deleted file mode 100644 index 31339094d..000000000 --- a/algorithms/streaming/misra_gries.py +++ /dev/null @@ -1,49 +0,0 @@ - -""" -Implementation of the Misra-Gries algorithm. -Given a list of items and a value k, it returns the every item in the list that appears at least n/k times, where n is the length of the array -By default, k is set to 2, solving the majority problem. -For the majority problem, this algorithm only guarantees that if there is an element that appears more than n/2 times, it will be outputed. If there -is no such element, any arbitrary element is returned by the algorithm. Therefore, we need to iterate through again at the end. But since we have filtred -out the suspects, the memory complexity is significantly lower than it would be to create counter for every element in the list. - -For example: -Input misras_gries([1,4,4,4,5,4,4]) -Output {'4':5} -Input misras_gries([0,0,0,1,1,1,1]) -Output {'1':4} -Input misras_gries([0,0,0,0,1,1,1,2,2],3) -Output {'0':4,'1':3} -Input misras_gries([0,0,0,1,1,1] -Output None -""" -def misras_gries(array,k=2): - keys = {} - for i in range(len(array)): - val = str(array[i]) - if val in keys: - keys[val] = keys[val] + 1 - - elif len(keys) < k - 1: - keys[val] = 1 - - else: - for key in list(keys): - keys[key] = keys[key] - 1 - if keys[key] == 0: - del keys[key] - - suspects = keys.keys() - frequencies = {} - for suspect in suspects: - freq = _count_frequency(array,int(suspect)) - if freq >= len(array) / k: - frequencies[suspect] = freq - - return frequencies if len(frequencies) > 0 else None - - -def _count_frequency(array,element): - return array.count(element) - - diff --git a/tests/test_streaming.py b/tests/test_streaming.py deleted file mode 100644 index d6bedb2c9..000000000 --- a/tests/test_streaming.py +++ /dev/null @@ -1,16 +0,0 @@ -from algorithms.streaming.misra_gries import ( - misras_gries, -) -import unittest - -class TestMisraGreis(unittest.TestCase): - def test_misra_correct(self): - self.assertEqual({'4':5},misras_gries([1,4,4,4,5,4,4])) - self.assertEqual({'1':4},misras_gries([0,0,0,1,1,1,1])) - self.assertEqual({'0':4,'1':3},misras_gries([0,0,0,0,1,1,1,2,2],3)) - - def test_misra_incorrect(self): - self.assertEqual(None,misras_gries([1,2,5,4,5,4,4,5,4,4,5])) - self.assertEqual(None,misras_gries([0,0,0,2,1,1,1])) - self.assertEqual(None,misras_gries([0,0,0,1,1,1])) - \ No newline at end of file From ca63faf2d6245f2bcf4e9d0918a136190478eadb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20Renstr=C3=B6m?= Date: Fri, 5 Mar 2021 22:17:22 +0100 Subject: [PATCH 7/9] Renamed sparse function name to work with import --- README.md | 2 ++ .../streaming/{1_sparse_recovery.py => one_sparse_recovery.py} | 0 2 files changed, 2 insertions(+) rename algorithms/streaming/{1_sparse_recovery.py => one_sparse_recovery.py} (100%) diff --git a/README.md b/README.md index c14e68468..ffe602b01 100644 --- a/README.md +++ b/README.md @@ -305,6 +305,8 @@ If you want to uninstall algorithms, it is as simple as: - [is_consecutive](algorithms/stack/is_consecutive.py) - [remove_min](algorithms/stack/remove_min.py) - [is_sorted](algorithms/stack/is_sorted.py) +- [streaming](algorithms/streaming) + - [1-sparse-recovery](algorithms/streaming/one_sparse_recovery.py) - [strings](algorithms/strings) - [fizzbuzz](algorithms/strings/fizzbuzz.py) - [delete_reoccurring](algorithms/strings/delete_reoccurring.py) diff --git a/algorithms/streaming/1_sparse_recovery.py b/algorithms/streaming/one_sparse_recovery.py similarity index 100% rename from algorithms/streaming/1_sparse_recovery.py rename to algorithms/streaming/one_sparse_recovery.py From b1d86d9605f3df848917ddfe2f391ffbed24f2eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20Renstr=C3=B6m?= Date: Fri, 5 Mar 2021 22:21:41 +0100 Subject: [PATCH 8/9] Tests added for 1-sparse-recovery function --- algorithms/streaming/__init__.py | 1 + tests/test_streaming.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 tests/test_streaming.py diff --git a/algorithms/streaming/__init__.py b/algorithms/streaming/__init__.py index e69de29bb..ba7a84ea8 100644 --- a/algorithms/streaming/__init__.py +++ b/algorithms/streaming/__init__.py @@ -0,0 +1 @@ +from .one_sparse_recovery import * \ No newline at end of file diff --git a/tests/test_streaming.py b/tests/test_streaming.py new file mode 100644 index 000000000..13b3bedee --- /dev/null +++ b/tests/test_streaming.py @@ -0,0 +1,15 @@ + from algorithms.streaming import ( + one_sparse +) +import unittest + +class TestOneSparse(unittest.TestCase): + def test_one_sparse_correct(self): + self.assertEqual(4,one_sparse([(4,'+'), (2,'+'),(2,'-'),(4,'+'),(3,'+'),(3,'-')])) + self.assertEqual(2,one_sparse([(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+')])) + + + def test_one_sparse_incorrect(self): + self.assertEqual(None,one_sparse([(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(1,'+')])) #Two values remaining + self.assertEqual(None,one_sparse([(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'-'),(2,'-'),(2,'-'),(2,'-')])) # No values remaining + self.assertEqual(None,one_sparse([(2,'+'),(2,'+'),(4,'+'),(4,'+')])) # Bitsum sum of sign is inccorect \ No newline at end of file From 0355e09339e5991f1aa91ccc25910ecdfd0b4bde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20Renstr=C3=B6m?= Date: Fri, 5 Mar 2021 22:24:16 +0100 Subject: [PATCH 9/9] Tests added for 1-sparse-recovery function --- algorithms/streaming/__init__.py | 1 + tests/test_streaming.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 tests/test_streaming.py diff --git a/algorithms/streaming/__init__.py b/algorithms/streaming/__init__.py index e69de29bb..ba7a84ea8 100644 --- a/algorithms/streaming/__init__.py +++ b/algorithms/streaming/__init__.py @@ -0,0 +1 @@ +from .one_sparse_recovery import * \ No newline at end of file diff --git a/tests/test_streaming.py b/tests/test_streaming.py new file mode 100644 index 000000000..8662480e6 --- /dev/null +++ b/tests/test_streaming.py @@ -0,0 +1,15 @@ +from algorithms.streaming import ( + one_sparse +) +import unittest + +class TestOneSparse(unittest.TestCase): + def test_one_sparse_correct(self): + self.assertEqual(4,one_sparse([(4,'+'), (2,'+'),(2,'-'),(4,'+'),(3,'+'),(3,'-')])) + self.assertEqual(2,one_sparse([(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+')])) + + + def test_one_sparse_incorrect(self): + self.assertEqual(None,one_sparse([(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(1,'+')])) #Two values remaining + self.assertEqual(None,one_sparse([(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'-'),(2,'-'),(2,'-'),(2,'-')])) # No values remaining + self.assertEqual(None,one_sparse([(2,'+'),(2,'+'),(4,'+'),(4,'+')])) # Bitsum sum of sign is inccorect \ No newline at end of file