Skip to content

Commit

Permalink
FIX/#199
Browse files: browse the repository at this point in the history
  • Loading branch information
joocer committed Jun 18, 2022
1 parent 3418db4 commit 6d573e5
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 17 deletions.
22 changes: 18 additions & 4 deletions opteryx/engine/planner/operations/show_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

MAX_COLLECTOR: int = 17


def myhash(any):
from cityhash import CityHash64

Expand Down Expand Up @@ -205,21 +206,34 @@ def _extended_collector(pages):
[i for i in column_data if i not in (None, numpy.nan)]
)

if _type in (OPTERYX_TYPES.VARCHAR, OPTERYX_TYPES.NUMERIC, OPTERYX_TYPES.TIMESTAMP):
if _type in (
OPTERYX_TYPES.VARCHAR,
OPTERYX_TYPES.NUMERIC,
OPTERYX_TYPES.TIMESTAMP,
):
# hyperloglog estimates cardinality/uniqueness
hll = profile.get("hyperloglog")
if hll is None:
hll = hyperloglog.HyperLogLogPlusPlus(p=16)
[hll.update(value) for value in column_data]
profile["hyperloglog"] = hll

if _type in (OPTERYX_TYPES.BOOLEAN, OPTERYX_TYPES.VARCHAR, OPTERYX_TYPES.NUMERIC, OPTERYX_TYPES.TIMESTAMP):

if _type in (
OPTERYX_TYPES.BOOLEAN,
OPTERYX_TYPES.VARCHAR,
OPTERYX_TYPES.NUMERIC,
OPTERYX_TYPES.TIMESTAMP,
):
# counter is used to collect and count unique values
counter = profile.get("counter")
if counter is None:
counter = CountingTree()
if len(counter) < MAX_COLLECTOR:
[counter.insert(value) for value in column_data if len(counter) < MAX_COLLECTOR]
[
counter.insert(value)
for value in column_data
if len(counter) < MAX_COLLECTOR
]
profile["counter"] = counter

if _type in (OPTERYX_TYPES.NUMERIC, OPTERYX_TYPES.TIMESTAMP):
Expand Down
2 changes: 1 addition & 1 deletion opteryx/sketches/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
import pyximport

pyximport.install()
pyximport.install()
2 changes: 1 addition & 1 deletion opteryx/third_party/abctree/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

pyximport.install()

from .abctree import ABCTree
from .abctree import ABCTree
2 changes: 1 addition & 1 deletion opteryx/third_party/hyperloglog/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .hyperloglog import HyperLogLog
from .hyperloglog import HyperLogLogPlusPlus
from .hyperloglog import HyperLogLogPlusPlus
10 changes: 5 additions & 5 deletions opteryx/third_party/hyperloglog/hyperloglog.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,8 @@ def deserialize(cls, buf):
)
except TypeError:
h.reg = numpy.array(
struct.unpack_from("%dB" % h.m, bytearray(buf), offset), dtype=numpy.int8
struct.unpack_from("%dB" % h.m, bytearray(buf), offset),
dtype=numpy.int8,
)
return h

Expand All @@ -306,7 +307,8 @@ def __setstate__(self, buf):
)
except TypeError:
self.reg = numpy.array(
struct.unpack_from("%dB" % self.m, bytearray(buf), offset), dtype=numpy.int8
struct.unpack_from("%dB" % self.m, bytearray(buf), offset),
dtype=numpy.int8,
)


Expand Down Expand Up @@ -338,9 +340,7 @@ class HyperLogLogPlusPlus(HyperLogLog):
_hash_range_byte = 8

def __init__(self, p=8, reg=None, hashfunc=CityHash64):
super(HyperLogLogPlusPlus, self).__init__(
p=p, reg=reg, hashfunc=hashfunc
)
super(HyperLogLogPlusPlus, self).__init__(p=p, reg=reg, hashfunc=hashfunc)

def _get_threshold(self, p):
return _thresholds[p - 4]
Expand Down
4 changes: 1 addition & 3 deletions opteryx/third_party/hyperloglog/tests/test_hll.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import sys
import os

sys.path.insert(1, os.path.join(sys.path[0], ".."))
from hyperloglog import HyperLogLog, HyperLogLogPlusPlus

Expand Down Expand Up @@ -126,9 +127,6 @@ def test_copy(self):
self.assertEqual(h1.hashfunc, h2.hashfunc)





class TestHyperLogLogPlusPlus(TestHyperLogLog):

_class = HyperLogLogPlusPlus
Expand Down
5 changes: 3 additions & 2 deletions tests/sketches/test_counting_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from opteryx.sketches.counting_tree import CountingTree


def test_counter():
pass

Expand All @@ -13,7 +14,7 @@ def test_counter():
ct = CountingTree()
for i in range(20):
for u in range(i):
#ct.insert(i)
# ct.insert(i)
ct.insert(u)

print(ct)
print(ct)

0 comments on commit 6d573e5

Please sign in to comment.