Skip to content

Commit

Permalink
0.6.15
Browse files (browse the repository at this point in the history)
  • Loading branch information
joocer committed Oct 5, 2023
1 parent 61dde91 commit 71fa7bb
Show file tree
Hide file tree
Showing 12 changed files with 231 additions and 707 deletions.
7 changes: 0 additions & 7 deletions .github/workflows/regression_suite.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,6 @@ jobs:
fsouza/fake-gcs-server \
-scheme http
- name: Start Azurite
run: |
docker run -d -p 10000:10000 \
mcr.microsoft.com/azure-storage/azurite \
azurite-blob \
--blobHost 0.0.0.0
- name: Run Regression Tests
run: coverage run -m pytest --color=yes
env:
Expand Down
21 changes: 7 additions & 14 deletions mabel/data/internals/collected_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,17 @@ def __init__(self, dictset: DictSet, column: str, dedupe: bool = False):
"""
collections: dict = {}

groups = dictset
if dedupe:
groups = dictset.distinct()
groups = dictset.distinct() if dedupe else dictset

for item in groups:
if hasattr(item, "as_dict"):
my_item = item.as_dict()
else:
my_item = item.copy()
my_item = item.copy()
key = my_item.pop(column, None)
if not key in collections:
collections[key] = []
collections[key].append(my_item)
collections.setdefault(key, []).append(my_item)
if dedupe:
for collection in collections:
collections[collection] = {
frozenset(item.items()): item for item in collections[collection]
}.values()
collections = {
k: {frozenset(i.items()): i for i in v}.values() for k, v in collections.items()
}

self._collections = collections

def count(self, collection=None):
Expand Down
55 changes: 14 additions & 41 deletions mabel/data/internals/dnf_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,6 @@
from mabel.utils.text import like
from mabel.utils.text import matches


def _in(x, y):
return x in y


def _nin(x, y):
return x not in y


def _con(x, y):
return y in x


def _ncon(x, y):
return y not in x


def true(x):
return True


# convert text representation of operators to functions
OPERATORS = {
"=": operator.eq,
Expand All @@ -47,11 +26,11 @@ def true(x):
"like": like,
"matches": matches,
"~": matches,
"in": _in,
"!in": _nin,
"not in": _nin,
"contains": _con,
"!contains": _ncon,
"in": lambda x, y: x in y,
"!in": lambda x, y: x not in y,
"not in": lambda x, y: x not in y,
"contains": lambda x, y: y in x,
"!contains": lambda x, y: y not in x,
}


Expand All @@ -75,20 +54,19 @@ def evaluate(predicate: Union[tuple, list], record: dict) -> bool:
# If we have a tuple extract out the key, operator and value and do the evaluation
if isinstance(predicate, tuple):
key, op, value = predicate
if key in record:
return OPERATORS[op.lower()](record[key], value)
return False
record_value = record.get(key, None)
return record_value is not None and OPERATORS[op.lower()](record_value, value)

if isinstance(predicate, list):
# Are all of the entries tuples?
# We AND them together (_all_ are True)
if all([isinstance(p, tuple) for p in predicate]):
return all([evaluate(p, record) for p in predicate])
if all(isinstance(p, tuple) for p in predicate):
return all(evaluate(p, record) for p in predicate)

# Are all of the entries lists?
# We OR them together (_any_ are True)
if all([isinstance(p, list) for p in predicate]):
return any([evaluate(p, record) for p in predicate])
if all(isinstance(p, list) for p in predicate):
return any(evaluate(p, record) for p in predicate)

# if we're here the structure of the filter is wrong
raise InvalidSyntaxError("Unable to evaluate Filter") # pragma: no cover
Expand Down Expand Up @@ -123,11 +101,8 @@ def __init__(self, filters: Optional[List[Tuple[str, str, object]]] = None):
filters = Filters([('name', '!=', 'john'),('name', '!=', 'tom')])
"""
if filters:
self.predicates = filters
self.empty_filter = False
else:
self.empty_filter = True
self.empty_filter = filters is None
self.predicates = filters if filters else []

def filter_dictset(self, dictset: Iterable[dict]) -> Iterable:
"""
Expand All @@ -143,9 +118,7 @@ def filter_dictset(self, dictset: Iterable[dict]) -> Iterable:
if self.empty_filter:
yield from dictset
else:
for record in dictset:
if evaluate(self.predicates, record):
yield record
yield from (record for record in dictset if evaluate(self.predicates, record))

def __call__(self, record) -> bool:
return evaluate(self.predicates, record)
118 changes: 0 additions & 118 deletions mabel/data/internals/index.py

This file was deleted.

18 changes: 7 additions & 11 deletions mabel/data/readers/internals/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
midway through the blob if required.
"""
import orjson
from orso.bitarray import BitArray
from orso.cityhash import CityHash64


Expand All @@ -29,8 +30,6 @@ def __init__(self, readable_blobs, cursor=None):
self.load_cursor(cursor)

def load_cursor(self, cursor):
from bitarray import bitarray

if cursor is None:
return

Expand All @@ -51,10 +50,9 @@ def load_cursor(self, cursor):
if len(find_partition) == 1:
self.partition = find_partition[0]
map_bytes = bytes.fromhex(cursor["map"])
blob_map = bitarray()
blob_map.frombytes(map_bytes)
blob_map = BitArray.from_array(map_bytes, len(map_bytes) * 8)
self.read_blobs = [
self.readable_blobs[i] for i in range(len(self.readable_blobs)) if blob_map[i]
self.readable_blobs[i] for i in range(len(self.readable_blobs)) if blob_map.get(i)
]

def next_blob(self, previous_blob=None):
Expand Down Expand Up @@ -94,13 +92,11 @@ def get(self):
}

def __getitem__(self, item):
from bitarray import bitarray

if item == "map":
blob_map = bitarray(
"".join(["1" if blob in self.read_blobs else "0" for blob in self.readable_blobs])
)
return blob_map.tobytes().hex()
blob_map = BitArray(len(self.readable_blobs))
for i, blob in enumerate(self.readable_blobs):
blob_map.set(i, blob in self.read_blobs)
return blob_map.array.hex()
if item == "partition":
return CityHash64(self.partition)
if item == "location":
Expand Down
Loading

0 comments on commit 71fa7bb

Please sign in to comment.