55 changes: 11 additions & 44 deletions doc/articles/block_index.py
@@ -8,54 +8,22 @@
from itertools import repeat
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from arraykit import BlockIndex
# from arraykit import ErrorInitTypeBlocks
from arraykit import shape_filter
from arraykit import resolve_dtype

import arraykit as ak

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

sys.path.append(os.getcwd())

from performance.reference.block_index import from_blocks


def from_blocks(
raw_blocks: tp.Iterable[np.ndarray],
):
index: tp.List[tp.Tuple[int, int]] = [] # columns position to blocks key
block_count = 0
row_count = None
column_count = 0
dtype = None

for block in raw_blocks:
if not block.__class__ is np.ndarray:
raise ErrorInitTypeBlocks(f'found non array block: {block}')
if block.ndim > 2:
raise ErrorInitTypeBlocks(f'cannot include array with {block.ndim} dimensions')

r, c = shape_filter(block)

if row_count is not None and r != row_count: #type: ignore [unreachable]
raise ErrorInitTypeBlocks(f'mismatched row count: {r}: {row_count}')
else:
row_count = r
if c == 0:
continue

if dtype is None:
dtype = block.dtype
else:
dtype = resolve_dtype(dtype, block.dtype)

for i in range(c):
index.append((block_count, i))
column_count += c
block_count += 1
return (row_count, column_count), index

#-------------------------------------------------------------------------------

class ArrayProcessor:
NAME = ''
@@ -78,6 +46,7 @@ def __init__(self, arrays: tp.Iterable[np.ndarray]):
self.selector_bool_array = (np.arange(len(self.bi)) % 2) == 0
self.selector_slice = slice(0, len(self.bi), 2)


#-------------------------------------------------------------------------------
class BlockIndexLoad(ArrayProcessor):
NAME = 'BlockIndex: load'
@@ -223,13 +192,11 @@ class TupleIndexIterBoolArray(ArrayProcessor):

def __call__(self):
ti = self.ti
_ = [ti[i] for i in self.selector_bool_array if i]


_ = [ti[i] for i, b in enumerate(self.selector_bool_array) if b]
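# assumed rationale for the revised comprehension: enumerating the mask yields
# the positional index of each True entry, whereas iterating the boolean array
# directly would index ti with True/False values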


#-------------------------------------------------------------------------------
NUMBER = 50
NUMBER = 5

def seconds_to_display(seconds: float) -> str:
seconds /= NUMBER
190 changes: 190 additions & 0 deletions performance/reference/block_index.py
@@ -0,0 +1,190 @@

from arraykit import shape_filter
from arraykit import resolve_dtype
from arraykit import ErrorInitTypeBlocks # referenced by the error paths below

import typing as tp
import numpy as np

#-------------------------------------------------------------------------------
def from_blocks(
raw_blocks: tp.Iterable[np.ndarray],
):
'''Simulation of legacy routine within TypeBlocks.
'''
index: tp.List[tp.Tuple[int, int]] = [] # columns position to blocks key
block_count = 0
row_count = None
column_count = 0
dtype = None

for block in raw_blocks:
if not block.__class__ is np.ndarray:
raise ErrorInitTypeBlocks(f'found non array block: {block}')
if block.ndim > 2:
raise ErrorInitTypeBlocks(f'cannot include array with {block.ndim} dimensions')

r, c = shape_filter(block)

if row_count is not None and r != row_count: #type: ignore [unreachable]
raise ErrorInitTypeBlocks(f'mismatched row count: {r}: {row_count}')
else:
row_count = r
if c == 0:
continue

if dtype is None:
dtype = block.dtype
else:
dtype = resolve_dtype(dtype, block.dtype)

for i in range(c):
index.append((block_count, i))
column_count += c
block_count += 1
return (row_count, column_count), index
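# illustrative usage sketch (assumed, not shown in this file): one 2D and one
# 1D block produce a combined shape and per-column (block, column) pairs
# a1 = np.arange(6).reshape(3, 2)
# a2 = np.array([1.5, 2.5, 3.5])
# from_blocks((a1, a2))
# -> ((3, 3), [(0, 0), (0, 1), (1, 0)])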

#-------------------------------------------------------------------------------


def cols_to_slice(indices: tp.Sequence[int]) -> slice:
'''Translate an iterable of contiguous integers into a slice.
Integers are assumed to be ordered (ascending or descending) and contiguous.
'''
start_idx = indices[0]
# single column as a single slice
if len(indices) == 1:
return slice(start_idx, start_idx + 1)

stop_idx = indices[-1]
if stop_idx > start_idx: # ascending indices
return slice(start_idx, stop_idx + 1)

if stop_idx == 0:
return slice(start_idx, None, -1)
# stop is less than start, need to reduce by 1 to cover range
return slice(start_idx, stop_idx - 1, -1)
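# assumed behavior, shown as commented examples:
# cols_to_slice([2, 3, 4]) -> slice(2, 5, None)
# cols_to_slice([4, 3, 2]) -> slice(4, 1, -1)
# cols_to_slice([2, 1, 0]) -> slice(2, None, -1)
# cols_to_slice([7])       -> slice(7, 8, None)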

def indices_to_contiguous_pairs(indices: tp.Iterable[tp.Tuple[int, int]]
) -> tp.Iterator[tp.Tuple[int, slice]]:
'''Indices are pairs of (block_idx, column); convert these to pairs of (block_idx, slice) when contiguous columns are identified
within a block (these are block slices).
'''
# store pairs of block idx, ascending col list
last: tp.Optional[tp.Tuple[int, int]] = None

for block_idx, col in indices:
if not last:
last = (block_idx, col)
bundle = [col]
continue
if last[0] == block_idx and abs(col - last[1]) == 1:
# if contiguous, update last, add to bundle
last = (block_idx, col)
# do not need to store all col, only the last,
# however probably easier to just accumulate all
bundle.append(col)
continue
# either new block, or not contiguous on same block
yield (last[0], cols_to_slice(bundle))
# start a new bundle
bundle = [col]
last = (block_idx, col)

# last can be None
if last and bundle:
yield (last[0], cols_to_slice(bundle))
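# worked example (mirrors the first sample in __main__ below):
# list(indices_to_contiguous_pairs(
#     [(0, 0), (0, 1), (0, 2), (1, 1), (1, 3), (2, 0), (3, 0), (3, 1), (3, 2)]))
# -> [(0, slice(0, 3, None)), (1, slice(1, 2, None)), (1, slice(3, 4, None)),
#     (2, slice(0, 1, None)), (3, slice(0, 3, None))]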


class IterContiguous:
def __init__(self, indices):
self.indices = iter(indices)
self.last_block = -1
self.last_column = -1
self.next_block = -1
self.next_column = -1

@staticmethod
def build_slice(start, end_inclusive):
# this works, but we retain slices to force 2D selections; we might explore changing this
# if start == end_inclusive:
# return start

if start <= end_inclusive:
return slice(start, end_inclusive + 1, None) # can be 1
# reverse slice
if end_inclusive == 0:
return slice(start, None, -1)
return slice(start, end_inclusive - 1, -1)

def getter(self) -> tp.Tuple[int, slice]:
slice_start = -1
while True:
if self.next_block == -2:
return None # terminate the loop
if self.next_block != -1:
# discontinuity found on last iteration, set new start
self.last_block = self.next_block
self.last_column = self.next_column
slice_start = self.last_column
self.next_block = -1 # clear next state
self.next_column = -1

try:
block, column = next(self.indices)
except StopIteration:
# no more pairs; a previously set slice_start has not yet been emitted,
# so return it now and flag termination for the next call
self.next_block = -2
return self.last_block, self.build_slice(slice_start, self.last_column)

if self.last_block == -1:
# initialization
self.last_block = block
self.last_column = column
slice_start = column
continue

if self.last_block == block and abs(column - self.last_column) == 1: # contiguous
self.last_column = column
continue

# not contiguous, need to emit a slice for previous region
# store this block, column as next, so we have them on the next call
self.next_block = block
self.next_column = column
return self.last_block, self.build_slice(slice_start, self.last_column)


def iter(self) -> tp.Iterator[tp.Tuple[int, slice]]:
while True:
post = self.getter()
if post is not None:
yield post
else:
break
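# usage sketch (assumed): the stateful iterator is expected to yield the same
# (block, slice) pairs as indices_to_contiguous_pairs
# list(IterContiguous([(0, 0), (0, 1), (2, 5)]).iter())
# -> [(0, slice(0, 2, None)), (2, slice(5, 6, None))]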

#-------------------------------------------------------------------------------



if __name__ == '__main__':
samples = (
[(0, 0), (0, 1), (0, 2), (1, 1), (1, 3), (2, 0), (3, 0), (3, 1), (3, 2)],
[(0, 0), (2, 1), (3, 5), (10, 1)],
[(0, 0), (2, 1), (2, 2), (2, 5), (2, 6), (10, 1)],
[(10, 1)],
[(0, 1), (0, 2), (0, 3), (0, 4)],
[(0, 0), (2, 3), (2, 2), (2, 1), (2, 6), (10, 1)],
[(2, 3), (0, 0), (2, 2), (2, 1), (2, 6), (2, 7)],
[(2, 3), (2, 2), (5, 2), (5, 1), (5, 0), (2, 1), (2, 0)],

)
for sample in samples:
p1 = list(indices_to_contiguous_pairs(sample))
print(sample)
print(p1)


iterc = IterContiguous(sample)
p2 = list(iterc.iter())
print(p2)
6 changes: 6 additions & 0 deletions src/__init__.pyi
@@ -46,6 +46,12 @@ class BlockIndex:
def iter_select(self,
__key: tp.Union[slice, np.ndarray, tp.List[int]],
) -> tp.Iterator[tp.Tuple[int, int]]: ...
def iter_contiguous(self,
__key: tp.Union[slice, np.ndarray, tp.List[int]],
ascending: bool = False,
reduce: bool = False,
) -> tp.Iterator[tp.Tuple[int, int]]: ...
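# hedged usage sketch (an assumption, not part of the stub): with an already
# populated BlockIndex bi, contiguous runs of a key might be traversed as
# for block, selection in bi.iter_contiguous([0, 1, 2, 5], ascending=True):
#     ...  # selection addresses adjacent columns within that block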


def iterable_str_to_array_1d(
iterable: tp.Iterable[str],