# utils.py

In [1]:
import vectorbt as vbt
from vectorbt.utils import *

In [2]:
import itertools
from datetime import datetime

import numpy as np
import pandas as pd
from numba import njit, f8, i8, b1, optional

In [3]:
# Shared fixtures for the rest of the notebook: one scalar, NumPy arrays of
# several shapes, and pandas objects of matching shapes. Every pandas index,
# Series name and column level carries its own distinct label (i1..i8,
# a1..a8, c3..c8), so the labels printed later identify their source object.
v1 = 0  # plain Python scalar
a1 = np.array([1])  # 1-dim, one element
a2 = np.array([1, 2, 3])  # 1-dim, three elements
a3 = np.array([[1, 2, 3]])  # 2-dim, shape (1, 3)
a4 = np.array([[1], [2], [3]])  # 2-dim, shape (3, 1)
a5 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])  # 2-dim, shape (3, 3)
# Series with a single row
sr1 = pd.Series([1], 
                index=pd.Index(['x1'], name='i1'), 
                name='a1')
print(sr1)
# Series with three rows
sr2 = pd.Series([1, 2, 3], 
                index=pd.Index(['x2', 'y2', 'z2'], name='i2'), 
                name='a2')
print(sr2)
# DataFrame with one row and one column
df1 = pd.DataFrame([[1]], 
                   index=pd.Index(['x3'], name='i3'), 
                   columns=pd.Index(['a3'], name='c3'))
print(df1)
# DataFrame with three rows and one column
df2 = pd.DataFrame([[1], [2], [3]], 
                   index=pd.Index(['x4', 'y4', 'z4'], name='i4'), 
                   columns=pd.Index(['a4'], name='c4'))
print(df2)
# DataFrame with one row and three columns
df3 = pd.DataFrame([[1, 2, 3]], 
                   index=pd.Index(['x5'], name='i5'), 
                   columns=pd.Index(['a5', 'b5', 'c5'], name='c5'))
print(df3)
# DataFrame with three rows and three columns
df4 = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], 
                   index=pd.Index(['x6', 'y6', 'z6'], name='i6'), 
                   columns=pd.Index(['a6', 'b6', 'c6'], name='c6'))
print(df4)

# 3x3 DataFrame whose index and columns are both two-level MultiIndex objects
multi_i = pd.MultiIndex.from_arrays([['x7', 'y7', 'z7'], ['x8', 'y8', 'z8']], names=['i7', 'i8']) 
multi_c = pd.MultiIndex.from_arrays([['a7', 'b7', 'c7'], ['a8', 'b8', 'c8']], names=['c7', 'c8'])
df5 = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=multi_i, columns=multi_c)
print(df5)

i1
x1    1
Name: a1, dtype: int64
i2
x2    1
y2    2
z2    3
Name: a2, dtype: int64
c3  a3
i3    
x3   1
c4  a4
i4    
x4   1
y4   2
z4   3
c5  a5  b5  c5
i5            
x5   1   2   3
c6  a6  b6  c6
i6            
x6   1   2   3
y6   4   5   6
z6   7   8   9
c7    a7 b7 c7
c8    a8 b8 c8
i7 i8         
x7 x8  1  2  3
y7 y8  4  5  6
z7 z8  7  8  9


## Checks

In [4]:
print(is_series(v1))
print(is_series(a1))
print(is_series(sr1))
print(is_series(df1))

False
False
True
False


In [5]:
print(is_frame(v1))
print(is_frame(a1))
print(is_frame(sr1))
print(is_frame(df1))

False
False
False
True


In [6]:
print(is_pandas(v1))
print(is_pandas(a1))
print(is_pandas(sr1))
print(is_pandas(df1))

False
False
True
True


In [7]:
print(is_array(v1))
print(is_array(a1))
print(is_array(sr1))
print(is_array(df1))

False
True
False
False


In [8]:
print(is_array_like(v1))
print(is_array_like(a1))
print(is_array_like(sr1))
print(is_array_like(df1))

False
True
True
True


In [9]:
print(is_numba_func(lambda x: x))
print(is_numba_func(njit(lambda x: x)))

False
True


In [10]:
check_not_none(v1)

In [11]:
check_type(v1, int)
check_type(a1, np.ndarray)
check_type(sr1, (np.ndarray, pd.Series))

In [12]:
check_not_type(sr1, (int, pd.DataFrame))

In [13]:
check_same_type(v1, v1)
check_same_type(a1, a2)
check_same_type(sr1, sr1)
check_same_type(df1, df2)

In [14]:
# `np.int` was only an alias of the builtin `int`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so pass the builtin directly.
check_dtype(a1, int)

In [15]:
check_same_dtype(v1, a1)
check_same_dtype(a1, df1)
check_same_dtype(df1, df2)
check_same_dtype(df2, df3)

In [16]:
check_ndim(v1, 0)
check_ndim(a1, 1)
check_ndim(df1, 2)

In [17]:
check_same_len([[1]], [[2]])

In [18]:
check_same_shape(a1, sr1)
check_same_shape(df2, df4, along_axis=0)
check_same_shape(df3, df4, along_axis=1)
check_same_shape(df2, df3, along_axis=(0, 1))

In [19]:
check_same_index(df3, df3)

In [20]:
check_same_columns(df3, df3)

In [21]:
check_same_meta(df3, df3)

In [22]:
check_same(df3, df3)

In [23]:
check_level_not_exists(df3, 'a')

## Index and columns

In [24]:
i1 = index_from_values([0.1, 0.2], name='a')
i2 = index_from_values(np.tile(np.arange(1, 4)[:, None][:, None], (1, 3, 3)), name='b')
i3 = index_from_values(np.random.uniform(size=(3, 3, 3)), name='c')

print(i1)
print(i2)
print(i3)

Float64Index([0.1, 0.2], dtype='float64', name='a')
Int64Index([1, 2, 3], dtype='int64', name='b')
Index(['mix_0', 'mix_1', 'mix_2'], dtype='object', name='c')


In [25]:
print(drop_redundant_levels(pd.Index(['a', 'a']))) # ignores levels with single element
print(drop_redundant_levels(pd.Index(['a', 'a'], name='hi')))
print(drop_redundant_levels(pd.MultiIndex.from_arrays([['a', 'a'], ['b', 'b']], names=['hi', 'hi2'])))
print(drop_redundant_levels(pd.MultiIndex.from_arrays([['a', 'b'], ['a', 'b']], names=['hi', 'hi2'])))
print(drop_redundant_levels(pd.MultiIndex.from_arrays([[0, 1], ['a', 'b']], names=[None, 'hi2']))) # ignores 0-to-n
print(drop_redundant_levels(pd.MultiIndex.from_arrays([[0, 2], ['a', 'b']], names=[None, 'hi2']))) # legit
print(drop_redundant_levels(pd.MultiIndex.from_arrays([[0, 1], ['a', 'b']], names=['hi', 'hi2']))) # legit (w/ name)

Index(['a', 'a'], dtype='object')
Index(['a', 'a'], dtype='object', name='hi')
MultiIndex([('a', 'b'),
            ('a', 'b')],
           names=['hi', 'hi2'])
MultiIndex([('a', 'a'),
            ('b', 'b')],
           names=['hi', 'hi2'])
Index(['a', 'b'], dtype='object', name='hi2')
MultiIndex([(0, 'a'),
            (2, 'b')],
           names=[None, 'hi2'])
MultiIndex([(0, 'a'),
            (1, 'b')],
           names=['hi', 'hi2'])


In [26]:
print(drop_duplicate_levels(pd.MultiIndex.from_arrays(
    [[1, 2, 3], [1, 2, 3]], names=['a', 'a'])))
print(drop_duplicate_levels(pd.MultiIndex.from_tuples(
    [(0, 1, 2, 1), ('a', 'b', 'c', 'b')], names=['x', 'y', 'z', 'y']), keep='last'))
print(drop_duplicate_levels(pd.MultiIndex.from_tuples(
    [(0, 1, 2, 1), ('a', 'b', 'c', 'b')], names=['x', 'y', 'z', 'y']), keep='first'))

Int64Index([1, 2, 3], dtype='int64', name='a')
MultiIndex([(  0,   2,   1),
            ('a', 'c', 'b')],
           names=['x', 'z', 'y'])
MultiIndex([(  0,   1,   2),
            ('a', 'b', 'c')],
           names=['x', 'y', 'z'])


In [27]:
i23 = stack_indices(i2, i3)
i32 = stack_indices(i3, i2)

print(i23)
print(i32)
print(stack_indices(i23, i32))

MultiIndex([(1, 'mix_0'),
            (2, 'mix_1'),
            (3, 'mix_2')],
           names=['b', 'c'])
MultiIndex([('mix_0', 1),
            ('mix_1', 2),
            ('mix_2', 3)],
           names=['c', 'b'])
MultiIndex([(1, 'mix_0', 'mix_0', 1),
            (2, 'mix_1', 'mix_1', 2),
            (3, 'mix_2', 'mix_2', 3)],
           names=['b', 'c', 'c', 'b'])


In [28]:
print(combine_indices(i1, i2)) # combine_indices uses stack_indices
print(combine_indices(i2, i3))
print(combine_indices(i23, i23))

MultiIndex([(0.1, 1),
            (0.1, 2),
            (0.1, 3),
            (0.2, 1),
            (0.2, 2),
            (0.2, 3)],
           names=['a', 'b'])
MultiIndex([(1, 'mix_0'),
            (1, 'mix_1'),
            (1, 'mix_2'),
            (2, 'mix_0'),
            (2, 'mix_1'),
            (2, 'mix_2'),
            (3, 'mix_0'),
            (3, 'mix_1'),
            (3, 'mix_2')],
           names=['b', 'c'])
MultiIndex([(1, 'mix_0', 1, 'mix_0'),
            (1, 'mix_0', 2, 'mix_1'),
            (1, 'mix_0', 3, 'mix_2'),
            (2, 'mix_1', 1, 'mix_0'),
            (2, 'mix_1', 2, 'mix_1'),
            (2, 'mix_1', 3, 'mix_2'),
            (3, 'mix_2', 1, 'mix_0'),
            (3, 'mix_2', 2, 'mix_1'),
            (3, 'mix_2', 3, 'mix_2')],
           names=['b', 'c', 'b', 'c'])


In [29]:
print(drop_levels(i23, 'b'))
print(drop_levels(i23, 'c'))
print(drop_levels(i23, ['b', 'c'])) # you can't remove all levels

Index(['mix_0', 'mix_1', 'mix_2'], dtype='object', name='c')
Int64Index([1, 2, 3], dtype='int64', name='b')
MultiIndex([(1, 'mix_0'),
            (2, 'mix_1'),
            (3, 'mix_2')],
           names=['b', 'c'])


In [30]:
print(rename_levels(i23, {'b': 'd', 'c': 'e'}))

MultiIndex([(1, 'mix_0'),
            (2, 'mix_1'),
            (3, 'mix_2')],
           names=['d', 'e'])


In [94]:
print(select_index_levels(i23, 'b'))
print(select_index_levels(i23, ['b']))
print(select_index_levels(i23, ['b', 'c']))

Int64Index([1, 2, 3], dtype='int64', name='b')
MultiIndex([(1,),
            (2,),
            (3,)],
           names=['b'])
MultiIndex([(1, 'mix_0'),
            (2, 'mix_1'),
            (3, 'mix_2')],
           names=['b', 'c'])


## Broadcasting

In [31]:
print(soft_broadcast_to_ndim(a2, 1))
print(soft_broadcast_to_ndim(sr2, 1))
print(soft_broadcast_to_ndim(df2, 1))
print(soft_broadcast_to_ndim(df4, 1)) # cannot -> do nothing
print(soft_broadcast_to_ndim(a2, 2))
print(soft_broadcast_to_ndim(sr2, 2))
print(soft_broadcast_to_ndim(df2, 2))

[1 2 3]
i2
x2    1
y2    2
z2    3
Name: a2, dtype: int64
i4
x4    1
y4    2
z4    3
Name: a4, dtype: int64
c6  a6  b6  c6
i6            
x6   1   2   3
y6   4   5   6
z6   7   8   9
[[1]
 [2]
 [3]]
    a2
i2    
x2   1
y2   2
z2   3
c4  a4
i4    
x4   1
y4   2
z4   3


In [32]:
print(wrap_array(a1, index=sr1.index, columns=[sr1.name], to_ndim=1))
print(wrap_array(a1, index=sr1.index, columns=[sr1.name], to_ndim=2))
print(wrap_array(a2, index=sr2.index, columns=[sr2.name], to_ndim=1))
print(wrap_array(a2, index=sr2.index, columns=[sr2.name], to_ndim=2))
print(wrap_array(a2, index=df2.index, columns=df2.columns, to_ndim=1))
print(wrap_array(a2, index=df2.index, columns=df2.columns, to_ndim=2))
print(wrap_array(a2, index=df4.index, columns=None, default_index=df2.index, default_columns=df2.columns, to_ndim=1))
print(wrap_array(a2, index=df4.index, columns=None, default_index=df2.index, default_columns=df2.columns, to_ndim=2))

i1
x1    1
Name: a1, dtype: int64
    a1
i1    
x1   1
i2
x2    1
y2    2
z2    3
Name: a2, dtype: int64
    a2
i2    
x2   1
y2   2
z2   3
i4
x4    1
y4    2
z4    3
Name: a4, dtype: int64
c4  a4
i4    
x4   1
y4   2
z4   3
i6
x6    1
y6    2
z6    3
Name: a4, dtype: int64
c4  a4
i6    
x6   1
y6   2
z6   3


In [33]:
print(to_1d(None))
print(to_1d(v1))
print(to_1d(a1))
print(to_1d(a2))
print(to_1d(sr1))
print(to_1d(sr2))
print(to_1d(df1))
print(to_1d(df2))

[None]
[0]
[1]
[1 2 3]
i1
x1    1
Name: a1, dtype: int64
i2
x2    1
y2    2
z2    3
Name: a2, dtype: int64
i3
x3    1
Name: a3, dtype: int64
i4
x4    1
y4    2
z4    3
Name: a4, dtype: int64


In [34]:
print(to_2d(None))
print(to_2d(v1))
print(to_2d(a1))
print(to_2d(a2))
print(to_2d(sr1))
print(to_2d(sr2))
print(to_2d(sr2, expand_axis=0))

[[None]]
[[0]]
[[1]]
[[1]
 [2]
 [3]]
    a1
i1    
x1   1
    a2
i2    
x2   1
y2   2
z2   3
i2  x2  y2  z2
0    1   2   3


In [35]:
print(tile(v1, 3, along_axis=0))
print(tile(a1, 3, along_axis=0))
print(tile(a2, 3, along_axis=0))
print(tile(a3, 3, along_axis=0))
print(tile(a4, 3, along_axis=0))
print(tile(a5, 3, along_axis=0))
print(tile(sr1, 3, along_axis=0))
print(tile(sr2, 3, along_axis=0))
print(tile(df1, 3, along_axis=0))
print(tile(df2, 3, along_axis=0))
print(tile(df3, 3, along_axis=0))
print(tile(df4, 3, along_axis=0))

None
[1 1 1]
[1 2 3 1 2 3 1 2 3]
[[1 2 3]
 [1 2 3]
 [1 2 3]]
[[1]
 [2]
 [3]
 [1]
 [2]
 [3]
 [1]
 [2]
 [3]]
[[1 2 3]
 [4 5 6]
 [7 8 9]
 [1 2 3]
 [4 5 6]
 [7 8 9]
 [1 2 3]
 [4 5 6]
 [7 8 9]]
i1
x1    1
x1    1
x1    1
Name: a1, dtype: int64
i2
x2    1
y2    2
z2    3
x2    1
y2    2
z2    3
x2    1
y2    2
z2    3
Name: a2, dtype: int64
c3  a3
i3    
x3   1
x3   1
x3   1
c4  a4
i4    
x4   1
y4   2
z4   3
x4   1
y4   2
z4   3
x4   1
y4   2
z4   3
c5  a5  b5  c5
i5            
x5   1   2   3
x5   1   2   3
x5   1   2   3
c6  a6  b6  c6
i6            
x6   1   2   3
y6   4   5   6
z6   7   8   9
x6   1   2   3
y6   4   5   6
z6   7   8   9
x6   1   2   3
y6   4   5   6
z6   7   8   9


In [36]:
print(tile(v1, 3, along_axis=1))
print(tile(a1, 3, along_axis=1))
print(tile(a2, 3, along_axis=1))
print(tile(a3, 3, along_axis=1))
print(tile(a4, 3, along_axis=1))
print(tile(a5, 3, along_axis=1))
print(tile(sr1, 3, along_axis=1))
print(tile(sr2, 3, along_axis=1))
print(tile(df1, 3, along_axis=1))
print(tile(df2, 3, along_axis=1))
print(tile(df3, 3, along_axis=1))
print(tile(df4, 3, along_axis=1))

[[0 0 0]]
[[1 1 1]]
[[1 1 1]
 [2 2 2]
 [3 3 3]]
[[1 2 3 1 2 3 1 2 3]]
[[1 1 1]
 [2 2 2]
 [3 3 3]]
[[1 2 3 1 2 3 1 2 3]
 [4 5 6 4 5 6 4 5 6]
 [7 8 9 7 8 9 7 8 9]]
    a1  a1  a1
i1            
x1   1   1   1
    a2  a2  a2
i2            
x2   1   1   1
y2   2   2   2
z2   3   3   3
c3  a3  a3  a3
i3            
x3   1   1   1
c4  a4  a4  a4
i4            
x4   1   1   1
y4   2   2   2
z4   3   3   3
c5  a5  b5  c5  a5  b5  c5  a5  b5  c5
i5                                    
x5   1   2   3   1   2   3   1   2   3
c6  a6  b6  c6  a6  b6  c6  a6  b6  c6
i6                                    
x6   1   2   3   1   2   3   1   2   3
y6   4   5   6   4   5   6   4   5   6
z6   7   8   9   7   8   9   7   8   9


In [37]:
print(repeat(v1, 3, along_axis=0))
print(repeat(a1, 3, along_axis=0))
print(repeat(a2, 3, along_axis=0))
print(repeat(a3, 3, along_axis=0))
print(repeat(a4, 3, along_axis=0))
print(repeat(a5, 3, along_axis=0))
print(repeat(sr1, 3, along_axis=0))
print(repeat(sr2, 3, along_axis=0))
print(repeat(df1, 3, along_axis=0))
print(repeat(df2, 3, along_axis=0))
print(repeat(df3, 3, along_axis=0))
print(repeat(df4, 3, along_axis=0))

[0 0 0]
[1 1 1]
[1 1 1 2 2 2 3 3 3]
[[1 2 3]
 [1 2 3]
 [1 2 3]]
[[1]
 [1]
 [1]
 [2]
 [2]
 [2]
 [3]
 [3]
 [3]]
[[1 2 3]
 [1 2 3]
 [1 2 3]
 [4 5 6]
 [4 5 6]
 [4 5 6]
 [7 8 9]
 [7 8 9]
 [7 8 9]]
i1
x1    1
x1    1
x1    1
Name: a1, dtype: int64
i2
x2    1
x2    1
x2    1
y2    2
y2    2
y2    2
z2    3
z2    3
z2    3
Name: a2, dtype: int64
c3  a3
i3    
x3   1
x3   1
x3   1
c4  a4
i4    
x4   1
x4   1
x4   1
y4   2
y4   2
y4   2
z4   3
z4   3
z4   3
c5  a5  b5  c5
i5            
x5   1   2   3
x5   1   2   3
x5   1   2   3
c6  a6  b6  c6
i6            
x6   1   2   3
x6   1   2   3
x6   1   2   3
y6   4   5   6
y6   4   5   6
y6   4   5   6
z6   7   8   9
z6   7   8   9
z6   7   8   9


In [38]:
print(repeat(v1, 3, along_axis=1))
print(repeat(a1, 3, along_axis=1))
print(repeat(a2, 3, along_axis=1))
print(repeat(a3, 3, along_axis=1))
print(repeat(a4, 3, along_axis=1))
print(repeat(a5, 3, along_axis=1))
print(repeat(sr1, 3, along_axis=1))
print(repeat(sr2, 3, along_axis=1))
print(repeat(df1, 3, along_axis=1))
print(repeat(df2, 3, along_axis=1))
print(repeat(df3, 3, along_axis=1))
print(repeat(df4, 3, along_axis=1))

[[0 0 0]]
[[1 1 1]]
[[1 1 1]
 [2 2 2]
 [3 3 3]]
[[1 1 1 2 2 2 3 3 3]]
[[1 1 1]
 [2 2 2]
 [3 3 3]]
[[1 1 1 2 2 2 3 3 3]
 [4 4 4 5 5 5 6 6 6]
 [7 7 7 8 8 8 9 9 9]]
    a1  a1  a1
i1            
x1   1   1   1
    a2  a2  a2
i2            
x2   1   1   1
y2   2   2   2
z2   3   3   3
c3  a3  a3  a3
i3            
x3   1   1   1
c4  a4  a4  a4
i4            
x4   1   1   1
y4   2   2   2
z4   3   3   3
c5  a5  a5  a5  b5  b5  b5  c5  c5  c5
i5                                    
x5   1   1   1   2   2   2   3   3   3
c6  a6  a6  a6  b6  b6  b6  c6  c6  c6
i6                                    
x6   1   1   1   2   2   2   3   3   3
y6   4   4   4   5   5   5   6   6   6
z6   7   7   7   8   8   8   9   9   9


In [39]:
multi_c1 = pd.MultiIndex.from_arrays([['a8', 'b8']], names=['c8'])
multi_c2 = pd.MultiIndex.from_arrays([['a7', 'a7', 'c7', 'c7'], ['a8', 'b8', 'a8', 'b8']], names=['c7', 'c8'])

align_index_to(multi_c1, multi_c2)

array([0, 1, 0, 1])

In [40]:
# Change broadcasting rules globally
vbt.utils.broadcast_defaults['index_from'] = 'stack' # default is 'strict'
vbt.utils.broadcast_defaults['columns_from'] = 'stack'

print(vbt.utils.broadcast_defaults)

{'index_from': 'stack', 'columns_from': 'stack', 'ignore_single': True, 'drop_duplicates': True, 'keep': 'last'}


In [41]:
# Broadcasting arrays
args = [
    ('v1', v1),
    ('a1', a1),
    ('a2', a2),
    ('a3', a3),
    ('a4', a4),
    ('a5', a5)
]
arg_combs = list(itertools.combinations_with_replacement(args, 2))

for (n1, arg1), (n2, arg2) in arg_combs:
    print(n1 + '+' + n2)
    arg1, arg2 = broadcast(arg1, arg2)
    print(arg1)
    print(arg2)
    print()

v1+v1
0
0

v1+a1
[0]
[1]

v1+a2
[0 0 0]
[1 2 3]

v1+a3
[[0 0 0]]
[[1 2 3]]

v1+a4
[[0]
 [0]
 [0]]
[[1]
 [2]
 [3]]

v1+a5
[[0 0 0]
 [0 0 0]
 [0 0 0]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]

a1+a1
[1]
[1]

a1+a2
[1 1 1]
[1 2 3]

a1+a3
[[1 1 1]]
[[1 2 3]]

a1+a4
[[1]
 [1]
 [1]]
[[1]
 [2]
 [3]]

a1+a5
[[1 1 1]
 [1 1 1]
 [1 1 1]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]

a2+a2
[1 2 3]
[1 2 3]

a2+a3
[[1 2 3]]
[[1 2 3]]

a2+a4
[[1 2 3]
 [1 2 3]
 [1 2 3]]
[[1 1 1]
 [2 2 2]
 [3 3 3]]

a2+a5
[[1 2 3]
 [1 2 3]
 [1 2 3]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]

a3+a3
[[1 2 3]]
[[1 2 3]]

a3+a4
[[1 2 3]
 [1 2 3]
 [1 2 3]]
[[1 1 1]
 [2 2 2]
 [3 3 3]]

a3+a5
[[1 2 3]
 [1 2 3]
 [1 2 3]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]

a4+a4
[[1]
 [2]
 [3]]
[[1]
 [2]
 [3]]

a4+a5
[[1 1 1]
 [2 2 2]
 [3 3 3]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]

a5+a5
[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]



In [42]:
# Broadcasting series
args = [
    ('sr1', sr1),
    ('sr2', sr2)
]
arg_combs = list(itertools.combinations_with_replacement(args, 2))

for (n1, arg1), (n2, arg2) in arg_combs:
    print(n1 + '+' + n2)
    arg1, arg2 = broadcast(arg1, arg2)
    print(arg1)
    print(arg2)
    print()

sr1+sr1
i1
x1    1
Name: a1, dtype: int64
i1
x1    1
Name: a1, dtype: int64

sr1+sr2
i2
x2    1
y2    1
z2    1
Name: (a1, a2), dtype: int64
i2
x2    1
y2    2
z2    3
Name: (a1, a2), dtype: int64

sr2+sr2
i2
x2    1
y2    2
z2    3
Name: a2, dtype: int64
i2
x2    1
y2    2
z2    3
Name: a2, dtype: int64



In [43]:
# Broadcasting arrays and series
a_args = [
    ('v1', v1),
    ('a1', a1),
    ('a2', a2),
    ('a3', a3),
    ('a4', a4),
    ('a5', a5)
]
sr_args = [
    ('sr1', sr1),
    ('sr2', sr2)
]
arg_combs = list(itertools.product(a_args, sr_args))

for (n1, arg1), (n2, arg2) in arg_combs:
    print(n1 + '+' + n2)
    arg1, arg2 = broadcast(arg1, arg2)
    print(arg1)
    print(arg2)
    print()

v1+sr1
i1
x1    0
Name: a1, dtype: int64
i1
x1    1
Name: a1, dtype: int64

v1+sr2
i2
x2    0
y2    0
z2    0
Name: a2, dtype: int64
i2
x2    1
y2    2
z2    3
Name: a2, dtype: int64

a1+sr1
i1
x1    1
Name: a1, dtype: int64
i1
x1    1
Name: a1, dtype: int64

a1+sr2
i2
x2    1
y2    1
z2    1
Name: a2, dtype: int64
i2
x2    1
y2    2
z2    3
Name: a2, dtype: int64

a2+sr1
i1
x1    1
x1    2
x1    3
Name: a1, dtype: int64
i1
x1    1
x1    1
x1    1
Name: a1, dtype: int64

a2+sr2
i2
x2    1
y2    2
z2    3
Name: a2, dtype: int64
i2
x2    1
y2    2
z2    3
Name: a2, dtype: int64

a3+sr1
    a1  a1  a1
i1            
x1   1   2   3
    a1  a1  a1
i1            
x1   1   1   1

a3+sr2
    a2  a2  a2
i2            
x2   1   2   3
y2   1   2   3
z2   1   2   3
    a2  a2  a2
i2            
x2   1   1   1
y2   2   2   2
z2   3   3   3

a4+sr1
    a1
i1    
x1   1
x1   2
x1   3
    a1
i1    
x1   1
x1   1
x1   1

a4+sr2
    a2
i2    
x2   1
y2   2
z2   3
    a2
i2    
x2   1
y2   2
z2   3

a5+s

In [44]:
# Broadcasting dataframes
args = [
    ('df1', df1),
    ('df2', df2),
    ('df3', df3),
    ('df4', df4)
]
arg_combs = list(itertools.combinations_with_replacement(args, 2))

for (n1, arg1), (n2, arg2) in arg_combs:
    print(n1 + '+' + n2)
    arg1, arg2 = broadcast(arg1, arg2)
    print(arg1)
    print(arg2)
    print()

df1+df1
c3  a3
i3    
x3   1
c3  a3
i3    
x3   1

df1+df2
c3 a3
c4 a4
i4   
x4  1
y4  1
z4  1
c3 a3
c4 a4
i4   
x4  1
y4  2
z4  3

df1+df3
c5     a5  b5  c5
i3 i5            
x3 x5   1   1   1
c5     a5  b5  c5
i3 i5            
x3 x5   1   2   3

df1+df4
c6  a6  b6  c6
i6            
x6   1   1   1
y6   1   1   1
z6   1   1   1
c6  a6  b6  c6
i6            
x6   1   2   3
y6   4   5   6
z6   7   8   9

df2+df2
c4  a4
i4    
x4   1
y4   2
z4   3
c4  a4
i4    
x4   1
y4   2
z4   3

df2+df3
c5  a5  b5  c5
i4            
x4   1   1   1
y4   2   2   2
z4   3   3   3
c5  a5  b5  c5
i4            
x4   1   2   3
y4   1   2   3
z4   1   2   3

df2+df4
c6     a6  b6  c6
i4 i6            
x4 x6   1   1   1
y4 y6   2   2   2
z4 z6   3   3   3
c6     a6  b6  c6
i4 i6            
x4 x6   1   2   3
y4 y6   4   5   6
z4 z6   7   8   9

df3+df3
c5  a5  b5  c5
i5            
x5   1   2   3
c5  a5  b5  c5
i5            
x5   1   2   3

df3+df4
c5 a5 b5 c5
c6 a6 b6 c6
i6         
x6  1  2  3
y6  1  2  

In [45]:
# Broadcasting arrays and dataframes
# Every (scalar/array, DataFrame) pair is broadcast and both results printed.
a_args = [
    ('v1', v1),
    ('a1', a1),
    ('a2', a2),
    ('a3', a3),
    ('a4', a4),
    ('a5', a5)
]
# Named df_args (not sr_args): this list holds DataFrames, not Series
df_args = [
    ('df1', df1),
    ('df2', df2),
    ('df3', df3),
    ('df4', df4)
]
arg_combs = list(itertools.product(a_args, df_args))

for (n1, arg1), (n2, arg2) in arg_combs:
    print(n1 + '+' + n2)
    arg1, arg2 = broadcast(arg1, arg2)
    print(arg1)
    print(arg2)
    print()

v1+df1
c3  a3
i3    
x3   0
c3  a3
i3    
x3   1

v1+df2
c4  a4
i4    
x4   0
y4   0
z4   0
c4  a4
i4    
x4   1
y4   2
z4   3

v1+df3
c5  a5  b5  c5
i5            
x5   0   0   0
c5  a5  b5  c5
i5            
x5   1   2   3

v1+df4
c6  a6  b6  c6
i6            
x6   0   0   0
y6   0   0   0
z6   0   0   0
c6  a6  b6  c6
i6            
x6   1   2   3
y6   4   5   6
z6   7   8   9

a1+df1
c3  a3
i3    
x3   1
c3  a3
i3    
x3   1

a1+df2
c4  a4
i4    
x4   1
y4   1
z4   1
c4  a4
i4    
x4   1
y4   2
z4   3

a1+df3
c5  a5  b5  c5
i5            
x5   1   1   1
c5  a5  b5  c5
i5            
x5   1   2   3

a1+df4
c6  a6  b6  c6
i6            
x6   1   1   1
y6   1   1   1
z6   1   1   1
c6  a6  b6  c6
i6            
x6   1   2   3
y6   4   5   6
z6   7   8   9

a2+df1
c3  a3  a3  a3
i3            
x3   1   2   3
c3  a3  a3  a3
i3            
x3   1   1   1

a2+df2
c4  a4  a4  a4
i4            
x4   1   2   3
y4   1   2   3
z4   1   2   3
c4  a4  a4  a4
i4            
x4   1   1   1
y4   2 

In [46]:
# Broadcasting series and dataframes
# Every (Series, DataFrame) pair is broadcast and both results printed.
# The lists are named after what they actually hold (Series / DataFrames).
sr_args = [
    ('sr1', sr1),
    ('sr2', sr2)
]
df_args = [
    ('df1', df1),
    ('df2', df2),
    ('df3', df3),
    ('df4', df4)
]
arg_combs = list(itertools.product(sr_args, df_args))

for (n1, arg1), (n2, arg2) in arg_combs:
    print(n1 + '+' + n2)
    arg1, arg2 = broadcast(arg1, arg2)
    print(arg1)
    print(arg2)
    print()

sr1+df1
      a1
c3    a3
i1 i3   
x1 x3  1
      a1
c3    a3
i1 i3   
x1 x3  1

sr1+df2
   a1
c4 a4
i4   
x4  1
y4  1
z4  1
   a1
c4 a4
i4   
x4  1
y4  2
z4  3

sr1+df3
c5     a5  b5  c5
i1 i5            
x1 x5   1   1   1
c5     a5  b5  c5
i1 i5            
x1 x5   1   2   3

sr1+df4
c6  a6  b6  c6
i6            
x6   1   1   1
y6   1   1   1
z6   1   1   1
c6  a6  b6  c6
i6            
x6   1   2   3
y6   4   5   6
z6   7   8   9

sr2+df1
   a2
c3 a3
i2   
x2  1
y2  2
z2  3
   a2
c3 a3
i2   
x2  1
y2  1
z2  1

sr2+df2
      a2
c4    a4
i2 i4   
x2 x4  1
y2 y4  2
z2 z4  3
      a2
c4    a4
i2 i4   
x2 x4  1
y2 y4  2
z2 z4  3

sr2+df3
c5  a5  b5  c5
i2            
x2   1   1   1
y2   2   2   2
z2   3   3   3
c5  a5  b5  c5
i2            
x2   1   2   3
y2   1   2   3
z2   1   2   3

sr2+df4
c6     a6  b6  c6
i2 i6            
x2 x6   1   1   1
y2 y6   2   2   2
z2 z6   3   3   3
c6     a6  b6  c6
i2 i6            
x2 x6   1   2   3
y2 y6   4   5   6
z2 z6   7   8   9



In [47]:
# Broadcasting all at once
for i in broadcast(
    v1, a1, a2, a3, a4, a5, sr1, sr2, df1, df2, df3, df4,
    index_from='stack',
    columns_from='stack'
):
    print(i)

c5       a5 b5 c5
c6       a6 b6 c6
i2 i4 i6         
x2 x4 x6  0  0  0
y2 y4 y6  0  0  0
z2 z4 z6  0  0  0
c5       a5 b5 c5
c6       a6 b6 c6
i2 i4 i6         
x2 x4 x6  1  1  1
y2 y4 y6  1  1  1
z2 z4 z6  1  1  1
c5       a5 b5 c5
c6       a6 b6 c6
i2 i4 i6         
x2 x4 x6  1  2  3
y2 y4 y6  1  2  3
z2 z4 z6  1  2  3
c5       a5 b5 c5
c6       a6 b6 c6
i2 i4 i6         
x2 x4 x6  1  2  3
y2 y4 y6  1  2  3
z2 z4 z6  1  2  3
c5       a5 b5 c5
c6       a6 b6 c6
i2 i4 i6         
x2 x4 x6  1  1  1
y2 y4 y6  2  2  2
z2 z4 z6  3  3  3
c5       a5 b5 c5
c6       a6 b6 c6
i2 i4 i6         
x2 x4 x6  1  2  3
y2 y4 y6  4  5  6
z2 z4 z6  7  8  9
c5       a5 b5 c5
c6       a6 b6 c6
i2 i4 i6         
x2 x4 x6  1  1  1
y2 y4 y6  1  1  1
z2 z4 z6  1  1  1
c5       a5 b5 c5
c6       a6 b6 c6
i2 i4 i6         
x2 x4 x6  1  1  1
y2 y4 y6  2  2  2
z2 z4 z6  3  3  3
c5       a5 b5 c5
c6       a6 b6 c6
i2 i4 i6         
x2 x4 x6  1  1  1
y2 y4 y6  1  1  1
z2 z4 z6  1  1  1
c5       a5 b5 c5
c6       a

In [48]:
for i in broadcast(
    v1, a1, a2, a3, a4, a5, sr1, sr2, df1, df2, df3, df4,
    index_from=None, # use as-is
    columns_from=None
):
    print(i)

[[0 0 0]
 [0 0 0]
 [0 0 0]]
[[1 1 1]
 [1 1 1]
 [1 1 1]]
[[1 2 3]
 [1 2 3]
 [1 2 3]]
[[1 2 3]
 [1 2 3]
 [1 2 3]]
[[1 1 1]
 [2 2 2]
 [3 3 3]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]
    a1  a1  a1
i1            
x1   1   1   1
x1   1   1   1
x1   1   1   1
    a2  a2  a2
i2            
x2   1   1   1
y2   2   2   2
z2   3   3   3
c3  a3  a3  a3
i3            
x3   1   1   1
x3   1   1   1
x3   1   1   1
c4  a4  a4  a4
i4            
x4   1   1   1
y4   2   2   2
z4   3   3   3
c5  a5  b5  c5
i5            
x5   1   2   3
x5   1   2   3
x5   1   2   3
c6  a6  b6  c6
i6            
x6   1   2   3
y6   4   5   6
z6   7   8   9


In [49]:
for i in broadcast(
    v1, a1, a2, a3, a4, a5, sr1, sr2, df1, df2, df3, df4,
    index_from=-1, # take index from the last dataframe
    columns_from=-1
):
    print(i)

c6  a6  b6  c6
i6            
x6   0   0   0
y6   0   0   0
z6   0   0   0
c6  a6  b6  c6
i6            
x6   1   1   1
y6   1   1   1
z6   1   1   1
c6  a6  b6  c6
i6            
x6   1   2   3
y6   1   2   3
z6   1   2   3
c6  a6  b6  c6
i6            
x6   1   2   3
y6   1   2   3
z6   1   2   3
c6  a6  b6  c6
i6            
x6   1   1   1
y6   2   2   2
z6   3   3   3
c6  a6  b6  c6
i6            
x6   1   2   3
y6   4   5   6
z6   7   8   9
c6  a6  b6  c6
i6            
x6   1   1   1
y6   1   1   1
z6   1   1   1
c6  a6  b6  c6
i6            
x6   1   1   1
y6   2   2   2
z6   3   3   3
c6  a6  b6  c6
i6            
x6   1   1   1
y6   1   1   1
z6   1   1   1
c6  a6  b6  c6
i6            
x6   1   1   1
y6   2   2   2
z6   3   3   3
c6  a6  b6  c6
i6            
x6   1   2   3
y6   1   2   3
z6   1   2   3
c6  a6  b6  c6
i6            
x6   1   2   3
y6   4   5   6
z6   7   8   9


In [50]:
# Do not clean columns
vbt.utils.broadcast_defaults['drop_duplicates'] = False
vbt.utils.broadcast_defaults['ignore_single'] = False

# The defaults are global state: restore them even if broadcasting raises,
# so a failure here cannot leak altered settings into later cells.
try:
    for i in broadcast(
        v1, a1, a2, a3, a4, a5, sr1, sr2, df1, df2, df3, df4,
        index_from='stack', # stack but do not clean
        columns_from='stack'
    ):
        print(i)
finally:
    vbt.utils.broadcast_defaults.reset()

                  a1      
                  a2      
c3                a3      
c4                a4      
c5                a5 b5 c5
c6                a6 b6 c6
i1 i2 i3 i4 i5 i6         
x1 x2 x3 x4 x5 x6  0  0  0
   y2 x3 y4 x5 y6  0  0  0
   z2 x3 z4 x5 z6  0  0  0
                  a1      
                  a2      
c3                a3      
c4                a4      
c5                a5 b5 c5
c6                a6 b6 c6
i1 i2 i3 i4 i5 i6         
x1 x2 x3 x4 x5 x6  1  1  1
   y2 x3 y4 x5 y6  1  1  1
   z2 x3 z4 x5 z6  1  1  1
                  a1      
                  a2      
c3                a3      
c4                a4      
c5                a5 b5 c5
c6                a6 b6 c6
i1 i2 i3 i4 i5 i6         
x1 x2 x3 x4 x5 x6  1  2  3
   y2 x3 y4 x5 y6  1  2  3
   z2 x3 z4 x5 z6  1  2  3
                  a1      
                  a2      
c3                a3      
c4                a4      
c5                a5 b5 c5
c6                a6 b6 c6
i1 i2 i3 i4 i5 i6         
x

In [51]:
big_a = np.empty((1000, 1000))

%timeit broadcast(np.empty((1,)), big_a) # readonly arrays
%timeit broadcast(np.empty((1,)), big_a, writeable=True) # writable arrays
%timeit broadcast(np.empty((1,)), big_a, writeable=True, copy_kwargs={'order': 'C'}) # writable arrays in same order
%timeit broadcast(big_a, big_a) # no broadcasting
%timeit broadcast(big_a, big_a, writeable=True) # no broadcasting, writeable has no effect
%timeit broadcast(big_a, big_a, writeable=True, copy_kwargs={'order': 'C'}) # no copy
%timeit broadcast(big_a, big_a, writeable=True, copy_kwargs={'order': 'F'}) # copy

41 µs ± 13.5 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
5.51 ms ± 408 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
6.07 ms ± 316 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
11.7 µs ± 542 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
12.7 µs ± 929 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
11.8 µs ± 965 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
7.82 ms ± 161 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [52]:
# One-side broadcasting, default behaviour is copying index/columns from the second argument
print(broadcast_to(sr1, sr1))
print(broadcast_to(sr1, sr2))
print(broadcast_to(sr1, df1))
print(broadcast_to(sr1, df2))
print(broadcast_to(sr1, df3))
print(broadcast_to(sr1, df4))

i1
x1    1
Name: a1, dtype: int64
i2
x2    1
y2    1
z2    1
Name: a2, dtype: int64
c3  a3
i3    
x3   1
c4  a4
i4    
x4   1
y4   1
z4   1
c5  a5  b5  c5
i5            
x5   1   1   1
c6  a6  b6  c6
i6            
x6   1   1   1
y6   1   1   1
z6   1   1   1


In [53]:
# Broadcast the first argument to an array whose elements each have the shape of the second argument
print(broadcast_to_array_of(0.1, v1))
print(broadcast_to_array_of([0.1], v1))
print(broadcast_to_array_of([0.1, 0.2], v1))

[0.1]
[0.1]
[0.1 0.2]


In [54]:
print(broadcast_to_array_of(0.1, sr2))
print(broadcast_to_array_of([0.1], sr2))
print(broadcast_to_array_of([0.1, 0.2], sr2))
print(broadcast_to_array_of([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], sr2))

[[0.1 0.1 0.1]]
[[0.1 0.1 0.1]]
[[0.1 0.1 0.1]
 [0.2 0.2 0.2]]
[[0.1 0.2 0.3]
 [0.4 0.5 0.6]]


In [55]:
print(broadcast_to_array_of(0.1, df2))
print(broadcast_to_array_of([0.1], df2))
print(broadcast_to_array_of([0.1, 0.2], df2))
print(broadcast_to_array_of([[[0.1], [0.2], [0.3]], [[0.4], [0.5], [0.6]]], df2))

[[[0.1]
  [0.1]
  [0.1]]]
[[[0.1]
  [0.1]
  [0.1]]]
[[[0.1]
  [0.1]
  [0.1]]

 [[0.2]
  [0.2]
  [0.2]]]
[[[0.1]
  [0.2]
  [0.3]]

 [[0.4]
  [0.5]
  [0.6]]]


In [56]:
print(broadcast_to_array_of(0.1, np.empty((2, 2, 2)))) # works even for ndim > 2

[[[[0.1 0.1]
   [0.1 0.1]]

  [[0.1 0.1]
   [0.1 0.1]]]]


## Indexing

In [57]:
def indexing_func(obj, loc_pandas_func):
    """Apply one pandas indexing operation to an `H` instance.

    `loc_pandas_func` is applied to `obj.a`, and each of the three mappers
    stored on `obj` is passed through `loc_mapper` with the same function.
    The results are packed into a new `H`; `obj` itself is not modified here.
    """
    # Re-index the mappers first, in the order H's constructor expects them.
    reindexed_mappers = [
        loc_mapper(mapper, obj.a, loc_pandas_func)
        for mapper in (obj._param1_mapper, obj._param2_mapper, obj._tuple_mapper)
    ]
    return H(loc_pandas_func(obj.a), *reindexed_mappers)

@add_indexing(indexing_func) # indexing using pandas
@add_param_indexing('param1', indexing_func) # indexing using params
@add_param_indexing('param2', indexing_func)
@add_param_indexing('tuple', indexing_func)
class H:
    """Toy two-parameter "indicator" used to demonstrate the indexing decorators.

    Holds a frame `a` plus three Series ("mappers") that share `a`'s columns
    as their index and record, per column, the first parameter, the second
    parameter and the (first, second) tuple that the column belongs to.
    """

    def __init__(self, a, param1_mapper, param2_mapper, tuple_mapper):
        self.a = a
        # Private names follow the '_<name>_mapper' pattern for the names
        # passed to add_param_indexing above -- presumably what the decorator
        # looks up; confirm against its implementation.
        self._param1_mapper = param1_mapper
        self._param2_mapper = param2_mapper
        self._tuple_mapper = tuple_mapper

    @classmethod
    def from_params(cls, a, params1, params2, level_names=('p1', 'p2')):
        """Build an instance with one copy of `a` per parameter pair.

        Args:
            a: Input data; passed through `to_2d` and then accessed via
                `.columns`, `.index` and `.values` (so presumably a pandas
                object -- not validated here).
            params1: Values of the first parameter, one per combination.
            params2: Values of the second parameter, paired element-wise
                with `params1`.
            level_names: Names of the two column levels created for the
                parameters.

        Returns:
            A new instance whose `a` has columns
            `combine_indices(stack_indices(params1, params2), a.columns)`.
        """
        a = to_2d(a)
        # Build column hierarchy
        params1_idx = pd.Index(params1, name=level_names[0])
        params2_idx = pd.Index(params2, name=level_names[1])
        params_idx = stack_indices(params1_idx, params2_idx)
        new_columns = combine_indices(params_idx, a.columns)

        # Build mappers: each parameter value is repeated once per input column
        n_columns = len(a.columns)
        param1_mapper = pd.Series(
            np.repeat(params1, n_columns), index=new_columns, name=params1_idx.name)
        param2_mapper = pd.Series(
            np.repeat(params2, n_columns), index=new_columns, name=params2_idx.name)
        tuple_mapper = pd.Series(
            list(zip(param1_mapper.values, param2_mapper.values)),
            index=new_columns,
            name=(params1_idx.name, params2_idx.name))

        # Tile a to match the length of new_columns: one copy per parameter
        # pair (previously hard-coded to 4, which only fit four pairs)
        tiled = tile(a.values, len(params_idx), along_axis=1)
        a = wrap_array(tiled, index=a.index, columns=new_columns)
        return cls(a, param1_mapper, param2_mapper, tuple_mapper)
        

# Simulate an indicator with two params
h = H.from_params(df4, [0.1, 0.1, 0.2, 0.2], [0.3, 0.4, 0.5, 0.6])

# Show the input next to the tiled data and each of the mappers
for shown in (df4, h.a, h._param1_mapper, h._param2_mapper, h._tuple_mapper):
    print(shown)

c6  a6  b6  c6
i6            
x6   1   2   3
y6   4   5   6
z6   7   8   9
p1 0.1                 0.2                
p2 0.3       0.4       0.5       0.6      
c6  a6 b6 c6  a6 b6 c6  a6 b6 c6  a6 b6 c6
i6                                        
x6   1  2  3   1  2  3   1  2  3   1  2  3
y6   4  5  6   4  5  6   4  5  6   4  5  6
z6   7  8  9   7  8  9   7  8  9   7  8  9
p1   p2   c6
0.1  0.3  a6    0.1
          b6    0.1
          c6    0.1
     0.4  a6    0.1
          b6    0.1
          c6    0.1
0.2  0.5  a6    0.2
          b6    0.2
          c6    0.2
     0.6  a6    0.2
          b6    0.2
          c6    0.2
Name: p1, dtype: float64
p1   p2   c6
0.1  0.3  a6    0.3
          b6    0.3
          c6    0.3
     0.4  a6    0.4
          b6    0.4
          c6    0.4
0.2  0.5  a6    0.5
          b6    0.5
          c6    0.5
     0.6  a6    0.6
          b6    0.6
          c6    0.6
Name: p2, dtype: float64
p1   p2   c6
0.1  0.3  a6    (0.1, 0.3)
          b6    (0.1, 0.3)
 

In [58]:
# Indexing operations are delegated to the underlying dataframes
single_column = h[(0.1, 0.3, 'a6')]
print(single_column.a)
label_range = h.loc[:, (0.1, 0.3, 'a6'):(0.1, 0.3, 'c6')]
print(label_range.a)
tail_block = h.iloc[-2:, -2:]
print(tail_block.a)

i6
x6    1
y6    4
z6    7
Name: (0.1, 0.3, a6), dtype: int64
p1 0.1      
p2 0.3      
c6  a6 b6 c6
i6          
x6   1  2  3
y6   4  5  6
z6   7  8  9
p1 0.2   
p2 0.6   
c6  b6 c6
i6       
y6   5  6
z6   8  9


In [59]:
# Select by the first parameter using a scalar, a slice and a list key
for p1_key in (0.1, slice(0.1, 0.1), [0.1, 0.1]):
    print(h.param1_loc[p1_key].a)

p2 0.3       0.4      
c6  a6 b6 c6  a6 b6 c6
i6                    
x6   1  2  3   1  2  3
y6   4  5  6   4  5  6
z6   7  8  9   7  8  9
p1 0.1                
p2 0.3       0.4      
c6  a6 b6 c6  a6 b6 c6
i6                    
x6   1  2  3   1  2  3
y6   4  5  6   4  5  6
z6   7  8  9   7  8  9
p1 0.1                                    
p2 0.3       0.4       0.3       0.4      
c6  a6 b6 c6  a6 b6 c6  a6 b6 c6  a6 b6 c6
i6                                        
x6   1  2  3   1  2  3   1  2  3   1  2  3
y6   4  5  6   4  5  6   4  5  6   4  5  6
z6   7  8  9   7  8  9   7  8  9   7  8  9


In [60]:
# Select by the second parameter using a scalar, a slice and a list key
for p2_key in (0.3, slice(0.3, 0.3), [0.3, 0.3]):
    print(h.param2_loc[p2_key].a)

p1 0.1      
c6  a6 b6 c6
i6          
x6   1  2  3
y6   4  5  6
z6   7  8  9
p1 0.1      
p2 0.3      
c6  a6 b6 c6
i6          
x6   1  2  3
y6   4  5  6
z6   7  8  9
p1 0.1               
p2 0.3               
c6  a6 b6 c6 a6 b6 c6
i6                   
x6   1  2  3  1  2  3
y6   4  5  6  4  5  6
z6   7  8  9  7  8  9


In [61]:
# Select by the (param1, param2) tuple using a single tuple, a slice and a list key
pair = (0.1, 0.3)
for tuple_key in (pair, slice(pair, pair), [pair, pair]):
    print(h.tuple_loc[tuple_key].a)

c6  a6  b6  c6
i6            
x6   1   2   3
y6   4   5   6
z6   7   8   9
p1 0.1      
p2 0.3      
c6  a6 b6 c6
i6          
x6   1  2  3
y6   4  5  6
z6   7  8  9
p1 0.1               
p2 0.3               
c6  a6 b6 c6 a6 b6 c6
i6                   
x6   1  2  3  1  2  3
y6   4  5  6  4  5  6
z6   7  8  9  7  8  9


## Stacking

In [62]:
# Set both broadcasting defaults (index and columns) to 'stack'
for default_key in ('index_from', 'columns_from'):
    vbt.utils.broadcast_defaults[default_key] = 'stack'

In [63]:
# The first row of df5 is a Series indexed by the two column levels
first_row = df5.iloc[0]
print(unstack_to_array(first_row))

[[ 1. nan nan]
 [nan  2. nan]
 [nan nan  3.]]


In [64]:
# Named Series and DataFrames of every shape, plus a Series without any names
for sym_input in (sr1, sr2, df1, df2, df3, df4, pd.Series([1, 2, 3])):
    print(make_symmetric(sym_input))

('i1', None)   a1   x1
(i1, None)            
a1            NaN  1.0
x1            1.0  NaN
('i2', None)   a2   x2   y2   z2
(i2, None)                      
a2            NaN  1.0  2.0  3.0
x2            1.0  NaN  NaN  NaN
y2            2.0  NaN  NaN  NaN
z2            3.0  NaN  NaN  NaN
('i3', 'c3')   a3   x3
(i3, c3)              
a3            NaN  1.0
x3            1.0  NaN
('i4', 'c4')   a4   x4   y4   z4
(i4, c4)                        
a4            NaN  1.0  2.0  3.0
x4            1.0  NaN  NaN  NaN
y4            2.0  NaN  NaN  NaN
z4            3.0  NaN  NaN  NaN
('i5', 'c5')   a5   b5   c5   x5
(i5, c5)                        
a5            NaN  NaN  NaN  1.0
b5            NaN  NaN  NaN  2.0
c5            NaN  NaN  NaN  3.0
x5            1.0  2.0  3.0  NaN
('i6', 'c6')   a6   b6   c6   x6   y6   z6
(i6, c6)                                  
a6            NaN  NaN  NaN  1.0  4.0  7.0
b6            NaN  NaN  NaN  2.0  5.0  8.0
c6            NaN  NaN  NaN  3.0  6.0  9.0
x6     

In [65]:
# Unstack the first row of df5 into a DataFrame, as is and in symmetric form
first_row = df5.iloc[0]
print(unstack_to_df(first_row))
print(unstack_to_df(first_row, symmetric=True))

1    a8   b8   c8
0                
a7  1.0  NaN  NaN
b7  NaN  2.0  NaN
c7  NaN  NaN  3.0
(0, 1)   a7   a8   b7   b8   c7   c8
(0, 1)                              
a7      NaN  1.0  NaN  NaN  NaN  NaN
a8      1.0  NaN  NaN  NaN  NaN  NaN
b7      NaN  NaN  NaN  2.0  NaN  NaN
b8      NaN  NaN  2.0  NaN  NaN  NaN
c7      NaN  NaN  NaN  NaN  NaN  3.0
c8      NaN  NaN  NaN  NaN  3.0  NaN


In [66]:
# Plain Python and Numba variants, first on 1-dim and then on 2-dim input
for input_values in (sr2.values, df4.values):
    print(apply_and_concat_one(3, lambda i, x, a: x + a[i], input_values, [10, 20, 30]))
    print(apply_and_concat_one_nb(3, njit(lambda i, x, a: x + a[i]), input_values, (10, 20, 30)))

[[11 21 31]
 [12 22 32]
 [13 23 33]]
[[11 21 31]
 [12 22 32]
 [13 23 33]]
[[11 12 13 21 22 23 31 32 33]
 [14 15 16 24 25 26 34 35 36]
 [17 18 19 27 28 29 37 38 39]]
[[11 12 13 21 22 23 31 32 33]
 [14 15 16 24 25 26 34 35 36]
 [17 18 19 27 28 29 37 38 39]]


In [67]:
# The applied function returns two outputs; one concatenated array per output is printed
for input_values in (sr2.values, df4.values):
    print(apply_and_concat_multiple(3, lambda i, x, a: (x, x + a[i]), input_values, [10, 20, 30]))
    print(apply_and_concat_multiple_nb(3, njit(lambda i, x, a: (x, x + a[i])), input_values, (10, 20, 30)))

[array([[1, 1, 1],
       [2, 2, 2],
       [3, 3, 3]]), array([[11, 21, 31],
       [12, 22, 32],
       [13, 23, 33]])]
[array([[1, 1, 1],
       [2, 2, 2],
       [3, 3, 3]]), array([[11, 21, 31],
       [12, 22, 32],
       [13, 23, 33]])]
[array([[1, 2, 3, 1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6, 4, 5, 6],
       [7, 8, 9, 7, 8, 9, 7, 8, 9]]), array([[11, 12, 13, 21, 22, 23, 31, 32, 33],
       [14, 15, 16, 24, 25, 26, 34, 35, 36],
       [17, 18, 19, 27, 28, 29, 37, 38, 39]])]
[array([[1, 2, 3, 1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6, 4, 5, 6],
       [7, 8, 9, 7, 8, 9, 7, 8, 9]]), array([[11, 12, 13, 21, 22, 23, 31, 32, 33],
       [14, 15, 16, 24, 25, 26, 34, 35, 36],
       [17, 18, 19, 27, 28, 29, 37, 38, 39]])]


In [68]:
# Plain Python and Numba variants, first on 1-dim and then on 2-dim input
for input_values in (sr2.values, df4.values):
    print(apply_and_concat(input_values, 3, lambda i, x, a: x + a[i], [10, 20, 30]))
    print(apply_and_concat_nb(input_values, 3, njit(lambda i, x, a: x + a[i]), (10, 20, 30)))

[[11 21 31]
 [12 22 32]
 [13 23 33]]
[[11 21 31]
 [12 22 32]
 [13 23 33]]
[[11 12 13 21 22 23 31 32 33]
 [14 15 16 24 25 26 34 35 36]
 [17 18 19 27 28 29 37 38 39]]
[[11 12 13 21 22 23 31 32 33]
 [14 15 16 24 25 26 34 35 36]
 [17 18 19 27 28 29 37 38 39]]


In [69]:
# Combine the input with each of 10, 20 and 30; 100 is passed on to the combining function
for input_values in (sr2.values, df4.values):
    print(combine_and_concat(input_values, (10, 20, 30), lambda x, y, a: x + y + a, 100))
    print(combine_and_concat_nb(input_values, (10, 20, 30), njit(lambda x, y, a: x + y + a), 100))

[[111 121 131]
 [112 122 132]
 [113 123 133]]
[[111 121 131]
 [112 122 132]
 [113 123 133]]
[[111 112 113 121 122 123 131 132 133]
 [114 115 116 124 125 126 134 135 136]
 [117 118 119 127 128 129 137 138 139]]
[[111 112 113 121 122 123 131 132 133]
 [114 115 116 124 125 126 134 135 136]
 [117 118 119 127 128 129 137 138 139]]


In [70]:
# Combine the input with its double and its triple; 100 is passed on to the combining function
for base in (sr2.values, df4.values):
    print(combine_multiple((base, base*2, base*3), lambda x, y, a: x + y + a, 100))
    print(combine_multiple_nb((base, base*2, base*3), njit(lambda x, y, a: x + y + a), 100))

[206 212 218]
[206 212 218]
[[206 212 218]
 [224 230 236]
 [242 248 254]]
[[206 212 218]
 [224 230 236]
 [242 248 254]]


## Numba decorators

In [71]:
def a_nb(self):
    """Return ``self`` raised to the power of two."""
    return self ** 2

# NOTE(review): add_safe_nb_methods presumably attaches a_nb to the class as a method - confirm
@add_safe_nb_methods(a_nb)
class H(pd.DataFrame):
    """DataFrame subclass used to try out the add_safe_nb_methods decorator."""

    def h(self):
        """Return the frame itself."""
        return self


# Build the subclass from a Series and call its regular method
print(H(sr1).h())

    a1
i1    
x1   1


## Caching

In [72]:
class G():
    """Holder of a single expensive property, used to time cached_property."""

    @cached_property
    def cache_me(self):
        """Draw a 10000 x 10000 array of uniform random numbers."""
        return np.random.uniform(size=(10000, 10000))


g = G()

In [73]:
# First access: the property body runs and the random array is generated
%time _ = g.cache_me

CPU times: user 1.6 s, sys: 347 ms, total: 1.95 s
Wall time: 3.25 s


In [74]:
# Second access: the timing below drops to microseconds, i.e. the stored value is reused
%time _ = g.cache_me

CPU times: user 118 µs, sys: 44 µs, total: 162 µs
Wall time: 167 µs


## Custom accessors

In [75]:
# Raw values of sr2 as a 1-dim array and as a 2-dim column
sr2_accessor = sr2.vbt
print(sr2_accessor.to_1d_array())
print(sr2_accessor.to_2d_array())

[1 2 3]
[[1]
 [2]
 [3]]


In [76]:
# It will try to return pd.Series
sr_wrapper = sr2.vbt
print(sr_wrapper.wrap_array(a2))  # returns sr
print(sr_wrapper.wrap_array(df2.values))  # returns sr
print(sr_wrapper.wrap_array(df2.values, index=df2.index, columns=df2.columns))  # returns sr
# More than one column cannot be expressed as a Series
print(sr_wrapper.wrap_array(df4.values, columns=df4.columns))  # returns df
print(sr_wrapper.wrap_array(df4.values, index=df4.index, columns=df4.columns))  # returns df

i2
x2    1
y2    2
z2    3
Name: a2, dtype: int64
i2
x2    1
y2    2
z2    3
Name: a2, dtype: int64
i4
x4    1
y4    2
z4    3
Name: a4, dtype: int64
c6  a6  b6  c6
i2            
x2   1   2   3
y2   4   5   6
z2   7   8   9
c6  a6  b6  c6
i6            
x6   1   2   3
y6   4   5   6
z6   7   8   9


In [77]:
# It will try to return pd.DataFrame
df_wrapper = df2.vbt
print(df_wrapper.wrap_array(a2))  # returns df
print(df_wrapper.wrap_array(sr2.values))  # returns df
print(df_wrapper.wrap_array(df4.values, columns=df4.columns))  # returns df
print(df_wrapper.wrap_array(df4.values, index=df4.index, columns=df4.columns))  # returns df

c4  a4
i4    
x4   1
y4   2
z4   3
c4  a4
i4    
x4   1
y4   2
z4   3
c6  a6  b6  c6
i4            
x4   1   2   3
y4   4   5   6
z4   7   8   9
c6  a6  b6  c6
i6            
x6   1   2   3
y6   4   5   6
z6   7   8   9


In [78]:
# Duplicate the columns of df4 twice: tile copies the whole frame, repeat copies column by column
df4_accessor = df4.vbt
for duplicate in (df4_accessor.tile, df4_accessor.repeat):
    print(duplicate(2, as_columns=['a', 'b']))

    a        b      
c6 a6 b6 c6 a6 b6 c6
i6                  
x6  1  2  3  1  2  3
y6  4  5  6  4  5  6
z6  7  8  9  7  8  9
c6 a6    b6    c6   
    a  b  a  b  a  b
i6                  
x6  1  1  2  2  3  3
y6  4  4  5  5  6  6
z6  7  7  8  8  9  9


In [79]:
# NOTE(review): multi_c1 and multi_c2 come from an earlier cell. Judging by the output below,
# the result holds, for every entry of multi_c2, the position of the matching entry in multi_c1 - confirm
align_index_to(multi_c1, multi_c2)

array([0, 1, 0, 1])

In [80]:
# A frame with two columns and a frame with four columns sharing the c8 level
df10 = pd.DataFrame([[1, 2], [4, 5], [7, 8]], columns=multi_c1)
df20 = pd.DataFrame([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]], columns=multi_c2)

for frame in (df10, df20):
    print(frame)
# Columns of df10 laid out like the columns of df20
print(df10.vbt.align_to(df20))

c8 a8 b8
0   1  2
1   4  5
2   7  8
c7 a7    c7    
c8 a8 b8 a8  b8
0   1  2  3   4
1   4  5  6   7
2   7  8  9  10
c7 a7    c7   
c8 a8 b8 a8 b8
0   1  2  1  2
1   4  5  4  5
2   7  8  7  8


In [81]:
# Broadcast sr2 against a scalar through the class-level and the instance-level accessor
via_class = pd.DataFrame.vbt.broadcast(sr2, 10)
print(via_class)
via_instance = sr2.vbt.broadcast(10)
print(via_instance)
# Broadcast sr2 to df2
like_df2 = sr2.vbt.broadcast_to(df2)
print(like_df2)

(i2
x2    1
y2    2
z2    3
Name: a2, dtype: int64, i2
x2    10
y2    10
z2    10
Name: a2, dtype: int64)
(i2
x2    1
y2    2
z2    3
Name: a2, dtype: int64, i2
x2    10
y2    10
z2    10
Name: a2, dtype: int64)
c4  a4
i4    
x4   1
y4   2
z4   3


In [82]:
# make_symmetric through the accessor of a Series and of differently shaped DataFrames
for pd_obj in (sr2, df2, df3, df4):
    print(pd_obj.vbt.make_symmetric())

('i2', None)   a2   x2   y2   z2
(i2, None)                      
a2            NaN  1.0  2.0  3.0
x2            1.0  NaN  NaN  NaN
y2            2.0  NaN  NaN  NaN
z2            3.0  NaN  NaN  NaN
('i4', 'c4')   a4   x4   y4   z4
(i4, c4)                        
a4            NaN  1.0  2.0  3.0
x4            1.0  NaN  NaN  NaN
y4            2.0  NaN  NaN  NaN
z4            3.0  NaN  NaN  NaN
('i5', 'c5')   a5   b5   c5   x5
(i5, c5)                        
a5            NaN  NaN  NaN  1.0
b5            NaN  NaN  NaN  2.0
c5            NaN  NaN  NaN  3.0
x5            1.0  2.0  3.0  NaN
('i6', 'c6')   a6   b6   c6   x6   y6   z6
(i6, c6)                                  
a6            NaN  NaN  NaN  1.0  4.0  7.0
b6            NaN  NaN  NaN  2.0  5.0  8.0
c6            NaN  NaN  NaN  3.0  6.0  9.0
x6            1.0  2.0  3.0  NaN  NaN  NaN
y6            4.0  5.0  6.0  NaN  NaN  NaN
z6            7.0  8.0  9.0  NaN  NaN  NaN


In [83]:
# The first column of df5 is a Series indexed by the two index levels
first_column = df5.iloc[:, 0]
print(first_column.vbt.unstack_to_array())

[[ 1. nan nan]
 [nan  4. nan]
 [nan nan  7.]]


In [84]:
# Same Series as above, unstacked into a DataFrame
first_column = df5.iloc[:, 0]
print(first_column.vbt.unstack_to_df())

1    x8   y8   z8
0                
x7  1.0  NaN  NaN
y7  NaN  4.0  NaN
z7  NaN  NaN  7.0


In [85]:
def concat_inputs():
    """Return a fresh set of the objects that are concatenated with sr2."""
    return 10, [10, 20, 30], [[10, 20, 30]], pd.Series([10, 20, 30]), df1, df4


# Once through the class-level accessor (sr2 passed explicitly) ...
print(pd.DataFrame.vbt.concat(sr2, *concat_inputs()))
# ... and once through the accessor of sr2 itself
print(sr2.vbt.concat(*concat_inputs()))

c6       a6  b6  c6  a6  b6  c6  a6  b6  c6  a6  ...  c6  a6  b6  c6  a6  b6  \
i2   i6                                          ...                           
x2 0 x6   1   1   1  10  10  10  10  20  30  10  ...  30  10  10  10   1   1   
y2 1 y6   2   2   2  10  10  10  10  20  30  10  ...  30  20  20  20   1   1   
z2 2 z6   3   3   3  10  10  10  10  20  30  10  ...  30  30  30  30   1   1   

c6       c6  a6  b6  c6  
i2   i6                  
x2 0 x6   1   1   2   3  
y2 1 y6   1   4   5   6  
z2 2 z6   1   7   8   9  

[3 rows x 21 columns]
c6       a6  b6  c6  a6  b6  c6  a6  b6  c6  a6  ...  c6  a6  b6  c6  a6  b6  \
i2   i6                                          ...                           
x2 0 x6   1   1   1  10  10  10  10  20  30  10  ...  30  10  10  10   1   1   
y2 1 y6   2   2   2  10  10  10  10  20  30  10  ...  30  20  20  20   1   1   
z2 2 z6   3   3   3  10  10  10  10  20  30  10  ...  30  30  30  30   1   1   

c6       c6  a6  b6  c6  
i2   i6            

In [86]:
# The Python functions receive the keyword argument d; the Numba ones hard-code it as 100
def plus_item(i, x, y, c, d=1):
    """Add the i-th item of y, c and d to x."""
    return x + y[i] + c + d


def plus_column(i, x, y, c, d=1):
    """Add the i-th column of y, c and d to x."""
    return x + y[:, i] + c + d


plus_item_nb = njit(lambda i, x, y, c: x + y[i] + c + 100)
plus_column_nb = njit(lambda i, x, y, c: x + y[:, i] + c + 100)

print(sr2.vbt.apply_and_concat(3, sr2.values, 10, apply_func=plus_item, d=100))
print(sr2.vbt.apply_and_concat(3, sr2.values, 10, apply_func=plus_item_nb))
print(sr2.vbt.apply_and_concat(3, df4.values, 10, apply_func=plus_column, d=100))
print(sr2.vbt.apply_and_concat(3, df4.values, 10, apply_func=plus_column_nb))
print(df4.vbt.apply_and_concat(3, df4.values, 10, apply_func=plus_column, d=100))
# Name the new column level explicitly
hello_columns = pd.Index(['a', 'b', 'c'], name='hello')
print(df4.vbt.apply_and_concat(3, df4.values, 10, apply_func=plus_column_nb, as_columns=hello_columns))

     a2   a2   a2
i2               
x2  112  113  114
y2  113  114  115
z2  114  115  116
     a2   a2   a2
i2               
x2  112  113  114
y2  113  114  115
z2  114  115  116
     a2   a2   a2
i2               
x2  112  113  114
y2  116  117  118
z2  120  121  122
     a2   a2   a2
i2               
x2  112  113  114
y2  116  117  118
z2  120  121  122
c6   a6   b6   c6   a6   b6   c6   a6   b6   c6
i6                                             
x6  112  116  120  113  117  121  114  118  122
y6  115  119  123  116  120  124  117  121  125
z6  118  122  126  119  123  127  120  124  128
hello    a              b              c          
c6      a6   b6   c6   a6   b6   c6   a6   b6   c6
i6                                                
x6     112  116  120  113  117  121  114  118  122
y6     115  119  123  116  120  124  117  121  125
z6     118  122  126  119  123  127  120  124  128


In [87]:
# Scalar operand
print(sr2.vbt.combine_with(10., combine_func=lambda x, y: x + y))
# test args and kwargs
print(sr2.vbt.combine_with(10, 100, d=1000, combine_func=lambda x, y, c, d=1: x + y + c + d))
# 1-dim and 2-dim list operands
for list_operand in ([10, 20, 30], [[10, 20, 30]]):
    print(sr2.vbt.combine_with(list_operand, combine_func=lambda x, y: x + y))
# pandas operands, with index_from='stack' passed as a broadcasting option
for pd_operand in (sr1, sr2, df2, df3, df4, df5):
    print(sr2.vbt.combine_with(
        pd_operand,
        combine_func=lambda x, y: x + y,
        broadcast_kwargs=dict(index_from='stack')))

i2
x2    11.0
y2    12.0
z2    13.0
Name: a2, dtype: float64
i2
x2    1111
y2    1112
z2    1113
Name: a2, dtype: int64
i2
x2    11
y2    22
z2    33
Name: a2, dtype: int64
    a2  a2  a2
i2            
x2  11  21  31
y2  12  22  32
z2  13  23  33
i2
x2    2
y2    3
z2    4
Name: (a2, a1), dtype: int64
i2
x2    2
y2    4
z2    6
Name: a2, dtype: int64
      a2
c4    a4
i2 i4   
x2 x4  2
y2 y4  4
z2 z4  6
c5  a5  b5  c5
i2            
x2   2   3   4
y2   3   4   5
z2   4   5   6
c6     a6  b6  c6
i2 i6            
x2 x6   2   3   4
y2 y6   6   7   8
z2 z6  10  11  12
c7        a7  b7  c7
c8        a8  b8  c8
i2 i7 i8            
x2 x7 x8   2   3   4
y2 y7 y8   6   7   8
z2 z7 z8  10  11  12


In [88]:
# Plain Python function: the positional 10 and the keyword b=100 are forwarded to it
for others in (
    [10, [10, 20, 30], pd.Series([10, 20, 30])],
    [10, [10, 20, 30], [[10, 20, 30]], pd.Series([10, 20, 30]), df1, df3],
):
    print(sr2.vbt.combine_with_multiple(
        others,
        10, b=100,
        combine_func=lambda x, y, a, b=1: x + y + a + b,
        broadcast_kwargs=dict(index_from='stack')))

# Numba function with the 100 hard-coded; the very same call is printed twice
for attempt in range(2):
    print(sr2.vbt.combine_with_multiple(
        [10, [10, 20, 30], [[10, 20, 30]], pd.Series([10, 20, 30]), df1, df3],
        10,
        combine_func=njit(lambda x, y, a, b=1: x + y + a + 100),
        broadcast_kwargs=dict(index_from='stack')))

i2   
x2  0    361
y2  1    382
z2  2    403
Name: (a2, 0), dtype: int64
c5     a5   b5   c5
i2                 
x2 0  703  724  745
y2 1  714  735  756
z2 2  725  746  767
c5     a5   b5   c5
i2                 
x2 0  703  724  745
y2 1  714  735  756
z2 2  725  746  767
c5     a5   b5   c5
i2                 
x2 0  703  724  745
y2 1  714  735  756
z2 2  725  746  767


In [89]:
# Test concat=True
# Plain Python function: the positional 10 and the keyword b=100 are forwarded to it
for others in (
    [10, [10, 20, 30], pd.Series([10, 20, 30])],
    [10, [10, 20, 30], [[10, 20, 30]], pd.Series([10, 20, 30]), df1, df3],
):
    print(sr2.vbt.combine_with_multiple(
        others,
        10, b=100,
        combine_func=lambda x, y, a, b=1: x + y + a + b,
        concat=True,
        broadcast_kwargs=dict(index_from='stack')))

# Numba function with the 100 hard-coded: first with default column labels, then with explicit ones
for column_options in ({}, dict(as_columns=['a', 'b', 'c', 'd', 'e', 'f'])):
    print(sr2.vbt.combine_with_multiple(
        [10, [10, 20, 30], [[10, 20, 30]], pd.Series([10, 20, 30]), df1, df3],
        10,
        combine_func=njit(lambda x, y, a, b=1: x + y + a + 100),
        concat=True,
        broadcast_kwargs=dict(index_from='stack'),
        **column_options))

       a2          
        0    0    0
i2                 
x2 0  121  121  121
y2 1  122  132  132
z2 2  123  143  143
c5     a5   b5   c5   a5   b5   c5   a5   b5   c5   a5   b5   c5   a5   b5  \
i2                                                                           
x2 0  121  121  121  121  131  141  121  131  141  121  121  121  112  112   
y2 1  122  122  122  122  132  142  122  132  142  132  132  132  113  113   
z2 2  123  123  123  123  133  143  123  133  143  143  143  143  114  114   

c5     c5   a5   b5   c5  
i2                        
x2 0  112  112  113  114  
y2 1  113  113  114  115  
z2 2  114  114  115  116  
c5     a5   b5   c5   a5   b5   c5   a5   b5   c5   a5   b5   c5   a5   b5  \
i2                                                                           
x2 0  121  121  121  121  131  141  121  131  141  121  121  121  112  112   
y2 1  122  122  122  122  132  142  122  132  142  132  132  132  113  113   
z2 2  123  123  123  123  133  143  123  1

In [90]:
# Use magic methods with .vbt to do operations with custom broadcasting
# Regular df3 + df4 will return nans
broadcast_sum = df3.vbt + df4.vbt
print(broadcast_sum)

c5 a5  b5  c5
c6 a6  b6  c6
i6           
x6  2   4   6
y6  5   7   9
z6  8  10  12
