In [1]:
%load_ext autoreload

import numpy as np
%aimport tafra
from tafra import Tafra

%autoreload 2

In [2]:
t = Tafra({
    'x': np.array([1, 2, 3, 4, 5, 6]),
    'y': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'z': np.array([0, 0, 0, 1, 1, 1])
})
print('List:\n', t.to_list())
print('Record:\n', tuple(t.to_records()))


gb = t.group_by(
    ['y', 'z'], {'x': sum}, {'count': len}
)
print('\nGroup By:\n', gb)


tr = t.transform(
    ['y', 'z'], {'x': sum}, {'id': max}
)
print('\nTransform:\n', tr)


# tr = t.transform(
#     ['y', 'z'], {'x': sum, 'id': (max, '__id__')}
# )
# print('\nTransform with "magic" enumerator:\n', tr)

# can't do ^ that without magic __id__, but can do it as:
t2 = t.copy()
t2.id = np.empty_like(t2.x)
for i, (u, ix, grouped) in enumerate(t.iterate_by(['y', 'z'])):
    t2.x[ix] = sum(grouped.x)
    t2.id[ix] = i
print('\nTransformed without magic:\n', t2)

    
print('\nIterate By:\t')
for u, ix, grouped in t.iterate_by(['y']):
    print(grouped)


print('\nGroup By in Iterate By:\t')
for u, ix, grouped in t.iterate_by(['y']):
    print(grouped.group_by(['z'], {'x': sum}))

List:
 [array([1, 2, 3, 4, 5, 6]), array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), array([0, 0, 0, 1, 1, 1])]
Record:
 ((1, 'one', 0), (2, 'two', 0), (3, 'one', 0), (4, 'two', 1), (5, 'one', 1), (6, 'two', 1))

Group By:
 Tafra(_data={'y': array(['one', 'two', 'two', 'one'], dtype=object), 'z': array([0, 0, 1, 1]), 'x': array([ 4,  2, 10,  5]), 'count': array([2, 1, 2, 1])}, _dtypes={'y': 'object', 'z': 'int', 'x': 'int', 'count': 'int'})

Transform:
 Tafra(_data={'y': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'z': array([0, 0, 0, 1, 1, 1]), 'x': array([ 4,  2,  4, 10,  5, 10]), 'id': array([0, 1, 0, 2, 3, 2])}, _dtypes={'y': 'object', 'z': 'int', 'x': 'int', 'id': 'int'})

Transformed without magic:
 Tafra(_data={'x': array([ 4,  2,  4, 10,  5, 10]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'z': array([0, 0, 0, 1, 1, 1])}, _dtypes={'x': 'int', 'y': 'object', 'z': 'int'})

Iterate By:	
Tafra(_data={'x': array([1, 3, 5

In [3]:
_t = t.copy()
_t.update(_t.transform(['y'], {}, {'id': max}))

for u, ix, it in t.iterate_by(['y']):
    _t['x'][ix] = it['x'] - np.mean(it['x'])

print('\nIndex Use in Iterate By:')
print(_t)


Index Use in Iterate By:
Tafra(_data={'x': array([-2, -2,  0,  0,  2,  2]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'z': array([0, 0, 0, 1, 1, 1]), 'id': array([0, 1, 0, 1, 0, 1])}, _dtypes={'x': 'int', 'y': 'object', 'z': 'int', 'id': 'int'})


In [4]:
print('\nGroup By in Iterate By:\t')
for u, ix, it in t.iterate_by(['y']):
    it['x'][0] = 9
    print(it)

print()
print(t)


Group By in Iterate By:	
Tafra(_data={'x': array([9, 3, 5]), 'y': array(['one', 'one', 'one'], dtype=object), 'z': array([0, 0, 1])}, _dtypes={'x': 'int', 'y': 'object', 'z': 'int'})
Tafra(_data={'x': array([9, 4, 6]), 'y': array(['two', 'two', 'two'], dtype=object), 'z': array([0, 1, 1])}, _dtypes={'x': 'int', 'y': 'object', 'z': 'int'})

Tafra(_data={'x': array([1, 2, 3, 4, 5, 6]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'z': array([0, 0, 0, 1, 1, 1])}, _dtypes={'x': 'int', 'y': 'object', 'z': 'int'})


In [5]:
import pandas as pd

l = pd.DataFrame.from_dict({
    'x': np.arange(1000),
})

r = pd.DataFrame.from_dict({
    'a': np.tile(np.arange(1000), 1000),
})

l = l.merge(r, how='left', left_on=['x'], right_on=['a'])


In [6]:
l = Tafra({
    'x': np.arange(1000),
})

r = Tafra({
    'a': np.tile(np.arange(1000), 1000),
})

l = l.left_join(r, [('x', 'a', '==')], ['x', 'a'])

In [7]:
l = Tafra({
    'x': np.arange(100),
})

r = Tafra({
    'a': np.tile(np.arange(10), 1000),
})

l = l.cross_join(r, ['x', 'a'])
print(l)

Tafra(_data={'x': array([ 0,  0,  0, ..., 99, 99, 99]), 'a': array([0, 1, 2, ..., 7, 8, 9])}, _dtypes={'x': 'int', 'a': 'int'})


In [8]:
l = Tafra({
    'x': np.array([1, 2, 3, 4, 5, 6]),
    'y': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'z': np.array([0, 0, 0, 1, 1, 1])
})

r = Tafra({
    'a': np.array([1, 2, 3, 4, 5, 6]),
    'b': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'c': np.array([0, 0, 0, 1, 1, 1])
})

l = l.left_join(r, [('x', 'a', '==')], ['x', 'y', 'a', 'b'])
print(l)

Tafra(_data={'x': array([1, 2, 3, 4, 5, 6]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'a': array([1, 2, 3, 4, 5, 6]), 'b': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object)}, _dtypes={'x': 'int', 'y': 'object', 'a': 'int', 'b': 'object'})


In [9]:
l = Tafra({
    'x': np.array([1, 2, 3, 4, 5, 6]),
    'y': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'z': np.array([0, 0, 0, 1, 1, 1])
})

r = Tafra({
    'a': np.array([1, 1, 1, 2, 2, 2]),
    'b': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'c': np.array([0, 0, 0, 1, 1, 1])
})

l = l.left_join(r, [('x', 'a', '==')], ['x', 'y', 'a', 'b'])
print(l)

Tafra(_data={'x': array([1, 1, 1, 2, 2, 2, 3, 4, 5, 6]), 'y': array(['one', 'one', 'one', 'two', 'two', 'two', 'one', 'two', 'one',
       'two'], dtype=object), 'a': array([1, 1, 1, 2, 2, 2, None, None, None, None], dtype=object), 'b': array(['one', 'two', 'one', 'two', 'one', 'two', None, None, None, None],
      dtype=object)}, _dtypes={'x': 'int', 'y': 'object', 'a': 'object', 'b': 'object'})


In [10]:
l = Tafra({
    'x': np.array([1, 2, 3, 4, 5, 6]),
    'y': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'z': np.array([0, 0, 0, 1, 1, 1])
})

r = Tafra({
    'a': np.array([1, 1, 1, 2, 2, 2]),
    '_a': np.array([1, 1, 2, 2, 3, 3]),
    'b': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'c': np.array([0, 0, 0, 1, 1, 1])
})

l = l.left_join(r, [('x', 'a', '=='), ('x', '_a', '==')], ['x', 'y', 'a', 'b'])
print(l)

Tafra(_data={'x': array([1, 1, 2, 3, 4, 5, 6]), 'y': array(['one', 'one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'a': array([1, 1, 2, None, None, None, None], dtype=object), 'b': array(['one', 'two', 'two', None, None, None, None], dtype=object)}, _dtypes={'x': 'int', 'y': 'object', 'a': 'object', 'b': 'object'})


In [11]:
l = Tafra({
    'x': np.array([1, 2, 3, 4, 5, 6]),
    'y': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'z': np.array([0, 0, 0, 1, 1, 1])
})

r = Tafra({
    'a': np.array([1, 1, 2, 2, 3, 3]),
    'b': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'c': np.array([0, 0, 0, 1, 1, 1])
})

l = l.left_join(r, [('x', 'a', '<')], ['x', 'y', 'a', 'b'])
print(l)

Tafra(_data={'x': array([1, 1, 1, 1, 2, 2, 3, 4, 5, 6]), 'y': array(['one', 'one', 'one', 'one', 'two', 'two', 'one', 'two', 'one',
       'two'], dtype=object), 'a': array([2, 2, 3, 3, 3, 3, None, None, None, None], dtype=object), 'b': array(['one', 'two', 'one', 'two', 'one', 'two', None, None, None, None],
      dtype=object)}, _dtypes={'x': 'int', 'y': 'object', 'a': 'object', 'b': 'object'})


In [12]:
l = Tafra({
    'x': np.array([1, 2, 3, 4, 5, 6]),
    'y': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'z': np.array([0, 0, 0, 1, 1, 1])
})

r = Tafra({
    'a': np.array([1, 1, 1, 2, 2, 2]),
    'b': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'c': np.array([0, 0, 0, 1, 1, 1])
})

l = l.left_join(r, [('x', 'a', '==')], ['x', 'y', 'a', 'b'])
print(l)

Tafra(_data={'x': array([1, 1, 1, 2, 2, 2, 3, 4, 5, 6]), 'y': array(['one', 'one', 'one', 'two', 'two', 'two', 'one', 'two', 'one',
       'two'], dtype=object), 'a': array([1, 1, 1, 2, 2, 2, None, None, None, None], dtype=object), 'b': array(['one', 'two', 'one', 'two', 'one', 'two', None, None, None, None],
      dtype=object)}, _dtypes={'x': 'int', 'y': 'object', 'a': 'object', 'b': 'object'})


In [13]:
l = Tafra({
    'x': np.array([1, 2, 3, 4, 5, 6]),
    'y': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'z': np.array([0, 0, 0, 1, 1, 1])
})

r = Tafra({
    'a': np.array([1, 2, 3, 4, 5, 6]),
    'b': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'c': np.array([0, 0, 0, 1, 1, 1])
})

l = l.inner_join(r, [('x', 'a', '==')], ['x', 'y', 'a', 'b'])
print(l)

Tafra(_data={'x': array([1, 2, 3, 4, 5, 6]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'a': array([1, 2, 3, 4, 5, 6]), 'b': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object)}, _dtypes={'x': 'int', 'y': 'object', 'a': 'int', 'b': 'object'})


In [14]:
l = Tafra({
    'x': np.array([1, 2, 3, 4, 5, 6]),
    'y': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'z': np.array([0, 0, 0, 1, 1, 1])
})

r = Tafra({
    'a': np.array([1, 1, 2, 2, 3, 3]),
    'b': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'c': np.array([0, 0, 0, 1, 1, 1])
})

l = l.inner_join(r, [('x', 'a', '==')], ['x', 'y', 'a', 'b'])
print(l)

Tafra(_data={'x': array([1, 1, 2, 2, 3, 3]), 'y': array(['one', 'one', 'two', 'two', 'one', 'one'], dtype=object), 'a': array([1, 1, 2, 2, 3, 3]), 'b': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object)}, _dtypes={'x': 'int', 'y': 'object', 'a': 'int', 'b': 'object'})


In [15]:
l = Tafra({
    'x': np.array([1, 2, 3, 4, 5, 6]),
    'y': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'z': np.array([0, 0, 0, 1, 1, 1])
})

r = Tafra({
    'a': np.array([1, 1, 1, 2, 2, 2]),
    'b': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'c': np.array([0, 0, 0, 1, 1, 1])
})

l = l.inner_join(r, [('x', 'a', '==')], ['x', 'y', 'a', 'b'])
print(l)

Tafra(_data={'x': array([1, 1, 1, 2, 2, 2]), 'y': array(['one', 'one', 'one', 'two', 'two', 'two'], dtype=object), 'a': array([1, 1, 1, 2, 2, 2]), 'b': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object)}, _dtypes={'x': 'int', 'y': 'object', 'a': 'int', 'b': 'object'})


In [16]:
l = Tafra({
    'x': np.array([1, 2, 3, 4, 5, 6]),
    'y': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'z': np.array([0, 0, 0, 1, 1, 1])
})

r = Tafra({
    'a': np.array([1, 1, 1, 2, 2, 2]),
    'b': np.array(['one', 'two', 'one', 'two', 'one', 'two'], dtype='object'),
    'c': np.array([0, 0, 0, 1, 1, 1])
})

l = l.inner_join(r, [('x', 'a', '<=')], ['x', 'y', 'a', 'b'])
print(l)

Tafra(_data={'x': array([1, 1, 1, 1, 1, 1, 2, 2, 2]), 'y': array(['one', 'one', 'one', 'one', 'one', 'one', 'two', 'two', 'two'],
      dtype=object), 'a': array([1, 1, 1, 2, 2, 2, 2, 2, 2]), 'b': array(['one', 'two', 'one', 'two', 'one', 'two', 'two', 'one', 'two'],
      dtype=object)}, _dtypes={'x': 'int', 'y': 'object', 'a': 'int', 'b': 'object'})


In [17]:
try:
    t.union(_t)
except Exception as e:
    print(e)

This `Tafra` column count does not match other `Tafra` column count.


In [18]:
t2 = t.copy()
t3 = t.copy()
t3.update_dtypes({'x': float})

In [19]:
t.union(t2)

Tafra(_data={'x': array([1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two', 'one',
       'two', 'one', 'two'], dtype=object), 'z': array([0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1])}, _dtypes={'x': 'int', 'y': 'object', 'z': 'int'})

In [20]:
try:
    t.union(t3)
except Exception as e:
    print(e)

This `Tafra` column `x` dtype `int32` does not match other `Tafra` dtype `float64`.


In [21]:
t3 = t.copy()
t2.union(t3, inplace=True)
t2

Tafra(_data={'x': array([1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two', 'one',
       'two', 'one', 'two'], dtype=object), 'z': array([0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1])}, _dtypes={'x': 'int', 'y': 'object', 'z': 'int'})

In [22]:
x = t[:3]
x['x'][0] = 0
t['x']

array([0, 2, 3, 4, 5, 6])

In [23]:
x = t[slice(0, 3)]
x['x'][0] = 7
t['x']

array([7, 2, 3, 4, 5, 6])

In [24]:
z = t[:3].copy()
z['x'][0] = 9
t['x']

array([7, 2, 3, 4, 5, 6])

In [25]:
a = t[t['x'] <= 4]
a['x'][1] = 15
print(a['x'])
print(t['x'])

[ 2 15  4]
[7 2 3 4 5 6]


In [26]:
t.update_dtypes({'x': float})

In [27]:
t.update_dtypes({'x': 'float'})

In [28]:
try:
    t.update_dtypes({'x': 'flot', 'y': 'st'})
except Exception as e:
    print(e)

`flot` is not a valid dtype for `x.`
`st` is not a valid dtype for `y.`



In [29]:
t.update_dtypes({'x': int})
o = t.copy()
t.update(o)
t

Tafra(_data={'x': array([7, 2, 3, 4, 5, 6]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'z': array([0, 0, 0, 1, 1, 1])}, _dtypes={'x': 'int', 'y': 'object', 'z': 'int'})

In [30]:
o.update_dtypes({'x': float})
t.update(o)
t

Tafra(_data={'x': array([7., 2., 3., 4., 5., 6.]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'z': array([0, 0, 0, 1, 1, 1])}, _dtypes={'x': 'float', 'y': 'object', 'z': 'int'})

In [31]:
o._data['x'] = np.arange(5)
o

Tafra(_data={'x': array([0, 1, 2, 3, 4]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'z': array([0, 0, 0, 1, 1, 1])}, _dtypes={'x': 'float', 'y': 'object', 'z': 'int'})

In [32]:
try:
    t.update(o)
except Exception as e:
    print(e)
t

Other `Tafra` must have consistent row count. This `Tafra` has 6 rows, other `Tafra` has 5 rows.


Tafra(_data={'x': array([7., 2., 3., 4., 5., 6.]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'z': array([0, 0, 0, 1, 1, 1])}, _dtypes={'x': 'float', 'y': 'object', 'z': 'int'})

In [33]:
try:
    t['x'] = list(range(6))
except Exception as e:
    print(e)
t

Tafra(_data={'x': array([0, 1, 2, 3, 4, 5]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'z': array([0, 0, 0, 1, 1, 1])}, _dtypes={'x': 'int', 'y': 'object', 'z': 'int'})

In [34]:
t['x'] = np.arange(6)[:, None]



In [35]:
try:
    t['x'] = np.atleast_2d(np.arange(6))
except Exception as e:
    print(e)
t



Tafra(_data={'x': array([0, 1, 2, 3, 4, 5]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'z': array([0, 0, 0, 1, 1, 1])}, _dtypes={'x': 'int', 'y': 'object', 'z': 'int'})

In [36]:
t['x'] = np.atleast_2d(np.arange(6)).T
t



Tafra(_data={'x': array([0, 1, 2, 3, 4, 5]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'z': array([0, 0, 0, 1, 1, 1])}, _dtypes={'x': 'int', 'y': 'object', 'z': 'int'})

In [37]:
try:
    t['x'] = np.atleast_2d(np.arange(6))
except Exception as e:
    print(e)



In [38]:
try:
    t['x'] = np.repeat(np.arange(6)[:, None], repeats=2, axis=1)
except Exception as e:
    print(e)

`ndarray` or `np.squeeze(ndarray)` must have ndim == 1.


In [39]:
t['d'] = np.array([np.datetime64(_, 'D') for _ in range(6)])

In [40]:
t

Tafra(_data={'x': array([0, 1, 2, 3, 4, 5]), 'y': array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object), 'z': array([0, 0, 0, 1, 1, 1]), 'd': array(['1970-01-01', '1970-01-02', '1970-01-03', '1970-01-04',
       '1970-01-05', '1970-01-06'], dtype='datetime64[D]')}, _dtypes={'x': 'int', 'y': 'object', 'z': 'int', 'd': 'date'})

In [41]:
tuple(t.to_records())

((0, 'one', 0, '1970-01-01'),
 (1, 'two', 0, '1970-01-02'),
 (2, 'one', 0, '1970-01-03'),
 (3, 'two', 1, '1970-01-04'),
 (4, 'one', 1, '1970-01-05'),
 (5, 'two', 1, '1970-01-06'))

In [42]:
t.to_list()

[array([0, 1, 2, 3, 4, 5]),
 array(['one', 'two', 'one', 'two', 'one', 'two'], dtype=object),
 array([0, 0, 0, 1, 1, 1]),
 array(['1970-01-01', '1970-01-02', '1970-01-03', '1970-01-04',
        '1970-01-05', '1970-01-06'], dtype='datetime64[D]')]

In [43]:
t = Tafra({'x': np.array([1, 2, None, 4, None])})
t.coalesce('x', [[1, 2, 3, None, 5], [None, None, None, None, 'five']])

array([1, 2, 3, 4, 5], dtype=object)