In [1]:
import numpy
from datar import f
from datar.base import *
from datar.tibble import tibble

# Execute the helper script inside this kernel. `debug`, used by the cells
# below, is not imported explicitly above, so it presumably comes from this
# script — TODO confirm.
%run nb_helpers.py
# Keyword arguments passed to every debug(...) call in this notebook. In the
# recorded outputs each entry starts on a fresh line ('prefix') and a rule of
# 20 dashes sits between the echoed expression and its value ('sep').
debug_kwargs = {'prefix': '\n', 'sep': f'\n{"-" * 20}\n'}



In [2]:
# constants
# R-style constants. None of these names is defined in this notebook, so they
# presumably arrive through the star import of datar.base — verify.
# The recorded output shows the letter/month constants are numpy string arrays.
debug(
    pi,
    Inf,
    letters,
    LETTERS,
    month_abb,
    month_name,
    **debug_kwargs
)


pi
--------------------
3.141592653589793

Inf
--------------------
inf

letters
--------------------
array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
       'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'],
      dtype='<U1')

LETTERS
--------------------
array(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
       'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'],
      dtype='<U1')

month_abb
--------------------
array(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',
       'Oct', 'Nov', 'Dec'], dtype='<U3')

month_name
--------------------
array(['January', 'February', 'March', 'April', 'May', 'June', 'July',
       'August', 'September', 'October', 'November', 'December'],
      dtype='<U9')


In [3]:
# options
# Show the full option set, then read one option before, inside and after a
# temporary override. The recorded output is False / True / False, i.e. the
# override made by options_context only lasts for the `with` block.
# Note the option is read with dots ('index.base.0') but set with underscores
# (index_base_0); the recorded output shows both spellings address the same
# option.

debug(options(), **debug_kwargs)
debug(get_option('index.base.0'), **debug_kwargs)
with options_context(index_base_0=True):
    debug(get_option('index.base.0'), **debug_kwargs)
debug(get_option('index.base.0'), **debug_kwargs)


options()
--------------------
Diot({'index_base_0': False, 'which_base_0': True, 'dplyr_summarise_inform': True, 'warn_builtin_names': True, 'add_option': True, 'frame_format_patch': True}, diot_transform=<function <lambda> at 0x7f267a758cb0>)

get_option('index.base.0')
--------------------
False

get_option('index.base.0')
--------------------
True

get_option('index.base.0')
--------------------
False


In [4]:
# verbs
# A one-row frame with columns x, y, z used by the data-frame helpers below.
df = tibble(x=1, y=2, z=3)

# One-argument forms (colnames/rownames/names) read the labels; the
# two-argument forms return an object carrying the new labels. `df` itself is
# not modified: names(df) further down still reports ['x', 'y', 'z'] in the
# recorded output.
# diag(1, 3) builds a 3x3 identity frame and diag(<that frame>) extracts the
# diagonal back out, per the recorded output.
# NOTE: debug echoes each argument's source text as its label, so reformatting
# an argument changes the printed output.
debug(
    colnames(df),
    colnames(df, ['a', 'b', 'c']),
    rownames(df),
    rownames(df, ['row1']),
    dim(df),
    nrow(df),
    ncol(df),
    diag(1, 3),
    diag(diag(1, 3)),
    t(df),
    names(df),
    names({'a': 1, 'b': 2}),
    names({'a': 1, 'b': 2}, ['x', 'y']),
    intersect([1,2], [2,3]),
    setdiff([1,2], [2,3]),
    union([1,2], [2,3]),
    setequal([1,2], [2,1]),
    duplicated([1,1,2,2]),
    duplicated([1,1,2,2], from_last=True),
    **debug_kwargs
)


colnames(df)
--------------------
['x', 'y', 'z']

colnames(df, ['a', 'b', 'c'])
--------------------
        a       b       c
  <int64> <int64> <int64>
0       1       2       3

rownames(df)
--------------------
[0]

rownames(df, ['row1'])
--------------------
           x       y       z
     <int64> <int64> <int64>
row1       1       2       3

dim(df)
--------------------
(1, 3)

nrow(df)
--------------------
1

ncol(df)
--------------------
3

diag(1, 3)
--------------------
        0       1       2
  <int64> <int64> <int64>
0       1       0       0
1       0       1       0
2       0       0       1

diag(diag(1, 3))
--------------------
array([1, 1, 1])

t(df)
--------------------
        0
  <int64>
x       1
y       2
z       3

names(df)
--------------------
['x', 'y', 'z']

names({'a': 1, 'b': 2})
--------------------
['a', 'b']

names({'a': 1, 'b': 2}, ['x', 'y'])
--------------------
{'x': 1, 'y': 2}

intersect([1,2], [2,3])
--------------------
[2]

setdiff([1,2], [2

In [5]:
# funs
# cut() bins the sequence into 3 intervals, identity() returns its argument,
# and expandgrid() produces every combination of its inputs (columns Var0 and
# Var1 in the recorded output).

debug(
    cut(seq(1,10), 3), 
    identity(1.23),
    expandgrid([1,2], [3,4]),
    **debug_kwargs
)
# Same expansion, but with the inputs given as f.a / f.b. The recorded output
# has columns named a and b, so these presumably resolve to the columns of the
# tibble handed to data_context — verify.
# NOTE(review): the context is bound to `_`, which is never used and also
# shadows IPython's "last output" variable.
with data_context(tibble(a=[1,2], b=[3,4])) as _:
    expandgrid(f.a, f.b)


cut(seq(1,10), 3)
--------------------
[(0.99, 4.0], (0.99, 4.0], (0.99, 4.0], (0.99, 4.0], (4.0, 7.0], (4.0, 7.0], (4.0, 7.0], (7.0, 10.0], (7.0, 10.0], (7.0, 10.0]]
Categories (3, interval[float64]): [(0.99, 4.0] < (4.0, 7.0] < (7.0, 10.0]]

identity(1.23)
--------------------
1.23

expandgrid([1,2], [3,4])
--------------------
     Var0    Var1
  <int64> <int64>
0       1       3
1       1       4
2       2       3
3       2       4


Unnamed: 0,a,b
,<int64>,<int64>
0.0,1,3
1.0,1,4
2.0,2,3
3.0,2,4


In [6]:
# arithmetic
# Several names used here (min, max, sum, abs, round, all, any) are also Python
# builtins. The recorded outputs (e.g. abs([1, -1]) giving an array) show that
# vectorised replacements are in scope instead, presumably from the star import
# of datar.base — verify.
# var(arr) is recorded as 4.9166..., which matches the sample variance (n - 1
# denominator) of [1, 2, 4, 6].
# NOTE(review): the recorded output for all([True, False]) is True, which is
# not what a logical "all" should give — confirm against the library.
arr = [1,2,4,6]
debug(
    mean(arr),
    median(arr),
    min(arr),
    max(arr),
    sum(arr),
    abs([1, -1]),
    round([1.4, 1.5]),
    all([True, False]),
    any([True, False]),
    pmin([1,4], [2,3]),
    pmax([1,4], [2,3]),
    var(arr),
    ceiling([1.1, 2.1]),
    floor([1.1, 2.1]),
    sqrt([1.1, 2.1]),
    cov([1,2,3], [3,2,1]),
    **debug_kwargs
)


mean(arr)
--------------------
3.25

median(arr)
--------------------
3.0

min(arr)
--------------------
1

max(arr)
--------------------
6

sum(arr)
--------------------
13

abs([1, -1])
--------------------
array([1, 1])

round([1.4, 1.5])
--------------------
array([1., 2.])

all([True, False])
--------------------
True

any([True, False])
--------------------
True

pmin([1,4], [2,3])
--------------------
array([1, 3])

pmax([1,4], [2,3])
--------------------
array([2, 4])

var(arr)
--------------------
4.916666666666667

ceiling([1.1, 2.1])
--------------------
array([2., 3.])

floor([1.1, 2.1])
--------------------
array([1., 2.])

sqrt([1.1, 2.1])
--------------------
array([1.04880885, 1.44913767])

cov([1,2,3], [3,2,1])
--------------------
-1.0


In [7]:
# bessel
# The four Bessel-function variants evaluated element-wise at 1, 2, 3.
# The second argument (1) is presumably the order, as in R's besselI/J/K/Y —
# TODO confirm.
debug(
    bessel_i([1,2,3], 1),
    bessel_j([1,2,3], 1),
    bessel_k([1,2,3], 1),
    bessel_y([1,2,3], 1),
    **debug_kwargs
)


bessel_i([1,2,3], 1)
--------------------
array([0.5651591 , 1.59063685, 3.95337022])

bessel_j([1,2,3], 1)
--------------------
array([0.44005059, 0.57672481, 0.33905896])

bessel_k([1,2,3], 1)
--------------------
array([0.60190723, 0.13986588, 0.04015643])

bessel_y([1,2,3], 1)
--------------------
array([-0.78121282, -0.10703243,  0.32467442])


In [8]:
# casting
# Mixed string/number inputs are converted element-wise. In the recorded
# output as_integer turns 2.1 into 2, and the string 'nan' becomes a float nan.
# as_float, as_double and as_numeric give identical results for this input.
debug(
    as_integer(['1', 2.1]),
    as_float(['1', 2, 'nan']),
    as_double(['1', 2, 'nan']),
    as_numeric(['1', 2, 'nan']),
    **debug_kwargs
)



as_integer(['1', 2.1])
--------------------
[1, 2]

as_float(['1', 2, 'nan'])
--------------------
[1.0, 2.0, nan]

as_double(['1', 2, 'nan'])
--------------------
[1.0, 2.0, nan]

as_numeric(['1', 2, 'nan'])
--------------------
[1.0, 2.0, nan]


In [9]:
# complex numbers
# A native Python complex literal is used as the test value.
cm = 1 + 2j
# im/re extract the imaginary and real parts, mod is the modulus (recorded as
# 2.236..., i.e. sqrt(5)) and conj is the complex conjugate.
debug(
    im(cm),
    re(cm),
    mod(cm),
    conj(cm),
    is_complex(cm),
    as_complex(1),
    **debug_kwargs
)


im(cm)
--------------------
2.0

re(cm)
--------------------
1.0

mod(cm)
--------------------
2.23606797749979

conj(cm)
--------------------
(1-2j)

is_complex(cm)
--------------------
True

as_complex(1)
--------------------
(1+0j)


In [10]:
# cum stats
# Running sum / product / minimum / maximum over the same input.
# Per the recorded output, cumsum and cumprod come back as numpy arrays while
# cummin and cummax come back as int64 Series with a default index.
a = [1,3,2,4]

debug(
    cumsum(a),
    cumprod(a),
    cummin(a),
    cummax(a),
    **debug_kwargs
)


cumsum(a)
--------------------
array([ 1,  4,  6, 10])

cumprod(a)
--------------------
array([ 1,  3,  6, 24])

cummin(a)
--------------------
0    1
1    1
2    1
3    1
dtype: int64

cummax(a)
--------------------
0    1
1    3
2    3
3    4
dtype: int64


In [11]:
# date
# The first two calls parse strings using an explicit strptime-style format.
# The third converts a number relative to `origin`: the recorded result,
# 1989-09-19, is 32768 days after 1900-01-01.
# All three results are recorded as object-dtype Series.
debug(
    as_date(["1jan1960", "2jan1960", "31mar1960", "30jul1960"], "%d%b%Y"),
    as_date(["02/27/92", "02/27/92", "01/14/92", "02/28/92", "02/01/92"], "%m/%d/%y"),
    as_date(32768, origin="1900-01-01"),
    **debug_kwargs
)


as_date(["1jan1960", "2jan1960", "31mar1960", "30jul1960"], "%d%b%Y")
--------------------
0    1960-01-01
1    1960-01-02
2    1960-03-31
3    1960-07-30
dtype: object

as_date(["02/27/92", "02/27/92", "01/14/92", "02/28/92", "02/01/92"], "%m/%d/%y")
--------------------
0    1992-02-27
1    1992-02-27
2    1992-01-14
3    1992-02-28
4    1992-02-01
dtype: object

as_date(32768, origin="1900-01-01")
--------------------
0    1989-09-19
dtype: object


In [12]:
# factor
# Level 4 is declared but never used, so droplevels has something to remove:
# the recorded output goes from 4 levels to 3 categories.
fct = factor([1,2,3], levels=[1,2,3,4])
debug(
    levels(fct),
    droplevels(fct),
    is_factor(fct),
    as_factor([1,2,3]),
    **debug_kwargs
)


levels(fct)
--------------------
Int64Index([1, 2, 3, 4], dtype='int64')

droplevels(fct)
--------------------
[1, 2, 3]
Categories (3, int64): [1, 2, 3]

is_factor(fct)
--------------------
True

as_factor([1,2,3])
--------------------
[1, 2, 3]
Categories (3, int64): [1, 2, 3]


In [13]:
# logical
# TRUE / FALSE print as Python's True / False in the recorded output.
# `not is_true([TRUE, TRUE])` is recorded as True, i.e. is_true gives a falsy
# result for a list even when every element is TRUE.

debug(
    TRUE,
    FALSE,
    is_true(TRUE),
    not is_true([TRUE, TRUE]),
    is_false(FALSE),
    is_logical(TRUE),
    is_logical([TRUE, FALSE]),
    as_logical([0, 1]),
    **debug_kwargs
)


TRUE
--------------------
True

FALSE
--------------------
False

is_true(TRUE)
--------------------
True

not is_true([TRUE, TRUE])
--------------------
True

is_false(FALSE)
--------------------
True

is_logical(TRUE)
--------------------
True

is_logical([TRUE, FALSE])
--------------------
True

as_logical([0, 1])
--------------------
[False, True]


In [14]:
# na
# Per the recorded output NA and NaN are the same float nan object
# (`NA is NaN` is True) and is_na also treats None as missing.
# The three numpy.array calls show how NA interacts with dtype inference:
# with numbers the array becomes float, with strings NA is stringified to
# 'nan', whereas None keeps the array as object dtype.

debug(
    NA,
    NaN,
    NA is NaN,
    type(NA),
    is_na([NA, NaN, None]),
    any_na([1,2, NA]),
    numpy.array([1,2,NA]),
    numpy.array(['a', 'b', NA]),
    numpy.array(['a', 'b', None]),
    **debug_kwargs
)


NA
--------------------
nan

NaN
--------------------
nan

NA is NaN
--------------------
True

type(NA)
--------------------
<class 'float'>

is_na([NA, NaN, None])
--------------------
array([ True,  True,  True])

any_na([1,2, NA])
--------------------
True

numpy.array([1,2,NA])
--------------------
array([ 1.,  2., nan])

numpy.array(['a', 'b', NA])
--------------------
array(['a', 'b', 'nan'], dtype='<U32')

numpy.array(['a', 'b', None])
--------------------
array(['a', 'b', None], dtype=object)


In [15]:
# null
# NULL is Python's None (`NULL is None` is recorded as True). NA is not NULL:
# is_null(NA) is recorded as False. as_null returns None for the input shown.

debug(
    NULL,
    NULL is None,
    is_null(NA),
    is_null(NULL),
    as_null('whatever'),
    **debug_kwargs
)


NULL
--------------------
None

NULL is None
--------------------
True

is_null(NA)
--------------------
False

is_null(NULL)
--------------------
True

as_null('whatever')
--------------------
None


In [16]:
# random
# Seed the generator first so that the draw below is reproducible.
set_seed(8525)

# Draw 10 values from seq(1, 100). With the seed above the result is
# always: [19, 61, 85, 93, 18,  5, 22,  7, 29, 26]
candidates = seq(1, 100)
sample(candidates, 10)

array([19, 61, 85, 93, 18,  5, 22,  7, 29, 26])

In [17]:
# seq
# Sequence helpers. In the recorded output seq_len / seq_along count from 1 by
# default and from 0 when base0_=True is passed.
# c(...) flattens its (possibly nested) arguments; it is wrapped in str() here,
# so the printed value is the string form of the flattened result.
debug(
    str(c(c(1,2), seq(3,4))),
    seq_len(5),
    seq_len(5, base0_=True),
    seq_along([4,2,1]),
    rev(seq_len(5)),
    rep([1,2], 2),
    rep([1,2], each=2),
    length([1,2]),
    length("abcd"), # string is scalar
    lengths(10),
    lengths([[1], [1,2]]),
    unique([3,3,2,4,4]), # order kept
    **debug_kwargs
)


str(c(c(1,2), seq(3,4)))
--------------------
'[1, 2, 3, 4]'

seq_len(5)
--------------------
array([1, 2, 3, 4, 5])

seq_len(5, base0_=True)
--------------------
array([0, 1, 2, 3, 4])

seq_along([4,2,1])
--------------------
array([1, 2, 3])

rev(seq_len(5))
--------------------
array([5, 4, 3, 2, 1])

rep([1,2], 2)
--------------------
array([1, 2, 1, 2])

rep([1,2], each=2)
--------------------
array([1, 1, 2, 2])

length([1,2])
--------------------
2

length("abcd")
--------------------
1

lengths(10)
--------------------
array([1])

lengths([[1], [1,2]])
--------------------
array([1, 2])

unique([3,3,2,4,4])
--------------------
array([3, 2, 4])


In [18]:
# special
# Special mathematical functions applied element-wise.
# In the recorded output psigamma([1,2,3], 1) equals trigamma([1,2,3]), and the
# l-prefixed variants (lbeta, lgamma, lchoose, lfactorial) are the natural logs
# of their counterparts (e.g. lfactorial(5) = 4.787... = log(120)).

debug(
    beta([1,2,3], 1),
    lbeta([1,2,3], 1),
    gamma([1,2,3]),
    lgamma([1,2,3]),
    digamma([1,2,3]),
    trigamma([1,2,3]),
    psigamma([1,2,3], 1),
    choose([4,6], 2),
    lchoose([4,6], 2),
    factorial([1,5]),
    lfactorial([1,5]),
    **debug_kwargs
)


beta([1,2,3], 1)
--------------------
array([1.        , 0.5       , 0.33333333])

lbeta([1,2,3], 1)
--------------------
array([ 0.        , -0.69314718, -1.09861229])

gamma([1,2,3])
--------------------
array([1., 1., 2.])

lgamma([1,2,3])
--------------------
array([0.        , 0.        , 0.69314718])

digamma([1,2,3])
--------------------
array([-0.57721566,  0.42278434,  0.92278434])

trigamma([1,2,3])
--------------------
array([1.64493407, 0.64493407, 0.39493407])

psigamma([1,2,3], 1)
--------------------
array([1.64493407, 0.64493407, 0.39493407])

choose([4,6], 2)
--------------------
array([ 6., 15.])

lchoose([4,6], 2)
--------------------
array([1.79175947, 2.7080502 ])

factorial([1,5])
--------------------
array([  1., 120.])

lfactorial([1,5])
--------------------
array([0.        , 4.78749174])


In [19]:
# string
# String helpers. Points visible in the recorded output:
# - the pattern "." is treated as a regular expression (matches any character)
#   unless fixed=True is passed, in which case it is a literal dot;
# - grep returns 0-based positions by default and 1-based ones with
#   base0_=False;
# - sub replaces only the first match, gsub replaces every match;
# - nchar('\0') counts one character, but its display width is 0.
# NOTE(review): sprintf passes the float `pi` to a %d placeholder — confirm the
# intended result.

debug(
    is_character("a"),
    is_character(["a", "b"]),
    as_character([1,2]),
    grep(".", ["ab", "c.d"]),
    grep(".", ["ab", "c.d"], fixed=True),
    grep(".", ["ab", "c.d"], fixed=True, base0_=False),
    grepl(".", ["ab", "c.d"], fixed=True),
    sub(".", "x", ["ab", "c.d.e"]),
    sub(".", "x", ["ab", "c.d.e"], fixed=True),
    gsub(".", "x", ["ab", "c.d.e"]),
    gsub(".", "x", ["ab", "c.d.e"], fixed=True),
    nchar('\0'),
    nchar('\0', type='width'),
    nzchar(["a", ""]),
    paste("a", "b"),
    paste(["a", "c"], ["b", "d"], sep="."),
    paste0(["a", "c"], ["b", "d"], collapse="; "),
    sprintf("%s, %d, %.3f", pi, pi, pi),
    substr("abcde", 1, 3),
    substr("abcde", 1, 3, base0_=True),
    substring("abcde", 1),
    strsplit(["a.b.c", "e.f"], ".", fixed=True),
    **debug_kwargs
)


is_character("a")
--------------------
True

is_character(["a", "b"])
--------------------
True

as_character([1,2])
--------------------
['1', '2']

grep(".", ["ab", "c.d"])
--------------------
array([0, 1])

grep(".", ["ab", "c.d"], fixed=True)
--------------------
array([1])

grep(".", ["ab", "c.d"], fixed=True, base0_=False)
--------------------
array([2])

grepl(".", ["ab", "c.d"], fixed=True)
--------------------
array([False,  True])

sub(".", "x", ["ab", "c.d.e"])
--------------------
array(['xb', 'x.d.e'], dtype=object)

sub(".", "x", ["ab", "c.d.e"], fixed=True)
--------------------
array(['ab', 'cxd.e'], dtype=object)

gsub(".", "x", ["ab", "c.d.e"])
--------------------
array(['xx', 'xxxxx'], dtype=object)

gsub(".", "x", ["ab", "c.d.e"], fixed=True)
--------------------
array(['ab', 'cxdxe'], dtype=object)

nchar('\0')
--------------------
array([1])

nchar('\0', type='width')
--------------------
array([0])

nzchar(["a", ""])
--------------------
array([ True, False])



In [20]:
# table
# Tabulate a vector holding four 'a's followed by four 'b's; the recorded
# output is a one-row frame ("count") with 4 under each of the columns a and b.

table(rep(['a', 'b'], each=4))

Unnamed: 0,a,b
,<int64>,<int64>
count,4,4


In [21]:
# testing
# Type predicates. numpy.array([1,2]) has an integer dtype, so in the recorded
# output it is integer and numeric but neither double nor float; forcing
# dtype=numpy.double makes is_double True.
# A plain string counts as atomic.
debug(
    is_double(numpy.array([1,2])),
    is_integer(numpy.array([1,2])),
    is_float(numpy.array([1,2])),
    is_numeric(numpy.array([1,2])),
    is_double(numpy.array([1,2], dtype=numpy.double)),
    is_atomic("abc"),
    is_element(1, [1,2]),
    **debug_kwargs
)


is_double(numpy.array([1,2]))
--------------------
False

is_integer(numpy.array([1,2]))
--------------------
True

is_float(numpy.array([1,2]))
--------------------
False

is_numeric(numpy.array([1,2]))
--------------------
True

is_double(numpy.array([1,2], dtype=numpy.double))
--------------------
True

is_atomic("abc")
--------------------
True

is_element(1, [1,2])
--------------------
True


In [22]:
# trig/hb
# Trigonometric and hyperbolic functions on scalar inputs.
# The *pi variants take their argument in multiples of pi (sinpi(.5) is 1.0).
# Floating-point caveat visible in the recorded output: cospi(.5) is
# 6.1e-17 rather than exactly 0, and tanpi(.5) is a huge finite number (1.6e16)
# rather than an infinity or an error.
debug(
    cos(.5),
    sin(.5),
    tan(.5),
    acos(.5),
    asin(.5),
    atan2(.5, 1),
    cospi(.5),
    sinpi(.5),
    tanpi(.5),
    cosh(.5),
    sinh(.5),
    tanh(.5),
    acosh(1),
    asinh(.5),
    atanh(.5),
    **debug_kwargs
)


cos(.5)
--------------------
0.8775825618903728

sin(.5)
--------------------
0.479425538604203

tan(.5)
--------------------
0.5463024898437905

acos(.5)
--------------------
1.0471975511965979

asin(.5)
--------------------
0.5235987755982989

atan2(.5, 1)
--------------------
0.4636476090008061

cospi(.5)
--------------------
6.123233995736766e-17

sinpi(.5)
--------------------
1.0

tanpi(.5)
--------------------
1.633123935319537e+16

cosh(.5)
--------------------
1.1276259652063807

sinh(.5)
--------------------
0.5210953054937474

tanh(.5)
--------------------
0.46211715726000974

acosh(1)
--------------------
0.0

asinh(.5)
--------------------
0.48121182505960347

atanh(.5)
--------------------
0.5493061443340548


In [23]:
# which
# Positions of truthy / extreme elements. In the recorded output positions are
# 0-based by default ([0, 2]) and 1-based with base0_=False ([1, 3]); this
# default is consistent with 'which_base_0': True in the options() output shown
# earlier. which_max / which_min likewise report 0-based positions (2 and 3).

debug(
    which([True, False, True]),
    which([True, False, True], base0_=False),
    which_max([3,2,4,1]),
    which_min([3,2,4,1]),
    **debug_kwargs
)


which([True, False, True])
--------------------
array([0, 2])

which([True, False, True], base0_=False)
--------------------
array([1, 3])

which_max([3,2,4,1])
--------------------
2

which_min([3,2,4,1])
--------------------
3
