In [1]:
%run nb_helpers.py
import numpy
from datar import f
from datar.base import *
from datar.tibble import tibble

# Shared keyword arguments for every debug(...) call in this notebook:
# start each entry on a fresh line and put a 20-dash rule between the
# entry's label and its value.
debug_kwargs = dict(
    prefix='\n',
    sep=f'\n{"-" * 20}\n',
)

In [2]:
# constants
# Echo the R-style constants (presumably provided by the `datar.base` star
# import at the top of the notebook — confirm).
# NOTE(review): the recorded output below shows both `month_abb` and
# `month_name` as single-letter '<U1' arrays ('J', 'F', 'M', ...). That looks
# like the month strings were truncated to one character — verify against the
# installed datar version before relying on these two constants.
debug(
    pi,
    Inf,
    letters,
    LETTERS,
    month_abb,
    month_name,
    **debug_kwargs
)


pi
--------------------
3.141592653589793

Inf
--------------------
inf

letters
--------------------
array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
       'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'],
      dtype='<U1')

LETTERS
--------------------
array(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
       'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'],
      dtype='<U1')

month_abb
--------------------
array(['J', 'F', 'M', 'A', 'M', 'J', 'J', 'A', 'S', 'O', 'N', 'D'],
      dtype='<U1')

month_name
--------------------
array(['J', 'F', 'M', 'A', 'M', 'J', 'J', 'A', 'S', 'O', 'N', 'D'],
      dtype='<U1')


In [3]:
# verbs
# One-row frame with columns x, y, z; every frame-inspection call below uses it.
df = tibble(x=1, y=2, z=3)

# Per the recorded output: the one-argument forms of colnames/rownames return
# the current labels, while the two-argument forms return a relabelled frame.
# `df` itself still shows x/y/z in the later t(df) and names(df) outputs.
# NOTE(review): the recorded output for this cell is cut off after the
# `union(...)` header, so union/setequal/duplicated results are not shown.
debug(
    colnames(df),
    colnames(df, ['a', 'b', 'c']),
    rownames(df),
    rownames(df, ['row1']),
    dim(df),
    nrow(df),
    ncol(df),
    diag(1, 3),
    diag(diag(1, 3)),
    t(df),
    names(df),
    intersect([1,2], [2,3]),
    setdiff([1,2], [2,3]),
    union([1,2], [2,3]),
    setequal([1,2], [2,1]),
    duplicated([1,1,2,2]),
    duplicated([1,1,2,2], from_last=True),
    **debug_kwargs
)


colnames(df)
--------------------
array(['x', 'y', 'z'], dtype=object)

colnames(df, ['a', 'b', 'c'])
--------------------
        a       b       c
  <int64> <int64> <int64>
0       1       2       3

rownames(df)
--------------------
array([0])

rownames(df, ['row1'])
--------------------
           x       y       z
     <int64> <int64> <int64>
row1       1       2       3

dim(df)
--------------------
(1, 3)

nrow(df)
--------------------
1

ncol(df)
--------------------
3

diag(1, 3)
--------------------
        0       1       2
  <int64> <int64> <int64>
0       1       0       0
1       0       1       0
2       0       0       1

diag(diag(1, 3))
--------------------
array([1, 1, 1])

t(df)
--------------------
        0
  <int64>
x       1
y       2
z       3

names(df)
--------------------
array(['x', 'y', 'z'], dtype=object)

intersect([1,2], [2,3])
--------------------
array([2])

setdiff([1,2], [2,3])
--------------------
array([1])

union([1,2], [2,3])
------------------

In [4]:
# bessel
# Evaluate the four Bessel helpers at x = 1, 2, 3 with second argument 1
# (presumably the order `nu`, as in R's besselI/J/K/Y — TODO confirm).
# Each call returns a 3-element float array in the recorded output.
debug(
    bessel_i([1,2,3], 1),
    bessel_j([1,2,3], 1),
    bessel_k([1,2,3], 1),
    bessel_y([1,2,3], 1),
    **debug_kwargs
)


bessel_i([1,2,3], 1)
--------------------
array([0.5651591 , 1.59063685, 3.95337022])

bessel_j([1,2,3], 1)
--------------------
array([0.44005059, 0.57672481, 0.33905896])

bessel_k([1,2,3], 1)
--------------------
array([0.60190723, 0.13986588, 0.04015643])

bessel_y([1,2,3], 1)
--------------------
array([-0.78121282, -0.10703243,  0.32467442])


In [5]:
# casting
# Mixed str/number inputs on purpose. Per the recorded output, as_integer
# gives [1, 2] for ['1', 2.1], and as_float/as_double/as_numeric all give
# [1.0, 2.0, nan] (the string 'nan' becomes a float nan). Results print as
# plain lists here, not numpy arrays.
debug(
    as_integer(['1', 2.1]),
    as_float(['1', 2, 'nan']),
    as_double(['1', 2, 'nan']),
    as_numeric(['1', 2, 'nan']),
    **debug_kwargs
)



as_integer(['1', 2.1])
--------------------
[1, 2]

as_float(['1', 2, 'nan'])
--------------------
[1.0, 2.0, nan]

as_double(['1', 2, 'nan'])
--------------------
[1.0, 2.0, nan]

as_numeric(['1', 2, 'nan'])
--------------------
[1.0, 2.0, nan]


In [6]:
# complex numbers
# Scalar complex sample used by every call below.
cm = 1 + 2j
# Per the recorded output: im/re/mod/conj return 1-element arrays for this
# scalar input, whereas is_complex returns a bool and as_complex(1) returns
# a plain Python complex (1+0j).
debug(
    im(cm),
    re(cm),
    mod(cm),
    conj(cm),
    is_complex(cm),
    as_complex(1),
    **debug_kwargs
)


im(cm)
--------------------
array([2.])

re(cm)
--------------------
array([1.])

mod(cm)
--------------------
array([2.23606798])

conj(cm)
--------------------
array([1.-2.j])

is_complex(cm)
--------------------
True

as_complex(1)
--------------------
(1+0j)


In [7]:
# cum stats
# Unsorted sample so that running min and running max give different shapes.
a = [1,3,2,4]

# Running sum / product / minimum / maximum over `a`; each result in the
# recorded output is an integer array with the same length as `a`.
debug(
    cumsum(a),
    cumprod(a),
    cummin(a),
    cummax(a),
    **debug_kwargs
)


cumsum(a)
--------------------
array([ 1,  4,  6, 10])

cumprod(a)
--------------------
array([ 1,  3,  6, 24])

cummin(a)
--------------------
array([1, 1, 1, 1])

cummax(a)
--------------------
array([1, 3, 3, 4])


In [8]:
# date
# First two calls parse strings with an explicit strptime-style format
# (second positional argument). The third passes a number plus `origin=`;
# the recorded result is 1989-09-19 (presumably 32768 days after the origin —
# TODO confirm the unit). All three print as a pandas DatetimeIndex.
debug(
    as_date(["1jan1960", "2jan1960", "31mar1960", "30jul1960"], "%d%b%Y"),
    as_date(["02/27/92", "02/27/92", "01/14/92", "02/28/92", "02/01/92"], "%m/%d/%y"),
    as_date(32768, origin="1900-01-01"),
    **debug_kwargs
)


as_date(["1jan1960", "2jan1960", "31mar1960", "30jul1960"], "%d%b%Y")
--------------------
DatetimeIndex(['1960-01-01', '1960-01-02', '1960-03-31', '1960-07-30'], dtype='datetime64[ns]', freq=None)

as_date(["02/27/92", "02/27/92", "01/14/92", "02/28/92", "02/01/92"], "%m/%d/%y")
--------------------
DatetimeIndex(['1992-02-27', '1992-02-27', '1992-01-14', '1992-02-28',
               '1992-02-01'],
              dtype='datetime64[ns]', freq=None)

as_date(32768, origin="1900-01-01")
--------------------
DatetimeIndex(['1989-09-19'], dtype='datetime64[ns]', freq=None)


In [9]:
# factor
# Three values but four declared levels, so level 4 is unused on purpose.
fct = factor([1,2,3], levels=[1,2,3,4])
# Per the recorded output: levels(fct) lists all four levels, while
# droplevels(fct) keeps only the three levels that actually occur.
debug(
    levels(fct),
    droplevels(fct),
    is_factor(fct),
    as_factor([1,2,3]),
    **debug_kwargs
)


levels(fct)
--------------------
array([1, 2, 3, 4])

droplevels(fct)
--------------------
[1, 2, 3]
Categories (3, int64): [1, 2, 3]

is_factor(fct)
--------------------
True

as_factor([1,2,3])
--------------------
[1, 2, 3]
Categories (3, int64): [1, 2, 3]


In [10]:
# logical
# TRUE/FALSE print as Python's True/False in the recorded output.
# `not is_true([TRUE, TRUE])` prints True, i.e. is_true is falsy for a list
# of TRUEs — presumably it only accepts a single scalar True, like R's
# isTRUE (TODO confirm). is_logical accepts both a scalar and a list.

debug(
    TRUE,
    FALSE,
    is_true(TRUE),
    not is_true([TRUE, TRUE]),
    is_false(FALSE),
    is_logical(TRUE),
    is_logical([TRUE, FALSE]),
    as_logical([0, 1]),
    **debug_kwargs
)


TRUE
--------------------
True

FALSE
--------------------
False

is_true(TRUE)
--------------------
True

not is_true([TRUE, TRUE])
--------------------
True

is_false(FALSE)
--------------------
True

is_logical(TRUE)
--------------------
True

is_logical([TRUE, FALSE])
--------------------
True

as_logical([0, 1])
--------------------
[False, True]


In [11]:
# na
# Per the recorded output: NA and NaN are the same float object
# (`NA is NaN` is True, type is float), and is_na also flags None.
# The three numpy.array lines show how NA behaves inside arrays: it stays a
# float nan next to numbers, but becomes the string 'nan' in a string array;
# using None instead keeps the array as dtype=object.

debug(
    NA,
    NaN,
    NA is NaN,
    type(NA),
    is_na([NA, NaN, None]),
    any_na([1,2, NA]),
    numpy.array([1,2,NA]),
    numpy.array(['a', 'b', NA]),
    numpy.array(['a', 'b', None]),
    **debug_kwargs
)


NA
--------------------
nan

NaN
--------------------
nan

NA is NaN
--------------------
True

type(NA)
--------------------
<class 'float'>

is_na([NA, NaN, None])
--------------------
array([ True,  True,  True])

any_na([1,2, NA])
--------------------
True

numpy.array([1,2,NA])
--------------------
array([ 1.,  2., nan])

numpy.array(['a', 'b', NA])
--------------------
array(['a', 'b', 'nan'], dtype='<U32')

numpy.array(['a', 'b', None])
--------------------
array(['a', 'b', None], dtype=object)


In [12]:
# null
# Per the recorded output: NULL is Python's None (`NULL is None` is True),
# NA does not count as NULL, and as_null returns None for the given input.

debug(
    NULL,
    NULL is None,
    is_null(NA),
    is_null(NULL),
    as_null('whatever'),
    **debug_kwargs
)


NULL
--------------------
None

NULL is None
--------------------
True

is_null(NA)
--------------------
False

is_null(NULL)
--------------------
True

as_null('whatever')
--------------------
None


In [13]:
# random
# Seed first, then draw: with this seed the sample is expected to be stable.
set_seed(8525)

candidates = seq(1, 100)
sample(candidates, 10) # always: [19, 61, 85, 93, 18,  5, 22,  7, 29, 26]

array([19, 61, 85, 93, 18,  5, 22,  7, 29, 26])

In [14]:
# seq
# Per the recorded output: seq_len/seq_along produce 1-based sequences,
# rep(..., 2) repeats the whole vector while each=2 repeats every element,
# and lengths() prints as a Series named 'x'.
debug(
    str(c(c(1,2), seq(3,4))),
    seq_len(5),
    seq_along([4,2,1]),
    rev(seq_len(5)),
    rep([1,2], 2),
    rep([1,2], each=2),
    length([1,2]),
    length("abcd"), # string is scalar
    lengths(10),
    lengths([[1], [1,2]]),
    unique([3,3,2,4,4]), # order kept
    **debug_kwargs
)


str(c(c(1,2), seq(3,4)))
--------------------
'[1, 2, 3, 4]'

seq_len(5)
--------------------
array([1, 2, 3, 4, 5])

seq_along([4,2,1])
--------------------
array([1, 2, 3])

rev(seq_len(5))
--------------------
array([5, 4, 3, 2, 1])

rep([1,2], 2)
--------------------
array([1, 2, 1, 2])

rep([1,2], each=2)
--------------------
array([1, 1, 2, 2])

length([1,2])
--------------------
2

length("abcd")
--------------------
1

lengths(10)
--------------------
0    1
Name: x, dtype: int64

lengths([[1], [1,2]])
--------------------
0    1
1    2
Name: x, dtype: int64

unique([3,3,2,4,4])
--------------------
array([3, 2, 4])


In [15]:
# special
# Beta/gamma family plus combinatorics, each evaluated on a short list.
# In the recorded output psigamma([1,2,3], 1) equals trigamma([1,2,3]),
# and choose/factorial return float arrays (e.g. 6., 15. and 1., 120.).

debug(
    beta([1,2,3], 1),
    lbeta([1,2,3], 1),
    gamma([1,2,3]),
    lgamma([1,2,3]),
    digamma([1,2,3]),
    trigamma([1,2,3]),
    psigamma([1,2,3], 1),
    choose([4,6], 2),
    lchoose([4,6], 2),
    factorial([1,5]),
    lfactorial([1,5]),
    **debug_kwargs
)


beta([1,2,3], 1)
--------------------
array([1.        , 0.5       , 0.33333333])

lbeta([1,2,3], 1)
--------------------
array([ 0.        , -0.69314718, -1.09861229])

gamma([1,2,3])
--------------------
array([1., 1., 2.])

lgamma([1,2,3])
--------------------
array([0.        , 0.        , 0.69314718])

digamma([1,2,3])
--------------------
array([-0.57721566,  0.42278434,  0.92278434])

trigamma([1,2,3])
--------------------
array([1.64493407, 0.64493407, 0.39493407])

psigamma([1,2,3], 1)
--------------------
array([1.64493407, 0.64493407, 0.39493407])

choose([4,6], 2)
--------------------
array([ 6., 15.])

lchoose([4,6], 2)
--------------------
array([1.79175947, 2.7080502 ])

factorial([1,5])
--------------------
array([  1., 120.])

lfactorial([1,5])
--------------------
array([0.        , 4.78749174])


In [16]:
# string
# Per the recorded output: grep returns 0-based positions, and the pattern
# "." is treated as a regex (matches any character) unless fixed=True is
# passed, in which case it matches a literal dot. sub replaces the first
# match per element, gsub replaces all of them.
# NOTE(review): the recorded output for this cell is cut off in the middle of
# the paste(...) section, so the results of the later calls are not shown.

debug(
    is_character("a"),
    is_character(["a", "b"]),
    as_character([1,2]),
    grep(".", ["ab", "c.d"]),
    grep(".", ["ab", "c.d"], fixed=True),
    grepl(".", ["ab", "c.d"], fixed=True),
    sub(".", "x", ["ab", "c.d.e"]),
    sub(".", "x", ["ab", "c.d.e"], fixed=True),
    gsub(".", "x", ["ab", "c.d.e"]),
    gsub(".", "x", ["ab", "c.d.e"], fixed=True),
    nchar('\0'),
    nchar('\0', type='width'),
    nzchar(["a", ""]),
    paste("a", "b"),
    paste(["a", "c"], ["b", "d"], sep="."),
    paste0(["a", "c"], ["b", "d"], collapse="; "),
    sprintf("%s, %d, %.3f", pi, pi, pi),
    substr("abcde", 1, 3),
    substring("abcde", 1),
    strsplit(["a.b.c", "e.f"], ".", fixed=True),
    **debug_kwargs
)


is_character("a")
--------------------
True

is_character(["a", "b"])
--------------------
True

as_character([1,2])
--------------------
['1', '2']

grep(".", ["ab", "c.d"])
--------------------
array([0, 1])

grep(".", ["ab", "c.d"], fixed=True)
--------------------
array([1])

grepl(".", ["ab", "c.d"], fixed=True)
--------------------
array([False,  True])

sub(".", "x", ["ab", "c.d.e"])
--------------------
array(['xb', 'x.d.e'], dtype='<U5')

sub(".", "x", ["ab", "c.d.e"], fixed=True)
--------------------
array(['ab', 'cxd.e'], dtype='<U5')

gsub(".", "x", ["ab", "c.d.e"])
--------------------
array(['xx', 'xxxxx'], dtype='<U5')

gsub(".", "x", ["ab", "c.d.e"], fixed=True)
--------------------
array(['ab', 'cxdxe'], dtype='<U5')

nchar('\0')
--------------------
array([1])

nchar('\0', type='width')
--------------------
array([0])

nzchar(["a", ""])
--------------------
array([ True, False])

paste("a", "b")
--------------------
array(['a b'], dtype=object)

paste(["a", "c"], ["b

In [17]:
# table
# Count occurrences of each value: rep(..., each=4) yields four 'a' followed
# by four 'b' (see the rep(..., each=2) output in the seq cell), and the
# recorded result is a one-row frame labelled 'count' with a=4, b=4.

table(rep(['a', 'b'], each=4))

Unnamed: 0,a,b
,<int64>,<int64>
count,4,4


In [18]:
# testing
# Per the recorded output: numpy.array([1,2]) counts as integer/numeric but
# not double/float; the same values built with dtype=numpy.double pass
# is_double. is_element returns a 0-d numpy array (array(True)), not a bool.
debug(
    is_double(numpy.array([1,2])),
    is_integer(numpy.array([1,2])),
    is_float(numpy.array([1,2])),
    is_numeric(numpy.array([1,2])),
    is_double(numpy.array([1,2], dtype=numpy.double)),
    is_atomic("abc"),
    is_element(1, [1,2]),
    **debug_kwargs
)


is_double(numpy.array([1,2]))
--------------------
False

is_integer(numpy.array([1,2]))
--------------------
True

is_float(numpy.array([1,2]))
--------------------
False

is_numeric(numpy.array([1,2]))
--------------------
True

is_double(numpy.array([1,2], dtype=numpy.double))
--------------------
True

is_atomic("abc")
--------------------
True

is_element(1, [1,2])
--------------------
array(True)


In [19]:
# trig/hb
# Trigonometric and hyperbolic helpers on scalar inputs; most return a
# 1-element array in the recorded output, but atan2 prints as a Series-like
# object with dtype float64.
# cospi/sinpi/tanpi presumably evaluate at pi * x (as in R — TODO confirm):
# the recorded cospi(.5) is ~6.1e-17 rather than exactly 0 and tanpi(.5) is
# ~1.6e16 rather than inf, i.e. ordinary floating-point error.
debug(
    cos(.5),
    sin(.5),
    tan(.5),
    acos(.5),
    asin(.5),
    atan2(.5, 1),
    cospi(.5),
    sinpi(.5),
    tanpi(.5),
    cosh(.5),
    sinh(.5),
    tanh(.5),
    acosh(1),
    asinh(.5),
    atanh(.5),
    **debug_kwargs
)


cos(.5)
--------------------
array([0.87758256])

sin(.5)
--------------------
array([0.47942554])

tan(.5)
--------------------
array([0.54630249])

acos(.5)
--------------------
array([1.04719755])

asin(.5)
--------------------
array([0.52359878])

atan2(.5, 1)
--------------------
0    0.463648
dtype: float64

cospi(.5)
--------------------
array([6.123234e-17])

sinpi(.5)
--------------------
array([1.])

tanpi(.5)
--------------------
array([1.63312394e+16])

cosh(.5)
--------------------
array([1.12762597])

sinh(.5)
--------------------
array([0.52109531])

tanh(.5)
--------------------
array([0.46211716])

acosh(1)
--------------------
array([0.])

asinh(.5)
--------------------
array([0.48121183])

atanh(.5)
--------------------
array([0.54930614])


In [20]:
# which
# Positions are 0-based in the recorded output: which([True, False, True])
# gives [0, 2], which_max([3,2,4,1]) gives 2 and which_min gives 3.

debug(
    which([True, False, True]),
    which_max([3,2,4,1]),
    which_min([3,2,4,1]),
    **debug_kwargs
)


which([True, False, True])
--------------------
array([0, 2])

which_max([3,2,4,1])
--------------------
2

which_min([3,2,4,1])
--------------------
3
