In [2]:
# datar specific

import numpy
from datar import f
from datar.data import iris
from datar.base import as_date, factor, c
from datar.other import *
from datar.dplyr import mutate, group_by
from datar.tibble import tibble

# Execute the helper script in this kernel's namespace.
# NOTE(review): `nb_header` is not imported in this cell, so it is presumably
# defined by nb_helpers.py — confirm.
%run nb_helpers.py
# The rendered output below this cell shows one documentation section per
# callable passed here (itemgetter, attrgetter, pd_str, pd_cat, pd_dt).
nb_header(
    # `get` and `flatten` are disabled; the cells that used them further
    # down are commented out as well.
    # get, 
    # flatten, 
    itemgetter, 
    attrgetter, 
    pd_str, 
    pd_cat, 
    pd_dt, 
    book='datar',
)

### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ itemgetter</div>

##### Itemgetter as a function for verb

In a datar expression, we cannot do:  
>>> arr = [1,2,3]  
>>> tibble(x=2) >> mutate(y=arr[f.x])  

Since `arr[f.x]` won't compile, we need to use the `itemgetter` function instead:  
>>> tibble(x=2) >> mutate(y=itemgetter(arr, f.x))  

##### Args:
&emsp;&emsp;`data`: The data to get items from  
&emsp;&emsp;`subscr`: The subscripts  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ attrgetter</div>

##### Attrgetter as a function for verb

This is helpful when we want to access an accessor  
(e.g. CategoricalAccessor) from a SeriesGroupBy object  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ pd_str</div>

##### Pandas' str accessor for a Series (x.str)

This is helpful when x is a SeriesGroupBy object  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ pd_cat</div>

##### Pandas' cat accessor for a Series (x.cat)

This is helpful when x is a SeriesGroupBy object  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ pd_dt</div>

##### Pandas' dt accessor for a Series (x.dt)

This is helpful when x is a SeriesGroupBy object  


In [3]:
# iris >> get(c[:5])

In [4]:
# iris >> get(cols=f.Species)

In [5]:
# select single element
# iris >> get(1, f.Species)

In [6]:
# get it as a single-element dataframe
# iris >> get([1], f.Species)

In [7]:
# or 
# iris >> get(1, [f.Species])

In [8]:
# Two-row integer frame (x = 1, 2 and y = 3, 4 per the output of the next
# executed cell); it supplies the subscripts for the itemgetter example.
df = tibble(x=c[1:3], y=c[3:5])
# Disabled: `flatten` is also commented out of the header cell.
# df >> flatten()

In [12]:
# Lookup array that the integer columns of `df` are used to index into.
arr = numpy.array(['a', 'b', 'c', 'd', 'e'])
# Direct subscripting with a column reference errors out:
#   df >> mutate(a=arr[f.x], b=arr[f.y])  # Error
# so each lookup goes through `itemgetter`, one new column per keyword.
df >> mutate(
    a=itemgetter(arr, f.x.values),
    b=itemgetter(arr, f.y.values),
)

Unnamed: 0,x,y,a,b
,<int64>,<int64>,<object>,<object>
0.0,1,3,b,d
1.0,2,4,c,e


In [13]:
# Rebind `df` to a frame with a single string column.
df = tibble(x=["abc", "def"])
# Look up the `str` attribute of column x by name, then upper-case each value.
df >> mutate(a=attrgetter(f.x, 'str').upper())

Unnamed: 0,x,a
,<object>,<object>
0.0,abc,ABC
1.0,def,DEF


In [14]:
# Alternative spelling using the dedicated helper (left disabled):
# df >> mutate(a=pd_str(f.x).upper())
# Alternative spelling using the accessor directly on the column reference;
# the output matches the attrgetter version above.
df >> mutate(a=f.x.str.upper())

Unnamed: 0,x,a
,<object>,<object>
0.0,abc,ABC
1.0,def,DEF


In [15]:
# When the frame is grouped, reach the string accessor through `pd_str`.
# `group_by(g=[1, 2])` adds column g and groups on it (see the output below).
gf = df >> group_by(g=[1, 2])
# pd_str(gf.x)[:2].obj
# Keep the first two characters of each string in the grouped column.
gf >> mutate(a=pd_str(gf.x)[:2])

Unnamed: 0,x,g,a
,<object>,<int64>,<object>
0.0,abc,1,ab
1.0,def,2,de


In [16]:
# Build the grouped frame one step at a time, re-binding `gf` at each stage:
# raw date strings -> parsed dates -> grouped on a new column g.
gf = tibble(x=["2022-01-01", "2022-12-02"])
gf = gf >> mutate(x=as_date(f.x, format="%Y-%m-%d"))
gf = gf >> group_by(g=[1, 2])
# `gf` comes from group_by, so the datetime accessor is reached via `pd_dt`.
gf >> mutate(month=pd_dt(gf.x).month)

Unnamed: 0,x,g,month
,<datetime64[ns]>,<int64>,<int64>
0.0,2022-01-01,1,1
1.0,2022-12-02,2,12


In [17]:
# Categorical column with an unused third level, then grouped on a new
# column g; `gf` is re-bound at each stage.
gf = tibble(x=factor([1, 2], levels=[1, 2, 3]))
gf = gf >> group_by(g=[1, 2])
# `gf` comes from group_by, so the categorical accessor is reached via `pd_cat`.
gf >> mutate(codes=pd_cat(gf.x).codes)

Unnamed: 0,x,g,codes
,<category>,<int64>,<int8>
0.0,1,1,0
1.0,2,2,1
