# NestedFrame `map_rows` Demo

In [4]:
from nested_pandas.datasets import generate_data
import numpy as np

In [33]:
# Small demo frame: 3 base rows, each with 10 nested rows (seeded so the values repeat)
nf = generate_data(3, 10, seed=1)
nf

Unnamed: 0_level_0,a,b,nested
t,flux,band,Unnamed: 3_level_1
t,flux,band,Unnamed: 3_level_2
t,flux,band,Unnamed: 3_level_3
0,0.417022,0.604665,t  flux  band  3.725204  68.650093  r  +9 rows  ...  ...
t,flux,band,
3.725204,68.650093,r,
+9 rows,...,...,
1,0.720324,0.293512,t  flux  band  6.911215  83.462567  g  +9 rows  ...  ...
t,flux,band,
6.911215,83.462567,g,
+9 rows,...,...,
2,0.000114,0.184677,t  flux  band  7.935349  1.828828  r  +9 rows  ...  ...
t,flux,band,

t,flux,band
3.725204,68.650093,r
+9 rows,...,...

t,flux,band
6.911215,83.462567,g
+9 rows,...,...

t,flux,band
7.935349,1.828828,r
+9 rows,...,...


## The "default" example -- dictionary packaging

In [34]:
# row is a dict whose keys are the column names of the NestedFrame
# (nested sub-columns appear as "<nested>.<subcolumn>", e.g. "nested.t")
def example_func(row):
    """Summarize the nested time values of a single row.

    Parameters
    ----------
    row : dict
        Maps column names to this row's values. Must contain "nested.t"
        (the row's nested time values) and "a" (a base-column value).

    Returns
    -------
    tuple
        ``(mean, mean - row["a"])`` where ``mean`` is ``np.mean(row["nested.t"])``.
    """
    # Compute the mean once and reuse it for both outputs
    mean_t = np.mean(row["nested.t"])
    return mean_t, mean_t - row["a"]

# Each value returned by the function becomes one output column, named via output_names
nf.map_rows(
    example_func,
    output_names=["mean", "mean_minus_base"],
)

Unnamed: 0,mean,mean_minus_base
0,8.511203,8.094181
1,9.49825,8.777925
2,10.795447,10.795333


## Column Selection

In [35]:
# `columns` restricts which columns get packaged into the row dict
nf.map_rows(
    example_func,
    columns=["a", "nested.t"],
    output_names=["mean", "mean_minus_base"],
)

Unnamed: 0,mean,mean_minus_base
0,8.511203,8.094181
1,9.49825,8.777925
2,10.795447,10.795333


### Performance Impacts of Column Selection

In [36]:
%%timeit
nf.map_rows(example_func, output_names=["mean", "mean_minus_base"])

789 μs ± 24.4 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [37]:
%%timeit
nf.map_rows(example_func, columns=["a", "nested.t"], output_names=["mean", "mean_minus_base"])

281 μs ± 21.4 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


### Selecting one Nested Structure

In [38]:
# A function that programmatically collects the first value of every column in the row
def first_val(row):
    """Return a dict with the first value of each column packaged into ``row``.

    Keys of ``row`` are expected to look like "<nested>.<subcolumn>"; the
    second dot-separated segment of each key names the output entry, giving
    "first_<subcolumn>".
    """
    firsts = {}
    for key, values in row.items():
        subcolumn = key.split(".")[1]
        firsts["first_" + subcolumn] = values[0]
    return firsts

# Passing just the name of the nested column selects all of its sub-columns
# (here "nested" yields the t, flux and band entries shown in the output)
nf.map_rows(first_val, columns="nested")

Unnamed: 0,first_t,first_flux,first_band
0,3.725204,68.650093,r
1,6.911215,83.462567,g
2,7.935349,1.828828,r


## `args`-based inputs

In [39]:
# A function that takes its inputs as individual arguments rather than a row dict.
# NOTE: this reuses the name `example_func`, replacing the dict-based version defined earlier.
def example_func(a, time):
    """Return the mean of ``time`` and that mean minus ``a``."""
    mean_time = np.mean(time)
    return mean_time, mean_time - a

# row_container="args" hands each selected column to the function as a separate argument
nf.map_rows(
    example_func,
    columns=["a", "nested.t"],
    output_names=["mean", "mean_minus_base"],
    row_container="args",
)

Unnamed: 0,mean,mean_minus_base
0,8.511203,8.094181
1,9.49825,8.777925
2,10.795447,10.795333


## Non-df inputs

In [40]:
def example_func(row, scale):
    """Return the mean of ``row["nested.t"]`` multiplied by ``scale``."""
    mean_t = np.mean(row["nested.t"])
    return mean_t * scale

# Inputs that are not tied to a column (here scale=20) are supplied as extra keyword arguments
nf.map_rows(
    example_func,
    columns=["nested.t"],
    output_names="mean",
    scale=20,
)

Unnamed: 0,mean
0,170.224051
1,189.964999
2,215.908938


## Returning Nested Structures

In [42]:
def example_func(row):
    """Offset the row's nested time values by each of its base columns.

    Returns the pair ``(row["nested.t"] - row["a"], row["nested.t"] - row["b"])``.
    With these outputs map_rows will return a NestedFrame with nested structure.
    """
    times = row["nested.t"]
    return times - row["a"], times - row["b"]

# The shared "offsets." prefix places both outputs under one nested column; this
# behavior is controlled by the "infer_nesting" kwarg, which is True by default
nf.map_rows(
    example_func,
    output_names=["offsets.t_a", "offsets.t_b"],
)

Unnamed: 0_level_0,offsets
t_a,t_b
t_a,t_b
t_a,t_b
0,t_a  t_b  3.308182  3.120539  +9 rows  ...
t_a,t_b
3.308182,3.120539
+9 rows,...
1,t_a  t_b  6.19089  6.617703  +9 rows  ...
t_a,t_b
6.19089,6.617703
+9 rows,...
2,t_a  t_b  7.935235  7.750672  +9 rows  ...
t_a,t_b

t_a,t_b
3.308182,3.120539
+9 rows,...

t_a,t_b
6.19089,6.617703
+9 rows,...

t_a,t_b
7.935235,7.750672
+9 rows,...
