# Nested Structures

## Nested Accessor

In [1]:
import pandas as pd

In [2]:
from colassigner import ColAccessor

In [3]:
# Innermost accessor level: declares two columns, `x` and `y`, both with `str`.
class GrandChildCols(ColAccessor):
    x = str
    y = str


# Middle accessor level: two plain columns plus two nested accessors.
class ChildCols(ColAccessor):
    a = int
    b = float
    # The same accessor class is attached twice under different attribute
    # names; presumably each attribute name becomes its own name prefix
    # (the example output contains `...grandchild_a__y`).
    grandchild_a = GrandChildCols
    grandchild_b = GrandChildCols


# Top-level accessor showing the two ways of nesting a child accessor.
class Cols(ColAccessor):

    fing = int
    # Nesting by assignment: the attribute name shows up as the prefix
    # (`assigned_child__...` in the example output).
    assigned_child = ChildCols

    # Nesting by inner subclass: the class name shows up in snake_case as the
    # prefix (`inherited_child__b` in the example output).
    class InheritedChild(ChildCols):
        pass

In [4]:
# Accessor attributes are used directly as dictionary keys here; the rendered
# output shows the resulting column names, with nested levels joined by "__".
nested_frame_data = {
    Cols.fing: [2, 3, 4],
    Cols.assigned_child.grandchild_a.y: ["a", "b", "c"],
    Cols.InheritedChild.b: [0.1, 0.2, 0.3],
}
pd.DataFrame(nested_frame_data)

Unnamed: 0,fing,assigned_child__grandchild_a__y,inherited_child__b
0,2,a,0.1
1,3,b,0.2
2,4,c,0.3


## Nested Assigner

In [5]:
from colassigner import ColAssigner

In [6]:
# Names of the input columns that the assigners in this section read from.
class SourceCols(ColAccessor):
    
    x = float
    b = bool

# Stand-alone assigner that derives two columns from the column named by
# `_col`. Subclasses can point it at another column by overriding `_col`.
class SepChild(ColAssigner):
    # Source column name; no `_col` column appears in the example output,
    # presumably because underscore-prefixed attributes are not assigned.
    _col = SourceCols.x
    
    # Negated source column.
    def neg(self, df):
        return -df[self._col]
    
    # Source column multiplied by two.
    def double(self, df):
        return 2 * df[self._col]

# Top-level assigner demonstrating nested assigners. Note that this rebinds
# the name `Cols`, which an earlier cell of this file defined as a ColAccessor.
class Cols(ColAssigner):
    # Returns a scalar; the example output shows it filled into every row.
    def col_one(self, df):
        return 1

    # Nested assigner defined inline; its columns carry the `sub_col__` prefix
    # in the example output.
    class SubCol(ColAssigner):
        # Row-wise sum over the columns present in `df` when this is called
        # (in the example output that is x + b + col_one).
        def fing(self, df):
            return df.sum(axis=1)

        class SubSubCol(ColAssigner):
            # Plain class attribute used by `sub_y`, not a column.
            _prefix = "pref_"

            def sub_x(self, df):
                return 0

            # Prepends `_prefix` to the stringified `col_one` column, so it
            # needs `col_one` to exist in `df` already.
            def sub_y(self, df):
                return self._prefix + df[Cols.col_one].astype(str)

        # Same columns as SubSubCol, with a different string prefix in `sub_y`.
        class SubSubCol2(SubSubCol):
            _prefix = "pref2_"

    # Separately defined assigner attached by assignment.
    sep_child = SepChild
    
    # Variant of SepChild that reads the `b` source column instead of `x`.
    # NOTE(review): the rendered output in this file lists no
    # `sep_child_b__*` rows -- confirm whether that output is stale.
    class SepChildB(SepChild):
        _col = SourceCols.b

In [7]:
# Raw input frame whose two columns are named through SourceCols.
source_frame = pd.DataFrame(
    {
        SourceCols.x: [1.5, 3.4, 9.1],
        SourceCols.b: [False, True, True],
    }
)
# The assigner instance is passed to `.pipe`, which calls it with the frame.
df = source_frame.pipe(Cols())

In [8]:
# Show the frame transposed so that each assigned column reads as one row.
df.transpose()

Unnamed: 0,0,1,2
x,1.5,3.4,9.1
b,False,True,True
col_one,1,1,1
sub_col__fing,2.5,5.4,11.1
sub_col__sub_sub_col__sub_x,0,0,0
sub_col__sub_sub_col__sub_y,pref_1,pref_1,pref_1
sub_col__sub_sub_col_2__sub_x,0,0,0
sub_col__sub_sub_col_2__sub_y,pref2_1,pref2_1,pref2_1
sep_child__neg,-1.5,-3.4,-9.1
sep_child__double,3.0,6.8,18.2


In [9]:
# The assigner class also serves as an accessor for the columns it created.
selected_columns = [Cols.sep_child.double, Cols.SubCol.SubSubCol2.sub_x]
df.loc[:, selected_columns]

Unnamed: 0,sep_child__double,sub_col__sub_sub_col_2__sub_x
0,3.0,0
1,6.8,0
2,18.2,0


## Designated Child Assigner

> These are designed for sharing information among assigners. Their methods **do not** take the dataframe as an argument; instead, their `__init__` receives both the dataframe and the parent assigner as parameters.

In [10]:
import numpy as np

from colassigner import ChildColAssigner

In [11]:
# Columns of the reference frame that is grouped (by `cat`) and averaged
# (over `num`) in BaseCols.__init__.
class RawCols(ColAccessor):
    
    cat = str
    num = int

# Columns of the frame being extended; each is used as a `main_col` lookup
# key by a GbReindex subclass.
class RawCols2(ColAccessor):
    b = str
    c = str

# Child assigner producing the integer floor and ceiling of an array that its
# parent assigner has already computed.
class IntSides(ChildColAssigner):
    
    # note the type and order of the parameters:
    # `df` is accepted but unused here; only the parent's `arr` is kept.
    def __init__(self, df, parent_assigner: "GbReindex") -> None:
        self.arr = parent_assigner.arr

    # note the absence of parameters
    # Largest integers not greater than the values in `arr`.
    def lower(self):
        return np.floor(self.arr).astype(int)

    # Smallest integers not less than the values in `arr`.
    def upper(self):
        return np.ceil(self.arr).astype(int)

# Child assigner that looks up the parent's precomputed group values for each
# row of `df`, keyed by the column named in `main_col`.
class GbReindex(ChildColAssigner):
    # Placeholder; concrete subclasses set this to the lookup column name.
    main_col = ...

    # `bc` is the parent assigner; it must expose `base_gb`.
    def __init__(self, df, bc: "BaseCols"):
        # note that this reindex needs to be done only once
        # and can be used in many child assigners
        self.arr = bc.base_gb.reindex(df[self.main_col]).values

    # Exposes the looked-up array as a column of its own.
    def values(self):
        return self.arr

    # Nested child assigner; IntSides reads `arr` from its parent, which is
    # presumably this GbReindex instance.
    sides = IntSides

# Top-level assigner that precomputes a per-category mean once and shares it
# with its child assigners.
class BaseCols(ColAssigner):
    # `base_df` needs the RawCols columns: it is grouped by `cat` and the
    # mean of `num` per group is stored for the children to reindex.
    def __init__(self, base_df):
        self.base_gb = base_df.groupby(RawCols.cat)[RawCols.num].mean()

    # Group means looked up by column `b`.
    class GbB(GbReindex):
        main_col = RawCols2.b

    # Group means looked up by column `c`.
    class GbC(GbReindex):
        main_col = RawCols2.c

    # Row-wise product of two columns produced by the child assigners above;
    # presumably relies on those columns having been assigned before this runs.
    def prod(self, df):
        return df.loc[
            :, [BaseCols.GbB.sides.lower, BaseCols.GbC.values]
        ].prod(axis=1)

In [12]:
# Reference frame with one category column and one numeric column, both
# named through the RawCols accessor.
df1 = pd.DataFrame(
    data={
        RawCols.cat: ["x", "y", "y"],
        RawCols.num: [2, 3, 4],
    }
)

In [13]:
# BaseCols.__init__ computes the per-category means from df1 at this point;
# the resulting instance is then applied to another frame.
assigner = BaseCols(df1)

In [14]:
# Name the input columns through the RawCols2 accessor instead of repeating
# the string literals "b" and "c": these are the same names GbB and GbC use
# as `main_col`, so the frame and the assigners cannot drift apart.
df2 = pd.DataFrame(
    {
        RawCols2.b: ["x", "y", "x"],
        RawCols2.c: ["y", "y", "x"],
    }
).pipe(assigner)

In [15]:
# Display the input columns together with everything the assigner added.
df2

Unnamed: 0,b,c,gb_b__values,gb_b__sides__lower,gb_b__sides__upper,gb_c__values,gb_c__sides__lower,gb_c__sides__upper,prod
0,x,y,2.0,2,2,3.5,3,4,7.0
1,y,y,3.5,3,4,3.5,3,4,10.5
2,x,x,2.0,2,2,2.0,2,2,4.0
