In [1]:
import pandas as pd

import sys
sys.path.append('../src')
from cellopype import Cell, Pype

In [3]:
# Two small demo tables that share the 'name' column:
# df1 holds each person's age, df2 holds each person's country.
df1 = pd.DataFrame({
    'name': ['tom', 'nick', 'julie'],
    'age': [10, 15, 14],
})
df2 = pd.DataFrame({
    'name': ['tom', 'nick', 'julie'],
    'country': ['USA', 'UK', 'France'],
})

In [4]:
# Show df1 as just created (the ages before any later modification).
df1

Unnamed: 0,name,age
0,tom,10
1,nick,15
2,julie,14


In [5]:
# Container for the cells: cells are attached to it as attributes
# (pp.<name> = Cell(...)) and can be looked up by name with pp['<name>'].
pp = Pype()

In [6]:
# 1. df1 & df2
# Source cells: no `sources` are given, and each recalc takes no arguments and
# simply returns the notebook-level DataFrame. The lambda looks the frame up
# when it is called, not when the Cell is created.
pp.df1 = Cell(recalc=lambda: df1)
pp.df2 = Cell(recalc=lambda: df2)

In [8]:
# Status snapshot of the two source cells; re-run this cell whenever needed.
# It reads the private _dirty/_value attributes directly rather than .value
# (presumably so that inspecting a cell does not trigger its evaluation -
# TODO confirm against the Cell implementation).
for c in ('df1', 'df2'):
    cell = pp[c]
    print(f"{cell.name} : {cell._dirty} {cell._value}")

df1 : True None
df2 : True None


In [9]:
# 2. above_ten: first derived cell
# above_ten := df1_cell where age > 10
def recalc(df1):
    """Return the rows of ``df1`` whose age exceeds 10, flagged as done.

    Args:
        df1: DataFrame with an ``age`` column.

    Returns:
        A new DataFrame with an extra ``done`` column set to True, restricted
        to the rows where ``age > 10``. The input frame is not modified.
    """
    flagged = df1.assign(done=True)
    return flagged.query("age>10")
# Register the derived cell; pp.df1 is its only source.
pp.above_ten = Cell(recalc=recalc, sources=[pp.df1])
pp.above_ten._value    # lazy eval: nothing has been computed yet, so there is no _value to show

In [10]:
# 3. UK_above_ten: second derived cell
# UK_above_ten := above_ten merged with df2_cell where country==UK
# NOTE(review): this rebinds the notebook-level name `recalc` that the
# above_ten cell also used. above_ten still evaluates with its own function
# (its result below carries the `done` column), but a distinct name per cell
# would be clearer.
def recalc(above_ten, df2):
    """Join ``above_ten`` with ``df2`` and keep only the UK rows.

    Args:
        above_ten: DataFrame produced by the above_ten cell.
        df2: DataFrame with a ``country`` column.

    Returns:
        The merge of both frames (pandas' default merge, i.e. on the columns
        they have in common), with missing values replaced by empty strings,
        restricted to the rows where ``country == 'UK'``.
    """
    merged = above_ten.merge(df2)
    filled = merged.fillna('')
    return filled.query("country=='UK'")
# Register the second derived cell on top of above_ten and df2.
pp.UK_above_ten = Cell(recalc=recalc, sources=[pp.above_ten, pp.df2])
pp.UK_above_ten.value     # reading .value triggers the evaluation; the sources it needs are computed too

Unnamed: 0,name,age,done,country
0,nick,15,True,UK


In [11]:
# pp.df2._value is populated now: evaluating UK_above_ten also evaluated its source df2
pp.df2._value

Unnamed: 0,name,country
0,tom,USA
1,nick,UK
2,julie,France


In [12]:
# Change nick's age (row label 1) in place, then tell the pype that df1 is stale.
df1.loc[1,'age']=12
pp.df1.invalidate()
# all cells depending on df1 are invalidated as well -> their _dirty flag is True

In [13]:
# above_ten has df1 as a source, so invalidating df1 left it dirty
pp.above_ten._dirty

True

In [14]:
# Reading .value again recomputes the invalidated cells; the result now reflects the changed age
pp.UK_above_ten.value

Unnamed: 0,name,age,done,country
0,nick,12,True,UK
