## Tiny Blocks Examples

In [1]:
# extract blocks
from tiny_blocks.extract import FromCSV
from tiny_blocks.extract import FromSQLQuery

# transform blocks 
from tiny_blocks.transform import DropDuplicates
from tiny_blocks.transform import Fillna
from tiny_blocks.transform import Rename
from tiny_blocks.transform import Merge

# load blocks
from tiny_blocks.load import ToCSV
from tiny_blocks.load import ToSQL

# pipeline operations
from tiny_blocks import FanIn, FanOut, Tee

# mock data
from tests.conftest import add_mocked_data
from tests.conftest import delete_mocked_data

import pandas as pd

In [2]:
# Seed the mock data via the helper imported from tests/conftest.py.
# Presumably this creates the source CSV and the `source` SQL table inspected in
# the next two cells -- confirm in tests/conftest.py.
add_mocked_data()

In [3]:
# Peek at the pipe-delimited CSV source before building any pipeline.
df = pd.read_csv(
    "/code/tests/data/source.csv",
    sep="|",
)
df

Unnamed: 0,d,e,f
0,uno,uno,7.0
1,dos,dos,
2,tres,tres,


In [4]:
# Peek at the `source` SQL table that the pipelines below query.
df = pd.read_sql_table(
    con="postgresql+psycopg2://user:pass@postgres:5432/db",
    table_name="source",
)
df

Unnamed: 0,a,b,c
0,uno,4,7.0
1,dos,5,8.0
2,dos,6,


### Example 1. Basic Pipeline 

In [5]:
# --- extract: every row of the `source` table ---
from_sql = FromSQLQuery(
    dsn_conn="postgresql+psycopg2://user:pass@postgres:5432/db",
    sql="select * from source",
)

# --- transform: a null filler and a de-duplicator keyed on column "a" ---
fillna = Fillna(value="Hola Mundo")
drop_duplicates = DropDuplicates(subset=["a"])

# --- load: the sink CSV that is read back after the pipeline runs ---
to_csv = ToCSV(path="/code/tests/data/sink.csv")

In [6]:
''' 
Read from SQL -> Fill Null -> Drop Duplicates -> Write to CSV
'''

# `>>` links the blocks left to right, in the same order as the sketch above.
# Evaluating the chain appears to run the pipeline (the sink is read back in the
# next cell) -- confirm against the tiny_blocks docs.
from_sql >> fillna >> drop_duplicates >> to_csv

In [7]:
# Read the sink back (same "|" separator used for the source CSV) to inspect the result.
df = pd.read_csv(to_csv.path, sep="|")
df

Unnamed: 0,a,b,c
0,dos,5,8.0
1,uno,4,7.0


### Example 2. Merging Pipes

In [8]:
# --- extract: one SQL source and one CSV source ---
from_sql = FromSQLQuery(
    dsn_conn="postgresql+psycopg2://user:pass@postgres:5432/db",
    sql="select * from source",
)
from_csv = FromCSV(path="/code/tests/data/source.csv")

# --- transform ---
fillna = Fillna(value="Hola Mundo")
# Left join on "a" (a column of the SQL table) and "d" (a column of the CSV).
merge = Merge(how="left", left_on="a", right_on="d")
drop_dupl = DropDuplicates(subset=["a"])

# --- load: same sink path as Example 1 ---
to_csv = ToCSV(path="/code/tests/data/sink.csv")

In [9]:
'''
read SQL -------------|
                      |-> Merge -> Drop Duplicates -> Write to CSV
read CSV -> FillNull -|
'''

# Fillna sits on the CSV branch only; the SQL branch enters the merge untouched.
# FanIn hands both branches to Merge, then the merged rows are de-duplicated on "a".
FanIn(from_sql, from_csv >> fillna) >> merge >> drop_dupl >> to_csv

In [10]:
# Read the merged result back from the sink CSV.
df = pd.read_csv(to_csv.path, sep="|")
df

Unnamed: 0,a,b,c,d,e,f
0,dos,5,8.0,dos,dos,Hola Mundo
1,uno,4,7.0,uno,uno,7.0


### Example 3. FanOut

In [11]:
# Source and sink live in the same database, so the connection string is
# written once and shared by both blocks.
DSN = "postgresql+psycopg2://user:pass@postgres:5432/db"

# extract
from_sql = FromSQLQuery(dsn_conn=DSN, sql="select * from source")

# transform
fillna = Fillna(value="Hola Mundo")
drop_dupl = DropDuplicates(subset=["a"])
rename = Rename(columns={"a": "A"})

# load: one CSV sink and one SQL sink (table "FanOut")
to_csv = ToCSV(path="/code/tests/data/sink.csv")
to_sql = ToSQL(dsn_conn=DSN, table_name="FanOut")

In [12]:
'''
read SQL -> FillNull -> Rename columns -> | -> Drop Duplicates -> Write to SQL
                                          |
                                          | -> Write to CSV

'''
# FanOut(to_csv) taps the stream right after the rename, so the CSV receives the
# rows before de-duplication while the SQL table receives them after it.
# NOTE(review): drop_dupl is keyed on "a" but runs after the rename to "A"; the
# SQL result is still de-duplicated, so the column lookup presumably ignores
# case -- confirm.
from_sql >> fillna >> rename >> FanOut(to_csv) >> drop_dupl >> to_sql

In [13]:
# Read back the CSV written by the FanOut branch.
df = pd.read_csv(to_csv.path, sep="|")
df

Unnamed: 0,A,b,c
0,uno,4,7.0
1,dos,5,8.0
2,dos,6,Hola Mundo


In [14]:
# Read back the "FanOut" table written at the end of the main pipeline.
df = pd.read_sql_table(
    con="postgresql+psycopg2://user:pass@postgres:5432/db", 
    table_name="FanOut"
)
df

Unnamed: 0,A,b,c
0,dos,5,8.0
1,uno,4,7.0


### Example 4. Branching with Tee

In [15]:
# Source and sink live in the same database, so the connection string is
# written once and shared by both blocks.
DSN = "postgresql+psycopg2://user:pass@postgres:5432/db"

# extract
from_sql = FromSQLQuery(dsn_conn=DSN, sql="select * from source")

# transform
fillna = Fillna(value="Hola Mundo")
drop_dupl = DropDuplicates(subset=["a"])
rename = Rename(columns={"a": "A"})

# load: one CSV sink and one SQL sink (table "Tee")
to_csv = ToCSV(path="/code/tests/data/sink.csv")
to_sql = ToSQL(dsn_conn=DSN, table_name="Tee")

In [16]:
'''

                        | -> Drop Duplicates -> Write to CSV
read SQL -> FillNull -> |
                        | -> Rename columns -> Write to SQL

'''
# Each branch is built first as its own transform >> load chain.
# Only the CSV branch de-duplicates; only the SQL branch renames.
pipe_1 = drop_dupl >> to_csv
pipe_2 = rename >> to_sql

# Tee receives the upstream rows and passes them to both branches.
from_sql >> fillna >> Tee(pipe_1, pipe_2)

# Equivalent form with the branches written inline:
# from_sql >> fillna >> Tee(drop_dupl >> to_csv, rename >> to_sql)

In [17]:
# Read back the CSV written by the Drop Duplicates branch of the Tee.
df = pd.read_csv(to_csv.path, sep="|")
df

Unnamed: 0,a,b,c
0,dos,5,8.0
1,uno,4,7.0


In [18]:
# Read back the "Tee" table written by the Rename branch of the Tee.
df = pd.read_sql_table(
    con="postgresql+psycopg2://user:pass@postgres:5432/db", 
    table_name="Tee"
)
df

Unnamed: 0,A,b,c
0,uno,4,7.0
1,dos,5,8.0
2,dos,6,Hola Mundo
