# Persistence

An example about storing and loading pipesnake pipes.

In [1]:
# If you cloned the repository (instead of installing the package) you can do:
# add the parent directory to the import path so `pipesnake` can be imported.
# The relative path is resolved against the current working directory.
import sys
sys.path.append('../')

import logging
import pandas

# Lower the root logger threshold to DEBUG so the detailed log messages
# shown in the outputs below are emitted.
logging.getLogger().setLevel(logging.DEBUG)

# Load some data

More datasets are available here: https://archive.ics.uci.edu/ml/datasets.html

_Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science._

In [2]:
# Read the UCI car data straight from the repository URL. The file has no
# header row, so pandas labels the columns with the integers 0..6.
df = pandas.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data',
    header=None,
)
df.head()

Unnamed: 0,0,1,2,3,4,5,6
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [3]:
# Columns 0-5 are used as the input features and column 6 as the target.
# Selecting with a list of labels keeps both results as DataFrames.
x = df[list(range(6))]
y = df[[6]]

In [4]:
# Preview the first rows of the feature columns.
x.head()

Unnamed: 0,0,1,2,3,4,5
0,vhigh,vhigh,2,2,small,low
1,vhigh,vhigh,2,2,small,med
2,vhigh,vhigh,2,2,small,high
3,vhigh,vhigh,2,2,med,low
4,vhigh,vhigh,2,2,med,med


In [5]:
# Preview the first rows of the target column.
y.head()

Unnamed: 0,6
0,unacc
1,unacc
2,unacc
3,unacc
4,unacc


# Create a preprocessing pipeline using `pipesnake`

In [6]:
from pipesnake.pipe import SeriesPipe
from pipesnake.transformers.converter import Category2Number
from pipesnake.transformers.misc import ColumnRenamer
from pipesnake.transformers.misc import ToNumpy
from pipesnake.transformers.scaler import MadScaler
from pipesnake.transformers.scaler import UnitLenghtScaler

In [7]:
# Collect the preprocessing steps first; the pipe receives them in this order.
preprocessing_steps = [
    ColumnRenamer(),  # normalize column names (x_0, x_1, ... and y_0)
    Category2Number(x_cols='all', y_cols='all'),  # convert categorical strings to numbers in both x and y
    MadScaler(x_cols='all'),  # scale by feature (columns)
    UnitLenghtScaler(x_cols='all'),  # scale by feature vector (rows)
]
my_pipe = SeriesPipe(transformers=preprocessing_steps)

In [8]:
from pipesnake.utils.persistance import dump_pipe

# Fit the pipe on the data and transform it in a single call, then store the
# fitted pipe in the file 'my_pipe.p' so it can be reloaded later.
x_new, y_new = my_pipe.fit_transform(x, y)
dump_pipe(my_pipe, 'my_pipe.p')

DEBUG:root:[series_pipe_4020] : fitting...
DEBUG:root:Function: timed before Memory: 91.27 MB
INFO:root:[series_pipe_4020] : fitting x...
INFO:root:[series_pipe_4020] : -> column_renamer_c87d
DEBUG:root:[column_renamer_c87d] : x new column names: ['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5']
INFO:root:[series_pipe_4020] : -> category2_number_bcc1
DEBUG:root:[category2_number_bcc1] : x_cols: ['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5']
DEBUG:root:[category2_number_bcc1] : x category to number: {'x_0': {'high': 0, 'low': 1, 'med': 2, 'vhigh': 3}, 'x_1': {'high': 0, 'low': 1, 'med': 2, 'vhigh': 3}, 'x_2': {'2': 0, '3': 1, '4': 2, '5more': 3}, 'x_3': {'2': 0, '4': 1, 'more': 2}, 'x_4': {'big': 0, 'med': 1, 'small': 2}, 'x_5': {'high': 0, 'low': 1, 'med': 2}}
INFO:root:[series_pipe_4020] : -> mad_scaler_4f95
DEBUG:root:[mad_scaler_4f95] : x_cols: ['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5']
INFO:root:[series_pipe_4020] : -> unit_lenght_scaler_7568
DEBUG:root:[unit_lenght_scaler_7568] : x_cols: ['x

In [9]:
# Preview the transformed features produced by the pipe.
x_new.head()

Unnamed: 0,x_0,x_1,x_2,x_3,x_4,x_5
0,0.447214,0.447214,-0.447214,-0.447214,0.447214,0.0
1,0.408248,0.408248,-0.408248,-0.408248,0.408248,0.408248
2,0.408248,0.408248,-0.408248,-0.408248,0.408248,-0.408248
3,0.5,0.5,-0.5,-0.5,0.0,0.0
4,0.447214,0.447214,-0.447214,-0.447214,0.0,0.447214


In [10]:
# Preview the transformed target: the string labels are now numbers.
y_new.head()

Unnamed: 0,y_0
0,2
1,2
2,2
3,2
4,2


In [11]:
from pipesnake.utils.persistance import load_pipe

# Restore the pipe from 'my_pipe.p' and apply it with transform() only:
# no fit call is made here, so the loaded pipe is used as stored.
# NOTE(review): the '.p' extension suggests a pickle-based format -- if so, only
# load files from a trusted source (unpickling can execute arbitrary code).
# Confirm against the pipesnake implementation.
my_pipe = load_pipe('my_pipe.p')
x_new, y_new = my_pipe.transform(x, y)

INFO:root:loading: my_pipe.p
DEBUG:root:[series_pipe_4020] : transforming...
DEBUG:root:Function: timed before Memory: 92.55 MB
INFO:root:[series_pipe_4020] : transforming x...
INFO:root:[series_pipe_4020] : -> column_renamer_c87d
DEBUG:root:[column_renamer_c87d] : x new column names: ['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5']
INFO:root:[series_pipe_4020] : -> category2_number_bcc1
DEBUG:root:[category2_number_bcc1] : x category to number: {'x_0': {'high': 0, 'low': 1, 'med': 2, 'vhigh': 3}, 'x_1': {'high': 0, 'low': 1, 'med': 2, 'vhigh': 3}, 'x_2': {'2': 0, '3': 1, '4': 2, '5more': 3}, 'x_3': {'2': 0, '4': 1, 'more': 2}, 'x_4': {'big': 0, 'med': 1, 'small': 2}, 'x_5': {'high': 0, 'low': 1, 'med': 2}}
INFO:root:[series_pipe_4020] : -> mad_scaler_4f95
INFO:root:[series_pipe_4020] : -> unit_lenght_scaler_7568
DEBUG:root:Function: transform_x: 0.05 sec
DEBUG:root:Function: timed after Memory: 92.77 MB
DEBUG:root:Function: timed before Memory: 92.77 MB
INFO:root:[series_pipe_4020] : transf

In [12]:
# Preview the features transformed by the reloaded pipe; the values should
# match the ones obtained from fit_transform before the pipe was stored.
x_new.head()

Unnamed: 0,x_0,x_1,x_2,x_3,x_4,x_5
0,0.447214,0.447214,-0.447214,-0.447214,0.447214,0.0
1,0.408248,0.408248,-0.408248,-0.408248,0.408248,0.408248
2,0.408248,0.408248,-0.408248,-0.408248,0.408248,-0.408248
3,0.5,0.5,-0.5,-0.5,0.0,0.0
4,0.447214,0.447214,-0.447214,-0.447214,0.0,0.447214


In [13]:
# Preview the target transformed by the reloaded pipe; it should match the
# result obtained before the pipe was stored.
y_new.head()

Unnamed: 0,y_0
0,2
1,2
2,2
3,2
4,2
