In [2]:
import sys
sys.path.append('../framedct')
from framed_column_transformer import FramedColumnTransfomer
from sklearn.pipeline import Pipeline
from sklearn import preprocessing, impute
import pandas as pd
import numpy as np

%load_ext autoreload
%autoreload 1

In [3]:
# Small demo frame: two numeric and two categorical features, with one
# missing value in a numeric column (Height) and one in a categorical
# column (Gender).
data = pd.DataFrame(
    dict(
        Age=[5, 23, 16, 30, 45],
        Height=[103, 185, 170, np.nan, 175],
        Gender=['Female', 'Female', 'Female', 'Male', np.nan],
        Country=['Germany', 'England', 'Canada', 'Canada', 'France'],
    )
)

data

Unnamed: 0,Age,Height,Gender,Country
0,5,103.0,Female,Germany
1,23,185.0,Female,England
2,16,170.0,Female,Canada
3,30,,Male,Canada
4,45,175.0,,France


In [4]:
# Column groups routed to the numerical and the categorical pipeline respectively.
num_cols, cat_cols = ['Age', 'Height'], ['Gender', 'Country']

In [5]:
# Numeric features: fill gaps with the column median, then standardise.
numerical_pipeline = Pipeline([
    ('imputer', impute.SimpleImputer(strategy='median')),
    ('scaler', preprocessing.StandardScaler()),
])

# Categorical features: fill gaps with the most frequent value, then one-hot
# encode. Two-category features collapse to a single indicator column
# (drop='if_binary'); categories unseen during fit raise at transform time
# (handle_unknown='error').
categorical_pipeline = Pipeline([
    ('imputer', impute.SimpleImputer(strategy='most_frequent')),
    (
        'one_hot_encoder',
        preprocessing.OneHotEncoder(drop='if_binary', handle_unknown='error'),
    ),
])

In [6]:
# Baseline example: route the numeric columns through numerical_pipeline and
# the categorical columns through categorical_pipeline, selecting columns by
# name. The displayed result carries readable column labels (Age, Height,
# Gender_Male, Country_<value>).
ct = FramedColumnTransfomer(transformers=[
  ('numerical_pipeline', numerical_pipeline, num_cols),
  ('categorical_pipeline', categorical_pipeline, cat_cols)
])

# Last expression of the cell, so the transformed table is rendered as output.
ct.fit_transform(data)

Unnamed: 0,Age,Height,Gender_Male,Country_Canada,Country_England,Country_France,Country_Germany
0,-1.399095,-1.969956,0.0,0.0,0.0,0.0,1.0
1,-0.059536,0.81036,0.0,0.0,1.0,0.0,0.0
2,-0.580475,0.301766,0.0,1.0,0.0,0.0,0.0
3,0.461404,0.386532,1.0,1.0,0.0,0.0,0.0
4,1.577702,0.471297,0.0,0.0,0.0,1.0,0.0


In [7]:
# Explicit 'passthrough' entry: Country is listed as its own transformer and
# appears in the result unchanged, under its original column name.
ct2 = FramedColumnTransfomer(transformers=[
    ('numerical_pipeline', numerical_pipeline, num_cols),
    ('categorical_pipeline', categorical_pipeline, ['Gender']),
    ('remaining_features', 'passthrough', ['Country'])
])

ct2.fit_transform(data)


Unnamed: 0,Age,Height,Gender_Male,Country
0,-1.399095,-1.969956,0.0,Germany
1,-0.059536,0.81036,0.0,England
2,-0.580475,0.301766,0.0,Canada
3,0.461404,0.386532,1.0,Canada
4,1.577702,0.471297,0.0,France


In [8]:
# Explicit 'drop' entry: Country is listed as its own transformer and is
# absent from the result.
ct3 = FramedColumnTransfomer(transformers=[
    ('numerical_pipeline', numerical_pipeline, num_cols),
    ('categorical_pipeline', categorical_pipeline, ['Gender']),
    ('remaining_features', 'drop', ['Country'])
])

ct3.fit_transform(data)


Unnamed: 0,Age,Height,Gender_Male
0,-1.399095,-1.969956,0.0
1,-0.059536,0.81036,0.0
2,-0.580475,0.301766,0.0
3,0.461404,0.386532,1.0
4,1.577702,0.471297,0.0


In [9]:
# remainder='drop': Country is not listed in any transformer and is left out
# of the result, giving the same table as the explicit 'drop' entry in ct3.
ct4 = FramedColumnTransfomer(transformers=[
    ('numerical_pipeline', numerical_pipeline, num_cols),
    ('categorical_pipeline', categorical_pipeline, ['Gender'])
], remainder='drop')

ct4.fit_transform(data)


Unnamed: 0,Age,Height,Gender_Male
0,-1.399095,-1.969956,0.0
1,-0.059536,0.81036,0.0
2,-0.580475,0.301766,0.0
3,0.461404,0.386532,1.0
4,1.577702,0.471297,0.0


In [10]:
# remainder='passthrough': Gender is not listed in any transformer, so it is
# appended after the transformed columns under its original name. It bypasses
# the categorical pipeline, so its missing value in the last row is NOT imputed.
ct5 = FramedColumnTransfomer(transformers=[
    ('numerical_pipeline', numerical_pipeline, num_cols),
    ('categorical_pipeline', categorical_pipeline, ['Country'])
], remainder='passthrough')

ct5.fit_transform(data)


Unnamed: 0,Age,Height,Country_Canada,Country_England,Country_France,Country_Germany,Gender
0,-1.399095,-1.969956,0.0,0.0,0.0,1.0,Female
1,-0.059536,0.81036,0.0,1.0,0.0,0.0,Female
2,-0.580475,0.301766,1.0,0.0,0.0,0.0,Female
3,0.461404,0.386532,1.0,0.0,0.0,0.0,Male
4,1.577702,0.471297,0.0,0.0,1.0,0.0,


In [11]:
# Columns selected by position instead of by name: 0 and 1 are Age and Height,
# 3 is Country. With positional selection the result uses generic labels
# (x0, x1, x0_<value>) rather than the original column names.
# NOTE(review): the 'x0' in 'x0_Canada' etc. labels the one-hot columns derived
# from Country, not from Age (also 'x0') -- presumably each pipeline numbers its
# own inputs from zero; confirm against FramedColumnTransfomer.
ct6 = FramedColumnTransfomer(transformers=[
    ('numerical_pipeline', numerical_pipeline, [0, 1]),
    ('categorical_pipeline', categorical_pipeline, [3])
], remainder='drop')

ct6.fit_transform(data)


Unnamed: 0,x0,x1,x0_Canada,x0_England,x0_France,x0_Germany
0,-1.399095,-1.969956,0.0,0.0,0.0,1.0
1,-0.059536,0.81036,0.0,1.0,0.0,0.0
2,-0.580475,0.301766,1.0,0.0,0.0,0.0
3,0.461404,0.386532,1.0,0.0,0.0,0.0
4,1.577702,0.471297,0.0,0.0,1.0,0.0


In [12]:
# Positional column selection combined with remainder='passthrough': the
# transformed columns get generic labels (x0, x1, x0_<value>), while the
# passed-through column keeps its original name (Gender) and its missing
# value in the last row.
ct7 = FramedColumnTransfomer(transformers=[
    ('numerical_pipeline', numerical_pipeline, [0, 1]),
    ('categorical_pipeline', categorical_pipeline, [3])
], remainder='passthrough')

ct7.fit_transform(data)

Unnamed: 0,x0,x1,x0_Canada,x0_England,x0_France,x0_Germany,Gender
0,-1.399095,-1.969956,0.0,0.0,0.0,1.0,Female
1,-0.059536,0.81036,0.0,1.0,0.0,0.0,Female
2,-0.580475,0.301766,1.0,0.0,0.0,0.0,Female
3,0.461404,0.386532,1.0,0.0,0.0,0.0,Male
4,1.577702,0.471297,0.0,0.0,1.0,0.0,
