In [2]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from custom_transformers import DataFrameSelector, MyLabelBinarizer
from sklearn.decomposition import PCA

In [3]:
# Toy input: three rows, four integer columns. Every column is an affine function
# of "A" (B = 2A - 1, C = A + 2, D = A + 3), so the columns are perfectly collinear.
df = pd.DataFrame(
    data=[
        [1, 1, 3, 4],
        [2, 3, 4, 5],
        [3, 5, 5, 6],
    ],
    columns=["A", "B", "C", "D"],
)

In [9]:
# Fit a two-component PCA on all four columns of df and display the principal axes.
# NOTE(review): every column of df is an affine function of "A", so the centred data
# has rank 1; the second row of components_ presumably explains no variance --
# confirm with explained_variance_ratio_ before reading anything into it.
pca = PCA(n_components=2).fit(df)
a = pca  # fit() hands back the estimator itself, so both names point at the same fitted object
a.components_

array([[-0.37796447, -0.75592895, -0.37796447, -0.37796447],
       [-0.37796447, -0.20734518,  0.89632741, -0.10367259]])

In [85]:
# Numeric preprocessing chain: select columns "A" and "B", rescale them with
# MinMaxScaler, then project with PCA. The PCA step is created with default
# settings and reconfigured through the "<step name>__<parameter>" syntax of
# set_params, which returns the pipeline itself.
num_pipeline = Pipeline(
    steps=[
        ("selector", DataFrameSelector(["A", "B"])),
        ("scaler", MinMaxScaler()),
        ("pca", PCA()),
    ]
).set_params(pca__n_components=0.95)
# Bare expression so the cell still displays the configured pipeline.
num_pipeline

Pipeline(memory=None,
         steps=[('selector', DataFrameSelector(attribute_names=['A', 'B'])),
                ('scaler', MinMaxScaler(copy=True, feature_range=(0, 1))),
                ('pca',
                 PCA(copy=True, iterated_power='auto', n_components=0.95,
                     random_state=None, svd_solver='auto', tol=0.0,
                     whiten=False))],
         verbose=False)

In [86]:
# Run df through num_pipeline with a single fit_transform call and keep the result.
# NOTE(review): the "X: ... attr: ..." text recorded under this cell is not printed
# by this line; presumably a debug print left inside
# custom_transformers.DataFrameSelector -- confirm and remove it there.
processed = num_pipeline.fit_transform(df)

X:     A  B  C  D
0  1  1  3  4
1  2  3  4  5
2  3  5  5  6  attr:  ['A', 'B']


In [87]:
# Plain-text dump of the pipeline output. The recorded output has a single column,
# i.e. the pca step kept one component under the 0.95 setting applied via set_params.
print(processed)

[[-0.70710678]
 [ 0.        ]
 [ 0.70710678]]
