In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.linear_model import LogisticRegression

In [4]:
# Load scikit-learn's bundled breast-cancer dataset.
# `return_X_y=True` gives a (features, target) pair instead of a Bunch, and
# `as_frame=True` returns them as pandas objects rather than NumPy arrays.
X, y = load_breast_cancer(as_frame=True, return_X_y=True)

In [6]:
# Split features and labels into train and test partitions.
# No test_size/train_size is passed, so scikit-learn's default proportions
# apply; the fixed random_state makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1234,
)

In [5]:
# Estimators for the manual, step-by-step workflow.
scaler = StandardScaler()
# No n_components is given, so PCA keeps every component.
pca = PCA()
# TruncatedSVD uses a randomized solver by default; pin its seed (same value
# as the train/test split) so a fresh Restart & Run All reproduces the same
# two-component projection.
svd = TruncatedSVD(n_components=2, random_state=1234)
logreg = LogisticRegression()

In [7]:
# Push both partitions through scaler -> PCA -> SVD, in that order.
# Each transformer is fitted on the training partition only and then applied
# as-is to the test partition, so nothing is learned from the test data.
X_train_tr, X_test_tr = X_train, X_test
for transformer in (scaler, pca, svd):
    X_train_tr = transformer.fit_transform(X_train_tr)
    X_test_tr = transformer.transform(X_test_tr)

In [9]:
# Train the classifier on the reduced training features, then report its
# mean accuracy on the reduced test features.
logreg.fit(X_train_tr, y_train)
manual_test_score = logreg.score(X_test_tr, y_test)
print('Test score:', manual_test_score)

Test score: 0.9090909090909091


## Transformations using a pipeline (the right way)

In [10]:
from sklearn.pipeline import Pipeline, FeatureUnion

In [11]:
# Run PCA and truncated SVD side by side on the same input; FeatureUnion
# concatenates their outputs column-wise (1 + 2 = 3 features in total).
# TruncatedSVD uses a randomized solver by default, so its seed is fixed to
# keep the result reproducible across kernel restarts.
# NOTE(review): the manual cells earlier in this notebook chain PCA -> SVD
# sequentially, whereas this union applies them in parallel — confirm which
# of the two is the intended comparison.
union = FeatureUnion([
    ('pca', PCA(n_components=1)),
    ('svd', TruncatedSVD(n_components=2, random_state=1234)),
])

In [14]:
# End-to-end estimator: standardize -> reduce dimensionality -> classify.
# The stages run in the order listed; `union` is the FeatureUnion built above.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('reduce_dim', union),
    ('classifier', LogisticRegression()),
])

In [15]:
# Fit every pipeline stage on the raw training data, then report the
# pipeline's score on the raw test data.
pipe.fit(X_train, y_train)
pipe_test_score = pipe.score(X_test, y_test)
print("Test score:", pipe_test_score)

Test score: 0.9090909090909091


In [16]:
# Score on the same data the pipeline was fitted on, for comparison with the
# test score printed above.
pipe_train_score = pipe.score(X_train, y_train)
print("Train score:", pipe_train_score)

Train score: 0.9553990610328639
