Skip to content

Commit

Permalink
[MRG] DEP remove legacy mode from OneHotEncoder (#13855)
Browse files: browse the repository at this point in the history
  • Loading branch information
glemaitre authored and jnothman committed May 29, 2019
1 parent a98db9a commit 9ee164b
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 661 deletions.
16 changes: 6 additions & 10 deletions doc/modules/preprocessing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -504,9 +504,8 @@ Continuing the example above::
>>> enc = preprocessing.OneHotEncoder()
>>> X = [['male', 'from US', 'uses Safari'], ['female', 'from Europe', 'uses Firefox']]
>>> enc.fit(X) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
OneHotEncoder(categorical_features=None, categories=None, drop=None,
dtype=<... 'numpy.float64'>, handle_unknown='error',
n_values=None, sparse=True)
OneHotEncoder(categories='auto', drop=None, dtype=<... 'numpy.float64'>,
handle_unknown='error', sparse=True)
>>> enc.transform([['female', 'from US', 'uses Safari'],
... ['male', 'from Europe', 'uses Safari']]).toarray()
array([[1., 0., 0., 1., 0., 1.],
Expand All @@ -530,10 +529,8 @@ dataset::
>>> # feature
>>> X = [['male', 'from US', 'uses Safari'], ['female', 'from Europe', 'uses Firefox']]
>>> enc.fit(X) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
OneHotEncoder(categorical_features=None,
categories=[...], drop=None,
dtype=<... 'numpy.float64'>, handle_unknown='error',
n_values=None, sparse=True)
OneHotEncoder(categories=[...], drop=None, dtype=<... 'numpy.float64'>,
handle_unknown='error', sparse=True)
>>> enc.transform([['female', 'from Asia', 'uses Chrome']]).toarray()
array([[1., 0., 0., 1., 0., 0., 1., 0., 0., 0.]])

Expand All @@ -548,9 +545,8 @@ columns for this feature will be all zeros
>>> enc = preprocessing.OneHotEncoder(handle_unknown='ignore')
>>> X = [['male', 'from US', 'uses Safari'], ['female', 'from Europe', 'uses Firefox']]
>>> enc.fit(X) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
OneHotEncoder(categorical_features=None, categories=None, drop=None,
dtype=<... 'numpy.float64'>, handle_unknown='ignore',
n_values=None, sparse=True)
OneHotEncoder(categories='auto', drop=None, dtype=<... 'numpy.float64'>,
handle_unknown='ignore', sparse=True)
>>> enc.transform([['female', 'from Asia', 'uses Chrome']]).toarray()
array([[1., 0., 0., 0., 0., 0.]])

Expand Down
4 changes: 2 additions & 2 deletions examples/ensemble/plot_feature_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@

# Supervised transformation based on random forests
rf = RandomForestClassifier(max_depth=3, n_estimators=n_estimator)
rf_enc = OneHotEncoder(categories='auto')
rf_enc = OneHotEncoder()
rf_lm = LogisticRegression(max_iter=1000)
rf.fit(X_train, y_train)
rf_enc.fit(rf.apply(X_train))
Expand All @@ -73,7 +73,7 @@

# Supervised transformation based on gradient boosted trees
grd = GradientBoostingClassifier(n_estimators=n_estimator)
grd_enc = OneHotEncoder(categories='auto')
grd_enc = OneHotEncoder()
grd_lm = LogisticRegression(max_iter=1000)
grd.fit(X_train, y_train)
grd_enc.fit(grd.apply(X_train)[:, :, 0])
Expand Down
2 changes: 1 addition & 1 deletion sklearn/compose/tests/test_column_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1057,7 +1057,7 @@ def test_column_transformer_negative_column_indexes():
X_categories = np.array([[1], [2]])
X = np.concatenate([X, X_categories], axis=1)

ohe = OneHotEncoder(categories='auto')
ohe = OneHotEncoder()

tf_1 = ColumnTransformer([('ohe', ohe, [-1])], remainder='passthrough')
tf_2 = ColumnTransformer([('ohe', ohe, [2])], remainder='passthrough')
Expand Down
3 changes: 1 addition & 2 deletions sklearn/ensemble/forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2002,8 +2002,7 @@ def fit_transform(self, X, y=None, sample_weight=None):
y = rnd.uniform(size=X.shape[0])
super().fit(X, y, sample_weight=sample_weight)

self.one_hot_encoder_ = OneHotEncoder(sparse=self.sparse_output,
categories='auto')
self.one_hot_encoder_ = OneHotEncoder(sparse=self.sparse_output)
return self.one_hot_encoder_.fit_transform(self.apply(X))

def transform(self, X):
Expand Down
Loading

0 comments on commit 9ee164b

Please sign in to comment.