## Quasi-constant features

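Quasi-constant features are those that show the same value for the vast majority of the observations. Because they vary so little, they usually carry little information for a model. In this notebook we use `DropConstantFeatures` from Feature-engine to find and remove them.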

In [1]:
import random
import pandas as pd

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from feature_engine.selection import DropConstantFeatures

In [2]:
# Build a toy binary-classification dataset (1000 rows, 10 numeric features),
# then turn three of its columns into quasi-constant features.

features, target = make_classification(
    n_samples=1000, n_features=10, n_classes=2, random_state=10
)
X = pd.DataFrame(features)
y = pd.Series(target)

# Seed Python's RNG so the same 990 rows are picked on every run, then
# overwrite columns 5, 7 and 9 with the value 1 in those rows. Only the
# 10 rows that were not sampled keep their original values in these columns.
random.seed(10)
rows_set_to_one = random.sample(range(1000), 990)
quasi_constant_cols = [5, 7, 9]
X.iloc[rows_set_to_one, quasi_constant_cols] = 1

X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-1.005838,-0.376539,-0.62018,-0.157567,-1.120805,1.0,-1.574578,1.0,1.08018,1.0
1,0.41118,0.762409,-0.78421,-0.096479,-0.408758,1.0,0.210942,1.0,-0.461301,1.0
2,-1.525408,2.227934,0.547727,-0.341481,-0.817577,1.0,-2.663678,1.0,1.698919,1.0
3,-1.374563,0.061129,-0.995868,-0.214351,-0.558957,1.0,-2.149167,1.0,-1.383965,1.0
4,-0.549798,0.046349,0.834756,-0.104845,-0.455528,1.0,-0.911018,1.0,1.068259,1.0


In [3]:
# Inspect one of the quasi-constant features: the value 1 dominates the
# column, while every other value appears only once.

X[5].value_counts()

 1.000000    990
-1.262250      1
 0.110908      1
 1.226226      1
 0.570835      1
 1.645921      1
-0.182120      1
-1.417213      1
-0.272211      1
-0.112173      1
-1.296468      1
Name: 5, dtype: int64

In [4]:
# Hold out 30% of the rows as a test set; the fixed random_state makes the
# split reproducible.

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

(X_train.shape, X_test.shape)

((700, 10), (300, 10))

In [5]:
# Set up the selector for quasi-constant features: with tol=0.95, a feature is
# flagged when a single value accounts for about 95% or more of its
# observations (see the Feature-engine docs for the exact threshold rule)
sel = DropConstantFeatures(tol=0.95)

# fit only identifies the quasi-constant features in the train set;
# nothing is removed until transform is called
sel.fit(X_train)  

In [6]:
# features_to_drop_ lists the columns that fit flagged as quasi-constant;
# these are the columns that transform removes

sel.features_to_drop_

[5, 7, 9]

In [7]:
# Apply the already-fitted selector to both splits so the same flagged
# columns are removed from train and test alike.

X_train_t, X_test_t = sel.transform(X_train), sel.transform(X_test)

(X_train_t.shape, X_test_t.shape)

((700, 7), (300, 7))

In [8]:
# transform returns a pandas DataFrame, so the remaining column names and the
# original row index are preserved

X_train_t.head()

Unnamed: 0,0,1,2,3,4,6,8
105,-1.155673,0.039801,1.501392,-0.18924,1.546828,-1.831193,0.209412
68,0.404169,-0.078494,-1.536507,-0.496806,0.9651,-0.873804,0.629114
479,0.36085,-0.731712,0.972453,-0.3093,-1.432922,-0.419046,0.377169
399,0.816893,-0.121187,0.516685,-0.800862,-0.73617,-1.219396,-1.027631
434,1.129063,-2.089187,0.899235,-0.241111,1.287536,0.643273,0.085618
