In [1]:
import pandas as pd

In [3]:
# Toy frame for the demo: A and B vary row to row, while C and D hold a
# single repeated value each (i.e. they have zero variance).
data = pd.DataFrame(
    {
        "A": [1, 2, 4, 7, 9, 13],
        "B": [4, 5, 8, 12, 15, 17],
        "C": [0] * 6,
        "D": [1] * 6,
    }
)

In [4]:
# Preview the first five rows of the toy frame.
data.head(n=5)

Unnamed: 0,A,B,C,D
0,1,4,0,1
1,2,5,0,1
2,4,8,0,1
3,7,12,0,1
4,9,15,0,1


## Variance Threshold
Feature selector that removes all low-variance features.

This feature selection algorithm looks only at the features (X), not the desired outputs (y), and can thus be used for unsupervised learning.

In [5]:
### Fit a selector that flags zero-variance (constant) features
from sklearn.feature_selection import VarianceThreshold

# threshold=0 is passed explicitly; the fit only looks at the feature columns.
var_thres = VarianceThreshold(threshold=0)
var_thres.fit(data)

In [6]:
# Boolean mask over the columns: True = kept by the selector, False = rejected.
var_thres.get_support(indices=False)

array([ True,  True, False, False])

In [8]:
# Names of the columns the selector keeps, looked up by integer position.
data.columns[var_thres.get_support(indices=True)]

Index(['A', 'B'], dtype='object')

In [12]:
# Columns rejected by the selector, i.e. the constant (zero-variance) ones.
# Invert the support mask instead of testing every column name for membership
# in the kept set: that avoids recomputing get_support() once per column and
# stays correct positionally even if two columns share the same label.
constant_columns = data.columns[~var_thres.get_support()].tolist()

print(len(constant_columns))

2


In [13]:
# List each rejected (constant) column name on its own line.
for col_name in constant_columns:
    print(col_name)

C
D


In [15]:
# Show the frame without the constant columns (returns a new frame; `data` is untouched).
data.drop(columns=constant_columns)

Unnamed: 0,A,B
0,1,4
1,2,5
2,4,8
3,7,12
4,9,15
5,13,17


In [16]:
# Load the mobile price dataset from the working directory and preview it.
df = pd.read_csv(filepath_or_buffer='mobile_price_range_data.csv')
df.head(n=5)

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [17]:
# (rows, columns) of the loaded frame.
df.shape

(2000, 21)

In [18]:
# Split the frame into the feature matrix (everything except the target)
# and the target column itself.
X = df.drop(columns=['price_range'])
y = df.loc[:, 'price_range']

In [20]:
from sklearn.model_selection import train_test_split

# Separate the dataset into train and test portions (30% held out).
# X and y already hold the features / target derived from df, so reuse them
# rather than dropping 'price_range' a second time; random_state pins the
# shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

X_train.shape, X_test.shape

((1400, 20), (600, 20))

In [21]:
# Re-create the selector and fit it on the training features only.
var_thres = VarianceThreshold(threshold=0)
var_thres.fit(X_train)

In [22]:
# Boolean mask over the training columns: True = kept by the selector.
var_thres.get_support(indices=False)

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True])

In [23]:
### Count the non-constant features (True entries in the support mask)
var_thres.get_support().sum()

20

In [24]:
# Same count, taken from the retained column labels instead of the mask.
X_train.columns[var_thres.get_support()].size

20

In [25]:
# Training columns rejected by the selector, i.e. the constant ones.
# Invert the support mask instead of testing every column name for membership
# in the kept set: that avoids recomputing get_support() once per column and
# stays correct positionally even if two columns share the same label.
constant_columns = X_train.columns[~var_thres.get_support()].tolist()

print(len(constant_columns))

0


In [27]:
# List each rejected column name on its own line (prints nothing when the list is empty).
for col_name in constant_columns:
    print(col_name)