<a href="https://colab.research.google.com/github/ronupanchal/ML_Algorithm_code/blob/main/FS_FilterMethod.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [55]:
import pandas as pd
import numpy as np

In [20]:
data = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
features = ['preg','plas','pres','skin','test','mass','pedi','age','class']
df = pd.read_csv(data, names=features)

In [21]:
df.head()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [22]:
df.shape

(768, 9)

# Preparing the Data

In [23]:
data = df.values
x = data[:,0:8]
y = data[:,8]


In [24]:
x

array([[  6.  , 148.  ,  72.  , ...,  33.6 ,   0.63,  50.  ],
       [  1.  ,  85.  ,  66.  , ...,  26.6 ,   0.35,  31.  ],
       [  8.  , 183.  ,  64.  , ...,  23.3 ,   0.67,  32.  ],
       ...,
       [  5.  , 121.  ,  72.  , ...,  26.2 ,   0.24,  30.  ],
       [  1.  , 126.  ,  60.  , ...,  30.1 ,   0.35,  47.  ],
       [  1.  ,  93.  ,  70.  , ...,  30.4 ,   0.32,  23.  ]])

In [25]:
y

array([1., 0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 1., 0., 1., 1., 1., 1.,
       1., 0., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0.,
       0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0.,
       0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 1., 0., 0.,
       0., 1., 0., 0., 0., 1., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0.,
       1., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1.,
       1., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0.,
       0., 0., 1., 1., 0., 1., 0., 0., 0., 1., 1., 1., 1., 0., 1., 1., 1.,
       1., 0., 0., 0., 0.

**Filtering Method**

In [26]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

In [27]:
# feature extraction
chi_best=SelectKBest(score_func=chi2,k=4)
k_best=chi_best.fit(x,y)
#summerize scores
np.set_printoptions(precision=2)
print(k_best.scores_)

k_features=k_best.transform(x)
#summerize selected features
print(k_features[0:5,:])

[ 111.52 1411.89   17.61   53.11 2175.57  127.67    5.39  181.3 ]
[[148.    0.   33.6  50. ]
 [ 85.    0.   26.6  31. ]
 [183.    0.   23.3  32. ]
 [ 89.   94.   28.1  21. ]
 [137.  168.   43.1  33. ]]


#Wrapper Method

####**Recursive Features Elimination RFE**


In [28]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

In [29]:
import warnings
warnings.filterwarnings('ignore')

In [37]:
#Feature extraction
model_lr = LogisticRegression()
recur_fe = RFE(model_lr, n_features_to_select=3, step=1)
Feature = recur_fe.fit(x,y)
print("Number of Features: %s" % (Feature.n_features_))
print("Selected Fetures are: %s" % (Feature.support_))
print("Feature Ranking is as follows: %s" % (Feature.ranking_))


Number of Features: 3
Selected Fetures are: [ True False False False False  True  True False]
Feature Ranking is as follows: [1 2 4 5 6 1 1 3]


# Embedded Method
####**Ridge Regression / L2 Regularization**

In [39]:
from sklearn.linear_model import Ridge

In [40]:
ridge_reg = Ridge(alpha=1.0)
ridge_reg.fit(x,y)

Ridge()

In [58]:
# A helper function for priting the coeffient
def print_coefs(coef, names=None, sort=False):
  if names == None:
    names=["x%s" % x for x in range(len(coef))]
  lst=zip(coef,names)
  if sort:
    lst=sorted(lst,key=lambda x:-np.abs(x[0]))
  return " + ".join("%s * %s" % (np.round(coef, 3), names) for coefs, name in lst)

In [59]:
print("Ridge Model:",print_coefs(ridge_reg.coef_))

Ridge Model: [ 0.02  0.01 -0.    0.   -0.    0.01  0.14  0.  ] * ['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'] + [ 0.02  0.01 -0.    0.   -0.    0.01  0.14  0.  ] * ['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'] + [ 0.02  0.01 -0.    0.   -0.    0.01  0.14  0.  ] * ['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'] + [ 0.02  0.01 -0.    0.   -0.    0.01  0.14  0.  ] * ['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'] + [ 0.02  0.01 -0.    0.   -0.    0.01  0.14  0.  ] * ['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'] + [ 0.02  0.01 -0.    0.   -0.    0.01  0.14  0.  ] * ['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'] + [ 0.02  0.01 -0.    0.   -0.    0.01  0.14  0.  ] * ['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'] + [ 0.02  0.01 -0.    0.   -0.    0.01  0.14  0.  ] * ['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7']
