# Embedded Methods
Embedded techniques involves thes use of machine learning algorithms that allow feature ranking based on thier significance.
Two of such algoithms are:
* Lasso Regression
* Decision Tree Regression

In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv('https://raw.githubusercontent.com/rahul96rajan/sample_datasets/master/boston_housing.csv')
X = data.drop('MEDV', axis=1)
y = data['MEDV'].astype('int')

### Using Lasso Regression to extract significant features

In [3]:
from sklearn.linear_model import Lasso
las = Lasso(alpha=0.8)  # We are keeping high alpha to penalize non-significant feature
                        # such that thier coefficients become zero.
las.fit(X,y)

Lasso(alpha=0.8, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

<b><span style="color:green">To see the calculated coefficeints of each parameter we can use '*las.coef_*'. And since we have kept a rather high alpha non-signinficant features will have coefficients close to zero.</span></b>

In [4]:
print(las.coef_)

[-0.06917452  0.04950487 -0.          0.         -0.          1.59392305
  0.01381677 -0.78457001  0.26766873 -0.01528908 -0.74120882  0.0088153
 -0.7218642 ]


In [5]:
coeff_df = pd.DataFrame({"features": X.columns,
                        "coefficient": abs(las.coef_) })
display(coeff_df.sort_values(by='coefficient', ascending=False))

Unnamed: 0,features,coefficient
5,RM,1.593923
7,DIS,0.78457
10,PTRATIO,0.741209
12,LSTAT,0.721864
8,RAD,0.267669
0,CRIM,0.069175
1,ZN,0.049505
9,TAX,0.015289
6,AGE,0.013817
11,B,0.008815


In [6]:
top_5_las = coeff_df.sort_values(by='coefficient', ascending=False)['features'].values[:5]
print("Lasso selected features: ", top_5_las)

Lasso selected features:  ['RM' 'DIS' 'PTRATIO' 'LSTAT' 'RAD']


### Using Decision Tree to extract significant features

In [7]:
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(random_state=0)
dt.fit(X,y)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=0, splitter='best')

<b><span style="color:green">In dt more significant features are closer to the roots. And using attribute '*feature_importances_*' we can know which feartures are kept closer to the roots.</span></b>

In [8]:
dt.feature_importances_

array([0.12289785, 0.0188059 , 0.02459912, 0.00533241, 0.03967691,
       0.1439793 , 0.13173668, 0.10048103, 0.03714211, 0.03646678,
       0.05763905, 0.10386965, 0.17737321])

In [9]:
impt_df = pd.DataFrame({"features": X.columns,
                        "importance": dt.feature_importances_ })
display(impt_df.sort_values(by='importance', ascending=False))

Unnamed: 0,features,importance
12,LSTAT,0.177373
5,RM,0.143979
6,AGE,0.131737
0,CRIM,0.122898
11,B,0.10387
7,DIS,0.100481
10,PTRATIO,0.057639
4,NOX,0.039677
8,RAD,0.037142
9,TAX,0.036467


In [10]:
top_5_las = impt_df.sort_values(by='importance', ascending=False)['features'].values[:5]
print("Lasso selected features: ", top_5_las)

Lasso selected features:  ['LSTAT' 'RM' 'AGE' 'CRIM' 'B']
