#Iris Dataset - Categorical Target - KBest and Decision Trees

##KBest with Chi Square

In [None]:
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

# Pull the Iris feature matrix and class labels out of the bundled dataset
iris = load_iris()
X, y = iris.data, iris.target

# Score every feature against the class label with chi2 and keep the top k
k = 2  # how many features to keep
k_best = SelectKBest(chi2, k=k)
k_best.fit(X, y)
X_new = k_best.transform(X)  # X reduced to the k selected columns

# Ask for integer column positions rather than a boolean mask
selected_indices = k_best.get_support(indices=True)

# Map the kept column positions back to readable feature names and report them
selected_features = [iris.feature_names[col] for col in selected_indices]
print("Selected features:", selected_features)


Selected features: ['petal length (cm)', 'petal width (cm)']


##Decision Trees Classifier

In [None]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

# Reload the Iris data (each cell in this notebook loads its own copy)
iris = load_iris()
X, y = iris.data, iris.target

# Fit a decision tree on the whole dataset; random_state is pinned to 42
tree = DecisionTreeClassifier(random_state=42).fit(X, y)

# Importance scores of the fitted tree, read alongside iris.feature_names below
importance = tree.feature_importances_

# Report every feature name next to its importance score
for feature_name, score in zip(iris.feature_names, importance):
    print(f"{feature_name}: {score}")


sepal length (cm): 0.013333333333333329
sepal width (cm): 0.0
petal length (cm): 0.5640559581320451
petal width (cm): 0.4226107085346215


#Diabetes Dataset - Numeric Target - KBest, LASSO, and Decision Trees

##KBest with F-test (f_regression)

In [None]:
from sklearn.datasets import load_diabetes
from sklearn.feature_selection import SelectKBest, f_regression

# Pull the diabetes feature matrix and the numeric target out of the dataset
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Score every feature against the target with f_regression and keep the top k
k = 5  # how many features to keep
selector = SelectKBest(f_regression, k=k)
selector.fit(X, y)
X_new = selector.transform(X)  # X reduced to the k selected columns

# Ask for integer column positions rather than a boolean mask
selected_indices = selector.get_support(indices=True)

# Map the kept column positions back to readable feature names
all_feature_names = diabetes.feature_names
selected_feature_names = [all_feature_names[col] for col in selected_indices]

# Report the result
print("Selected Features:", selected_feature_names)


Selected Features: ['bmi', 'bp', 's3', 's4', 's5']


##LASSO

In [None]:
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Lasso

# Pull the diabetes feature matrix and the numeric target out of the dataset
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Fit LASSO regression; set the regularization strength (alpha) according to your needs
lasso = Lasso(alpha=0.1)
lasso.fit(X, y)

# NOTE: despite its name, this array holds the fitted coefficient of EVERY
# feature; the filtering down to the non-zero ones happens further below.
non_zero_coefficients = lasso.coef_
feature_names = diabetes.feature_names

# A feature counts as selected when its coefficient is not exactly zero
selected_features = [
    name
    for name, coef in zip(feature_names, non_zero_coefficients)
    if coef != 0
]

# Report the result
print("Selected Features:", selected_features)


Selected Features: ['sex', 'bmi', 'bp', 's1', 's3', 's5', 's6']


##Decision Trees Regressor

In [None]:
from sklearn.datasets import load_diabetes
from sklearn.tree import DecisionTreeRegressor

# Reload the diabetes data (each cell in this notebook loads its own copy)
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Fit a regression tree on the whole dataset; random_state is pinned to 42
tree = DecisionTreeRegressor(random_state=42).fit(X, y)

# Importance scores of the fitted tree, read alongside diabetes.feature_names below
importance = tree.feature_importances_

# Report every feature name next to its importance score
for feature_name, score in zip(diabetes.feature_names, importance):
    print(f"{feature_name}: {score}")


age: 0.046833846582986075
sex: 0.00958814970729943
bmi: 0.23415532633695108
bp: 0.0781295134868094
s1: 0.0835967483619816
s2: 0.054094433040148604
s3: 0.06450281744892168
s4: 0.01627137633064481
s5: 0.34413592725693953
s6: 0.06869186144731768
