In [None]:
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_selection import RFE

# Fetch the diabetes data as pandas objects (feature frame + target series)
diabetes = load_diabetes(as_frame=True)
X = diabetes.data

# Turn the continuous target into a binary label:
# 1 when a sample lies strictly above the median target value, otherwise 0
median_target = diabetes.target.median()
y = diabetes.target.gt(median_target).astype(int)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Base estimator whose feature importances drive the elimination
dt_classifier = DecisionTreeClassifier(random_state=42)

# Recursive feature elimination: drop features until only one remains.
# fit() returns the fitted selector, so construction and fitting are chained.
rfe = RFE(estimator=dt_classifier, n_features_to_select=1).fit(X_train, y_train)

# Map the boolean support mask back to column labels
selected_features = X.columns[rfe.get_support()]
print(f"Selected features after backward elimination: {selected_features}")

# Train a fresh tree on the surviving feature(s) only and score it on the hold-out set
final_model = DecisionTreeClassifier(random_state=42).fit(
    X_train[selected_features], y_train
)
accuracy = final_model.score(X_test[selected_features], y_test)
print(f"Model accuracy with selected features: {accuracy:.4f}")


Selected features after backward elimination: Index(['bmi'], dtype='object')
Model accuracy with selected features: 0.7191


Melbourne Dataset


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import RFE
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# Load the dataset (expects 'melbourne.csv' in the current working directory)
data = pd.read_csv('melbourne.csv')

# Rows without a target value cannot be used for training or scoring.
# dropna is a no-op when 'Price' has no missing values, so no guard is needed.
data = data.dropna(subset=['Price'])

# Separate target and features
X = data.drop('Price', axis=1)
y = data['Price']

# Split BEFORE deriving anything from X_train. The previous version built the
# column lists and a ColumnTransformer ahead of this line, which read whatever
# X_train happened to be left in the kernel (or raised NameError on a fresh one).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Column groups are taken from the training frame: object-dtype columns are
# treated as categorical, everything else as numeric.
categorical_cols = X_train.select_dtypes(include=['object']).columns
numeric_cols = X_train.select_dtypes(exclude=['object']).columns

# Preprocessing: mean-impute numeric columns, one-hot encode categorical ones.
# handle_unknown='ignore' keeps transform() from failing on categories that
# appear only in the test split.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', SimpleImputer(strategy='mean'), numeric_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ]
)

# Fit on the training split only, then apply the same fitted transform to the
# test split. NOTE: from here on X_train / X_test are the encoded matrices
# (NumPy array or SciPy sparse matrix), not DataFrames - they have no .columns;
# use preprocessor.get_feature_names_out() for the encoded column names.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)


In [None]:
# Initialize the Random Forest Regressor used to rank features
rf_regressor = RandomForestRegressor(random_state=42)

# Initialize RFE for backward feature elimination (keep a single feature).
# NOTE(review): with the default step of one feature per round this refits the
# forest once per encoded column, which can be slow after one-hot encoding.
rfe = RFE(estimator=rf_regressor, n_features_to_select=1)  # 1 feature left

# Fit RFE on the preprocessed training matrix
rfe.fit(X_train, y_train)

# Boolean mask over the encoded columns marking the surviving feature(s)
selected_features = rfe.support_

# X_train is the ColumnTransformer output (array / sparse matrix) and has no
# .columns attribute, so the names come from the fitted preprocessor instead.
selected_feature_names = preprocessor.get_feature_names_out()[selected_features]
print(f"Selected features after backward elimination: {selected_feature_names}")

# Evaluate model performance with the final set of features.
# rfe.transform keeps only the selected columns and works for both dense and
# sparse inputs.
final_model = RandomForestRegressor(random_state=42)
final_model.fit(rfe.transform(X_train), y_train)
# For a regressor, score() is the coefficient of determination (R²); the
# variable name is kept for compatibility with any later cells.
accuracy = final_model.score(rfe.transform(X_test), y_test)
print(f"Model R² with selected features: {accuracy:.4f}")
