In [ ]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE, SelectKBest, chi2
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Make sure both output directories exist before anything is read or written.
for out_dir in ('figures', 'results'):
    os.makedirs(out_dir, exist_ok=True)

# Load the dataset and separate the 'target' label column from the predictors.
df = pd.read_csv('results/heart_clean.csv')
y = df['target']
X = df.drop(columns='target')

# Standardize the predictors; the scaler is fitted on the full table.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Fit a random forest (fixed seed) on the standardized features and read off
# its per-feature importance scores.
rf = RandomForestClassifier(random_state=42).fit(X_scaled, y)
importances = rf.feature_importances_

# One bar per predictor, labelled with the original column names.
axes = plt.gca()
axes.bar(X.columns, importances)
axes.set_title('Feature Importance (RandomForest)')
plt.xticks(rotation=90)  # turn the tick labels vertical

# Save the chart to disk, then close the figure so it does not stay open.
figure = plt.gcf()
figure.tight_layout()
figure.savefig('figures/rf_feature_importance.png', dpi=160)
plt.close(figure)
# Recursive feature elimination with a logistic-regression estimator on the
# standardized features, keeping five predictors.
rfe = RFE(estimator=LogisticRegression(max_iter=1000), n_features_to_select=5)
rfe.fit(X_scaled, y)

# Write the selected column names, one per line. rfe.support_ is a boolean
# mask over the columns; labels are passed through str() so that non-string
# column labels cannot break the join.
with open('results/rfe_features.txt', 'w', encoding='utf-8') as f:
    f.write('\n'.join(map(str, X.columns[rfe.support_])))
# chi2 only accepts non-negative inputs. Taking abs() of standardized values
# would fold below-average and above-average observations onto each other and
# distort each feature's relationship with the target. Rescaling the original
# columns to [0, 1] satisfies chi2 while preserving the ordering of the values.
X_nonneg = MinMaxScaler().fit_transform(X)
chi_selector = SelectKBest(score_func=chi2, k=5)
chi_selector.fit(X_nonneg, y)

# Write the five selected column names, one per line. get_support() returns a
# boolean mask over the columns; labels are passed through str() so that
# non-string column labels cannot break the join.
with open('results/chi2_features.txt', 'w', encoding='utf-8') as f:
    f.write('\n'.join(map(str, X.columns[chi_selector.get_support()])))
print('✅ Feature selection complete.')