In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFE
from sklearn.metrics import accuracy_score

In [None]:
# Read heart.csv (path is relative, so it is resolved against the working directory)
data = pd.read_csv(filepath_or_buffer='heart.csv')

In [None]:
# The 'target' column is the label; every remaining column is a feature
y = data['target']
X = data.drop(columns=['target'])


In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Base estimator handed to RFE below (liblinear solver, up to 1000 iterations)
log_reg = LogisticRegression(
    solver='liblinear',
    max_iter=1000,
)

In [None]:
# Recursive Feature Elimination: asking for a single surviving feature forces
# every other feature to be eliminated one by one, which yields a full ranking.
# NOTE(review): X_train is passed unscaled; a coefficient-based ranking is
# presumably sensitive to feature scale — consider standardizing first and confirm.
rfe = RFE(estimator=log_reg, n_features_to_select=1)
rfe.fit(X_train, y_train)

In [None]:
# Pair each column name with its RFE rank, then order from best (1) to worst
feature_ranking = pd.DataFrame({'Feature': X.columns, 'Ranking': rfe.ranking_})
feature_ranking = feature_ranking.sort_values('Ranking')

print("Feature Importance Ranking:", feature_ranking, sep="\n")

Feature Importance Ranking:
     Feature  Ranking
8      exang        1
1        sex        2
11        ca        3
10     slope        4
2         cp        5
9    oldpeak        6
5        fbs        7
12      thal        8
6    restecg        9
7    thalach       10
0        age       11
3   trestbps       12
4       chol       13


In [None]:
# Keep the TOP_N best-ranked features. RFE was fitted with
# n_features_to_select=1, so ranks run 1, 2, 3, ... with exactly one feature
# per rank; `Ranking <= TOP_N` therefore returns exactly TOP_N rows.
# (The previous `== 1` filter could only ever return a single feature.)
TOP_N = 1  # raise this to list more of the highest-ranked features
top_features = feature_ranking[feature_ranking['Ranking'] <= TOP_N]
print("\nMost Important Features:")
print(top_features)


Most Important Features:
  Feature  Ranking
8   exang        1


In [None]:
# Suggest a default value for every feature from its central tendency:
#   - numeric columns     -> median
#   - non-numeric columns -> mode (most frequent value)
# Numeric columns are detected with select_dtypes so that every numeric width
# (int32, float32, ...) is covered, not only the literal 'int64'/'float64'.
numeric_features = set(X.select_dtypes(include='number').columns)

default_values = {}
for feature in feature_ranking['Feature']:  # iterate in ranking order
    column = X[feature]
    default_values[feature] = (
        column.median() if feature in numeric_features else column.mode()[0]
    )

# NOTE: the heading says "Least Important", but defaults are computed for all
# features; the text is kept unchanged so the recorded output stays valid.
print("\nSuggested Default Values for Least Important Features:")
print(default_values)



Suggested Default Values for Least Important Features:
{'exang': 0.0, 'sex': 1.0, 'ca': 0.0, 'slope': 1.0, 'cp': 1.0, 'oldpeak': 0.8, 'fbs': 0.0, 'thal': 2.0, 'restecg': 1.0, 'thalach': 153.0, 'age': 55.0, 'trestbps': 130.0, 'chol': 240.0}


In [None]:
# Build a fresh logistic regression and train it on all features of the training split
log_reg = LogisticRegression(
    solver='liblinear',
    max_iter=1000,
)
log_reg.fit(X_train, y_train)

Model Evaluation

Accuracy Score

In [None]:
# Score the fitted model on the same rows it was trained on
X_train_prediction = log_reg.predict(X_train)
training_data_accuracy = accuracy_score(
    y_true=y_train,
    y_pred=X_train_prediction,
)
print('Accuracy on Training data:', training_data_accuracy)

Accuracy on Training data: 0.8632075471698113


In [None]:

# Score the fitted model on the held-out test rows
X_test_prediction = log_reg.predict(X_test)
test_data_accuracy = accuracy_score(
    y_true=y_test,
    y_pred=X_test_prediction,
)
print('Accuracy on Test data:', test_data_accuracy)

Accuracy on Test data: 0.8021978021978022
