# In [None]:
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler
import pandas as pd
import warnings

# Silence UserWarning and all of its subclasses for the rest of the session.
# scikit-learn's ConvergenceWarning derives from UserWarning, so it is covered,
# but so is every other UserWarning raised after this point.
warnings.filterwarnings("ignore", category=UserWarning)

# `df` must already exist in the session and contain every column named in
# `chronic_conditions` plus the candidate feature columns.
# NOTE(review): presumably df has no missing values at this point; the
# encoding / scoring steps below do not impute anything -- confirm upstream.

# Outcome columns; each one is used in turn as the classification target.
chronic_conditions = ['Heart Attack', 'Angina Or Coronary Heart Disease', 'Stroke']

# Number of top-ranked features to keep per condition. Defined once, before
# the loop, because it is loop-invariant and the final report reads it too.
k_best_features = 15

# Work on a copy so the original dataframe is never modified.
df_copy = df.copy()

# Integer-encode every text column once up front. A fresh encoder is used per
# column; doing this inside the loop would be a no-op after the first pass
# because no object-dtype columns remain afterwards.
categorical_columns = df_copy.select_dtypes(include=['object']).columns
for col in categorical_columns:
    df_copy[col] = LabelEncoder().fit_transform(df_copy[col])

# One set of selected feature names per condition.
selected_features_list = []

# NOTE(review): only the current target is dropped from X, so the other
# chronic-condition columns compete for the top-k slots as features. They can
# never reach the final intersection (each is excluded in its own run), which
# effectively shrinks k. Consider dropping all of `chronic_conditions` from X
# -- confirm the intended design before changing results.
for condition in chronic_conditions:
    # Encode the target; this also normalises targets that were not
    # object-dtype and therefore were skipped by the pass above.
    label_encoder = LabelEncoder()
    df_copy[condition] = label_encoder.fit_transform(df_copy[condition])

    # Features are every column except the current target.
    X = df_copy.drop(columns=[condition])
    y = df_copy[condition]

    # Standardise features before scoring.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Rank features by ANOVA F-statistic and keep the best k. Cap k at the
    # number of available columns so a narrow dataframe does not make
    # SelectKBest fail (older scikit-learn raises when k > n_features).
    selector = SelectKBest(f_classif, k=min(k_best_features, X.shape[1]))
    X_selected = selector.fit_transform(X_scaled, y)

    # Map the boolean support mask back to column names.
    selected_features_list.append(set(X.columns[selector.get_support()]))

# Features that made the top k for every condition. Guard the empty case:
# set.intersection() with no arguments raises TypeError.
if selected_features_list:
    common_features = set.intersection(*selected_features_list)
else:
    common_features = set()

print(f"Common top {k_best_features} features for all conditions: {common_features}")
