##  Improved code with suggestions included as dated 10 june 2025

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, confusion_matrix
from mrmr import mrmr_classif


In [2]:

# Load data
df = pd.read_excel(r"C:\Users\kumar\OneDrive\Desktop\Programs\Research@AIIMS\KKD AFT_SCA AI-ML_Calculate.xlsx")

# Encode target
df["SCA_type"] = LabelEncoder().fit_transform(df["Groups"])

# Add 1 to these 4 features
score_features = ['Sudomotor_Score', 'Cardiovagal_Score', 'Adrenergic_Score', 'CASS']
for col in score_features:
    df[col] += 1

In [3]:
aft_features = [
    'RRI_SDNN', 'RRI_RMSSD', 'RRI_LF_nu', 'RRI_HF_nu', 'RRI_Power_nu',
    'RRI_SD1_nu', 'RRI_SD2_nu',
    'SBP_SDNN', 'SBP_RMSSD', 'SBP_LF_nu', 'SBP_HF_nu', 'SBP_Power_nu',
    'SBP_All_BRS', 'SBP_αLF', 'SBP_αHF',
    'Sudomotor_Score', 'Cardiovagal_Score', 'Adrenergic_Score', 'CASS'
]

# Separate scalable features from excluded ones
excluded = score_features
scalable = [col for col in aft_features if col not in excluded]


In [4]:
# Data prep
X = df[aft_features]
y = df["SCA_type"]

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# ColumnTransformer: scale only the selected features
scaler = ColumnTransformer(
    transformers=[("scale", StandardScaler(), scalable)],
    remainder="passthrough"
)

pipeline_svm_no_mrmr = Pipeline([
    ('scaler', scaler),
    ('svm', SVC(random_state=42))
])

param_grid = {
    'svm__C': [0.01, 0.1, 1, 10],
    'svm__gamma': ['scale', 0.01, 0.1],
    'svm__kernel': ['rbf'],
    'svm__class_weight': [None, 'balanced']
}


In [5]:

grid_svm_no_mrmr = GridSearchCV(pipeline_svm_no_mrmr, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_svm_no_mrmr.fit(X_train, y_train)

# Predict and evaluate
y_pred_no_mrmr = grid_svm_no_mrmr.predict(X_test)
print(">>> SVM WITHOUT mRMR <<<")
print("Best Parameters:", grid_svm_no_mrmr.best_params_)
print("Test Accuracy:", np.mean(y_pred_no_mrmr == y_test))
print(classification_report(y_test, y_pred_no_mrmr))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_no_mrmr))


>>> SVM WITHOUT mRMR <<<
Best Parameters: {'svm__C': 1, 'svm__class_weight': None, 'svm__gamma': 'scale', 'svm__kernel': 'rbf'}
Test Accuracy: 0.6
              precision    recall  f1-score   support

           0       1.00      0.14      0.25         7
           1       0.57      1.00      0.73         8

    accuracy                           0.60        15
   macro avg       0.79      0.57      0.49        15
weighted avg       0.77      0.60      0.50        15

Confusion Matrix:
 [[1 6]
 [0 8]]


In [8]:
aft_features     

['RRI_SDNN',
 'RRI_RMSSD',
 'RRI_LF_nu',
 'RRI_HF_nu',
 'RRI_Power_nu',
 'RRI_SD1_nu',
 'RRI_SD2_nu',
 'SBP_SDNN',
 'SBP_RMSSD',
 'SBP_LF_nu',
 'SBP_HF_nu',
 'SBP_Power_nu',
 'SBP_All_BRS',
 'SBP_αLF',
 'SBP_αHF',
 'Sudomotor_Score',
 'Cardiovagal_Score',
 'Adrenergic_Score',
 'CASS']

In [10]:
# Select features using mRMR
df_mrmr = df[aft_features + ['SCA_type']]
df_mrmr.tail() 

Unnamed: 0,RRI_SDNN,RRI_RMSSD,RRI_LF_nu,RRI_HF_nu,RRI_Power_nu,RRI_SD1_nu,RRI_SD2_nu,SBP_SDNN,SBP_RMSSD,SBP_LF_nu,SBP_HF_nu,SBP_Power_nu,SBP_All_BRS,SBP_αLF,SBP_αHF,Sudomotor_Score,Cardiovagal_Score,Adrenergic_Score,CASS,SCA_type
66,8.4221,2.6413,72.867,17.538,651.119,0.3184,2.0192,8.3202,2.1753,90.256,5.461,241.318,1.754,0.1916,0.4072,4,3,2,7,1
67,31.5561,11.4026,69.213,19.076,224.344,1.4633,7.9636,2.5585,1.0648,78.19,12.75,173.415,12.578,2.5112,3.3524,4,2,2,6,1
68,43.2217,30.7663,42.411,51.799,181.681,2.6846,7.057,3.9929,1.1618,89.435,6.002,225.143,23.733,3.2015,13.2552,4,1,2,5,1
69,14.3283,7.8733,62.761,27.507,301.089,0.7865,2.7699,9.815,2.6279,75.677,16.695,474.272,4.304,0.6391,0.7912,3,3,2,6,1
70,12.512,6.5038,69.092,16.785,175.074,0.5941,2.2253,6.2656,1.5142,84.894,9.451,564.508,7.986,0.9346,0.9506,4,2,2,6,1


In [None]:
mrmr_output = mrmr_classif(X=df_mrmr[aft_features], y=df_mrmr['SCA_type'], K=15)
#mrmr_output = mrmr_classif(X=df_mrmr[aft_features], y=df_mrmr['SCA_type'], K=19,return_scores=True)
print("Top 19 mRMR Features:", mrmr_output)


100%|██████████| 19/19 [00:04<00:00,  4.15it/s]

Top 19 mRMR Features: (['SBP_Power_nu', 'RRI_Power_nu', 'SBP_HF_nu', 'Adrenergic_Score', 'SBP_RMSSD', 'Cardiovagal_Score', 'Sudomotor_Score', 'RRI_HF_nu', 'RRI_SD2_nu', 'CASS', 'SBP_SDNN', 'RRI_LF_nu', 'SBP_All_BRS', 'SBP_LF_nu', 'RRI_RMSSD', 'RRI_SDNN', 'RRI_SD1_nu', 'SBP_αHF', 'SBP_αLF'], RRI_SDNN             0.031848
RRI_RMSSD            0.042411
RRI_LF_nu            0.088047
RRI_HF_nu            0.259276
RRI_Power_nu         2.815866
RRI_SD1_nu           0.011201
RRI_SD2_nu           0.108766
SBP_SDNN             0.108635
SBP_RMSSD            1.093422
SBP_LF_nu            0.051388
SBP_HF_nu            0.953847
SBP_Power_nu         4.656386
SBP_All_BRS          0.071665
SBP_αLF              0.001246
SBP_αHF              0.002345
Sudomotor_Score      0.463256
Cardiovagal_Score    1.022386
Adrenergic_Score     1.022386
CASS                 0.180164
dtype: float64,                    RRI_SDNN  RRI_RMSSD  RRI_LF_nu  RRI_HF_nu  RRI_Power_nu  \
RRI_SDNN           0.001000   0.859528   0.0




In [12]:

if isinstance(mrmr_output, tuple) and len(mrmr_output) >= 2:
        selected_features = mrmr_output[0]
        scores_df = mrmr_output[1]
else:
    print("Something worng")
    
sorted_scores_df = scores_df.sort_values(ascending=False)
print("Sorted mRMR Scores:", sorted_scores_df)

Sorted mRMR Scores: SBP_Power_nu         4.656386
RRI_Power_nu         2.815866
SBP_RMSSD            1.093422
Adrenergic_Score     1.022386
Cardiovagal_Score    1.022386
SBP_HF_nu            0.953847
Sudomotor_Score      0.463256
RRI_HF_nu            0.259276
CASS                 0.180164
RRI_SD2_nu           0.108766
SBP_SDNN             0.108635
RRI_LF_nu            0.088047
SBP_All_BRS          0.071665
SBP_LF_nu            0.051388
RRI_RMSSD            0.042411
RRI_SDNN             0.031848
RRI_SD1_nu           0.011201
SBP_αHF              0.002345
SBP_αLF              0.001246
dtype: float64


In [13]:
excel_output_df = sorted_scores_df.reset_index(name='mRMR_Score')
excel_output_df.rename(columns={'index': 'Feature_Name'}, inplace=True)

# Define the filename for your Excel file
excel_filename = '19mRMR_Scores_Output10june.xlsx'

# Save the DataFrame to an Excel file
# index=False prevents Pandas from writing the DataFrame index as a column in the Excel file.
excel_output_df.to_excel(excel_filename, index=False)

print(f"\nSuccessfully saved sorted mRMR scores to '{excel_filename}'")


Successfully saved sorted mRMR scores to '19mRMR_Scores_Output10june.xlsx'


In [None]:

X_mrmr = df[mrmr_output]
X_train_mrmr, X_test_mrmr, y_train_mrmr, y_test_mrmr = train_test_split(X_mrmr, y, stratify=y, test_size=0.2, random_state=42)

# Identify scalable among selected
scalable_mrmr = [f for f in mrmr_output if f not in excluded]
scaler_mrmr = ColumnTransformer(
    transformers=[("scale", StandardScaler(), scalable_mrmr)],
    remainder="passthrough"
)

pipeline_svm_mrmr = Pipeline([
    ('scaler', scaler_mrmr),
    ('svm', SVC(random_state=42))
])

grid_svm_mrmr = GridSearchCV(pipeline_svm_mrmr, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_svm_mrmr.fit(X_train_mrmr, y_train_mrmr)


In [7]:

# Predict and evaluate
y_pred_mrmr = grid_svm_mrmr.predict(X_test_mrmr)
print("\n>>> SVM WITH mRMR <<<")
print("Best Parameters:", grid_svm_mrmr.best_params_)
print("Test Accuracy:", np.mean(y_pred_mrmr == y_test_mrmr))
print(classification_report(y_test_mrmr, y_pred_mrmr))
print("Confusion Matrix:\n", confusion_matrix(y_test_mrmr, y_pred_mrmr))



>>> SVM WITH mRMR <<<
Best Parameters: {'svm__C': 10, 'svm__class_weight': None, 'svm__gamma': 'scale', 'svm__kernel': 'rbf'}
Test Accuracy: 0.6666666666666666
              precision    recall  f1-score   support

           0       0.67      0.57      0.62         7
           1       0.67      0.75      0.71         8

    accuracy                           0.67        15
   macro avg       0.67      0.66      0.66        15
weighted avg       0.67      0.67      0.66        15

Confusion Matrix:
 [[4 3]
 [2 6]]
