# Before and after SVM

In [85]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
import mrmr
from mrmr import mrmr_classif

In [86]:
# Load your data
datap=r"C:\Users\kumar\OneDrive\Desktop\Programs\Research@AIIMS\KKD AFT_SCA AI-ML_Calculate.xlsx"
df = pd.read_excel(datap)
df.tail()

Unnamed: 0,Groups,Patient_No.,RRI_SDNN,RRI_RMSSD,RRI_LF_nu,RRI_HF_nu,RRI_Power_nu,RRI_SD1_nu,RRI_SD2_nu,SBP_SDNN,...,SBP_αLF,SBP_αHF,Sudomotor_Score,Cardiovagal_Score,Adrenergic_Score,CASS,Age,Disease_durn,Age of onset,ICARS
66,2,SCA2_36,8.4221,2.6413,72.867,17.538,651.119,0.3184,2.0192,8.3202,...,0.1916,0.4072,3,2,1,6,25,6.0,19.0,43
67,2,SCA2_37,31.5561,11.4026,69.213,19.076,224.344,1.4633,7.9636,2.5585,...,2.5112,3.3524,3,1,1,5,18,6.0,12.0,87
68,2,SCA2_38,43.2217,30.7663,42.411,51.799,181.681,2.6846,7.057,3.9929,...,3.2015,13.2552,3,0,1,4,47,7.0,40.0,26
69,2,SCA2_39,14.3283,7.8733,62.761,27.507,301.089,0.7865,2.7699,9.815,...,0.6391,0.7912,2,2,1,5,52,6.0,46.0,38
70,2,SCA2_40,12.512,6.5038,69.092,16.785,175.074,0.5941,2.2253,6.2656,...,0.9346,0.9506,3,1,1,5,28,3.0,25.0,27


In [82]:
df.shape

(71, 25)

In [None]:
# Label encode target
df["SCA_type"] = LabelEncoder().fit_transform(df["Groups"])  # 0 = SCA1, 1 = SCA2

# Define 24 AFT features based on dictionary
# aft_features = [
#     'RRI_SDNN', 'RRI_RMSSD', 'RRI_LF_nu', 'RRI_HF_nu', 'RRI_Power_nu', 'RRI_SD1_nu', 'RRI_SD2_nu', 'SBP_SDNN', 'SBP_RMSSD', 'SBP_LF_nu', 'SBP_HF_nu', 'SBP_Power_nu', 'SBP_All_BRS', 'SBP_αLF', 'SBP_αHF', 'Sudomotor_Score', 'Cardiovagal_Score', 'Adrenergic_Score', 'CASS', 'Age', 'Disease_durn', 'Age of onset', 'ICARS'
# ]


# Define 19 AFT features based on dictionary
aft_features = [
    'RRI_SDNN', 'RRI_RMSSD', 'RRI_LF_nu', 'RRI_HF_nu', 'RRI_Power_nu', 'RRI_SD1_nu', 'RRI_SD2_nu', 'SBP_SDNN', 'SBP_RMSSD', 'SBP_LF_nu', 'SBP_HF_nu', 'SBP_Power_nu', 'SBP_All_BRS', 'SBP_αLF', 'SBP_αHF', 'Sudomotor_Score', 'Cardiovagal_Score', 'Adrenergic_Score', 'CASS'
]



X = df[aft_features]
y = df["SCA_type"]

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [72]:
print("X_train:",X_train.shape)
print("X_test:",X_test.shape)
print("Y_train:",y_train.shape)
print("Y_test:",y_test.shape)

X_train: (56, 23)
X_test: (15, 23)
Y_train: (56,)
Y_test: (15,)


In [84]:
# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# SVM model with GridSearchCV
param_grid = {
    'C': [0.001, 0.01,0.1, 1, 10, 50],
    'gamma': ['scale', 0.01, 0.1, 1],
    'kernel': ['rbf'],
    'class_weight': [None, 'balanced'], 
}

grid_svm = GridSearchCV(SVC(), param_grid, cv=10, scoring='accuracy')
grid_svm.fit(X_train_scaled, y_train)

# Predict
y_pred = grid_svm.predict(X_test_scaled)
print(">>> SVM without mRMR <<<")
print("Best Params:", grid_svm.best_params_)
print("Accuracy:", grid_svm.best_score_)
print("Test Performance:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


>>> SVM without mRMR <<<
Best Params: {'C': 0.001, 'class_weight': None, 'gamma': 'scale', 'kernel': 'rbf'}
Accuracy: 0.5899999999999999
Test Performance:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.47      1.00      0.64         7

    accuracy                           0.47        15
   macro avg       0.23      0.50      0.32        15
weighted avg       0.22      0.47      0.30        15

Confusion Matrix:
 [[0 8]
 [0 7]]


In [74]:

#Encode target
df_mrmr = df[aft_features + ["SCA_type"]].copy()
df_mrmr["SCA_type"] = LabelEncoder().fit_transform(df_mrmr["SCA_type"])
X_all = df_mrmr.drop(columns=["SCA_type"])
y_all = df_mrmr["SCA_type"]


In [76]:
# Select top 10 features using mRMR
#mrmr_output = mrmr_classif(X=X_all, y=y_all, K=10,return_scores=True)
mrmr_output = mrmr_classif(X=X_all, y=y_all, K=15)
print("Top mRMR Features:", mrmr_output)

100%|██████████| 15/15 [00:00<00:00, 45.16it/s]

Top mRMR Features: ['SBP_Power_nu', 'Age', 'Cardiovagal_Score', 'RRI_Power_nu', 'Disease_durn', 'SBP_HF_nu', 'Age of onset', 'Adrenergic_Score', 'SBP_RMSSD', 'ICARS', 'Sudomotor_Score', 'RRI_HF_nu', 'RRI_SD2_nu', 'CASS', 'SBP_SDNN']





In [None]:

if isinstance(mrmr_output, tuple) and len(mrmr_output) >= 2:
        selected_features = mrmr_output[0]
        scores_df = mrmr_output[1]
else:
    print("Something worng")
    
sorted_scores_df = scores_df.sort_values(ascending=False)
print("Sorted mRMR Scores:", sorted_scores_df)

Sorted mRMR Scores: SBP_Power_nu         4.656386
Disease_durn         3.048252
RRI_Power_nu         2.815866
Age of onset         2.613369
ICARS                1.588202
SBP_RMSSD            1.093422
Cardiovagal_Score    1.022386
Adrenergic_Score     1.022386
SBP_HF_nu            0.953847
Age                  0.822949
Sudomotor_Score      0.463256
RRI_HF_nu            0.259276
CASS                 0.180164
RRI_SD2_nu           0.108766
SBP_SDNN             0.108635
RRI_LF_nu            0.088047
SBP_All_BRS          0.071665
SBP_LF_nu            0.051388
RRI_RMSSD            0.042411
RRI_SDNN             0.031848
RRI_SD1_nu           0.011201
SBP_αHF              0.002345
SBP_αLF              0.001246
dtype: float64


In [37]:
excel_output_df = sorted_scores_df.reset_index(name='mRMR_Score')
excel_output_df.rename(columns={'index': 'Feature_Name'}, inplace=True)

# Define the filename for your Excel file
excel_filename = '10mRMR_Scores_Output.xlsx'

# Save the DataFrame to an Excel file
# index=False prevents Pandas from writing the DataFrame index as a column in the Excel file.
excel_output_df.to_excel(excel_filename, index=False)

print(f"\nSuccessfully saved sorted mRMR scores to '{excel_filename}'")


Successfully saved sorted mRMR scores to '10mRMR_Scores_Output.xlsx'


In [77]:
# Use selected features
X_mrmr = df[mrmr_output]
X_mrmr.head()

Unnamed: 0,SBP_Power_nu,Age,Cardiovagal_Score,RRI_Power_nu,Disease_durn,SBP_HF_nu,Age of onset,Adrenergic_Score,SBP_RMSSD,ICARS,Sudomotor_Score,RRI_HF_nu,RRI_SD2_nu,CASS,SBP_SDNN
0,153.61,34,2,136.482,6.0,25.291,28.0,1,2.4562,28,3,30.298,7.374,6,4.8508
1,477.093,34,1,412.771,2.0,45.533,32.0,1,1.7678,42,3,41.508,1.7183,5,5.5064
2,501.601,32,1,450.714,15.0,8.12,17.0,1,0.7926,50,3,19.04,2.9097,5,2.6318
3,860.421,32,0,174.354,5.0,35.37,27.0,1,1.0626,39,0,31.302,3.5086,1,3.4819
4,189.9,18,0,106.227,1.0,24.675,17.0,1,1.6306,9,3,65.528,3.6753,4,3.3065


In [78]:
X_train_mrmr, X_test_mrmr, y_train_mrmr, y_test_mrmr = train_test_split(X_mrmr, y, test_size=0.2, stratify=y, random_state=42)
# Standardize
X_train_mrmr_scaled = scaler.fit_transform(X_train_mrmr)
X_test_mrmr_scaled = scaler.transform(X_test_mrmr)

print("X_train_mrmr_scaled:",X_train_mrmr_scaled.shape)
print("X_test_mrmr_scaled:",X_test_mrmr_scaled.shape)
print("y_train_mrmr:",y_train_mrmr.shape)
print("y_test_mrmr:",y_test_mrmr.shape)


X_train_mrmr_scaled: (56, 15)
X_test_mrmr_scaled: (15, 15)
y_train_mrmr: (56,)
y_test_mrmr: (15,)


In [79]:

# Grid search again
grid_svm_mrmr = GridSearchCV(SVC(), param_grid, cv=5, scoring='accuracy')
grid_svm_mrmr.fit(X_train_mrmr_scaled, y_train_mrmr)

# Predict
y_pred_mrmr = grid_svm_mrmr.predict(X_test_mrmr_scaled)

print("\n>>> SVM with mRMR (Top 10 Features) <<<")
print("Best Params:", grid_svm_mrmr.best_params_)
print("Accuracy:", grid_svm_mrmr.best_score_)
print("Test Performance:")
print(classification_report(y_test_mrmr, y_pred_mrmr))
print("Confusion Matrix:\n", confusion_matrix(y_test_mrmr, y_pred_mrmr))


>>> SVM with mRMR (Top 10 Features) <<<
Best Params: {'C': 10, 'class_weight': None, 'gamma': 'scale', 'kernel': 'rbf'}
Accuracy: 0.5878787878787879
Test Performance:
              precision    recall  f1-score   support

           0       0.56      0.71      0.62         7
           1       0.67      0.50      0.57         8

    accuracy                           0.60        15
   macro avg       0.61      0.61      0.60        15
weighted avg       0.61      0.60      0.60        15

Confusion Matrix:
 [[5 2]
 [4 4]]
