In [161]:
import pandas as pd

In [162]:
df = pd.read_csv("heart.csv")
df1 = pd.DataFrame(df["HeartDisease"])

In [163]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

# Assuming df is your pandas DataFrame containing categorical columns
# Selecting columns of type "object"
categorical_columns = df.select_dtypes(include=["object"]).columns.tolist()

# Creating a ColumnTransformer
# Each transformer is a tuple of (name, transformer, columns)
# Here, we apply OneHotEncoder to each categorical column
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), categorical_columns)],
    remainder='passthrough'  # This ensures non-categorical columns are passed through
)

# Transforming the DataFrame
df = ct.fit_transform(df.drop(columns = ["HeartDisease"]))
df = pd.DataFrame(df, columns = ct.get_feature_names_out())
df

Unnamed: 0,encoder__Sex_F,encoder__Sex_M,encoder__ChestPainType_ASY,encoder__ChestPainType_ATA,encoder__ChestPainType_NAP,encoder__ChestPainType_TA,encoder__RestingECG_LVH,encoder__RestingECG_Normal,encoder__RestingECG_ST,encoder__ExerciseAngina_N,encoder__ExerciseAngina_Y,encoder__ST_Slope_Down,encoder__ST_Slope_Flat,encoder__ST_Slope_Up,remainder__Age,remainder__RestingBP,remainder__Cholesterol,remainder__FastingBS,remainder__MaxHR,remainder__Oldpeak
0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,40.0,140.0,289.0,0.0,172.0,0.0
1,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,49.0,160.0,180.0,0.0,156.0,1.0
2,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,37.0,130.0,283.0,0.0,98.0,0.0
3,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,48.0,138.0,214.0,0.0,108.0,1.5
4,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,54.0,150.0,195.0,0.0,122.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
913,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,45.0,110.0,264.0,0.0,132.0,1.2
914,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,68.0,144.0,193.0,1.0,141.0,3.4
915,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,57.0,130.0,131.0,0.0,115.0,1.2
916,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,57.0,130.0,236.0,0.0,174.0,0.0


In [164]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df)
df = pd.DataFrame(df_scaled, columns = df.columns)
df["HeartDisease"] = df1["HeartDisease"]

In [165]:
X = df.drop("HeartDisease", axis = 1)
y = df["HeartDisease"]

In [172]:
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV


model_params = {
    "LogisticRegression":{
        "model":LogisticRegression(max_iter = 500),
        "params":{
            "C":[i for i in range(1, 10, 2)]
        }
    },
    "RandomForestClassifier":{
        "model":RandomForestClassifier(),
        "params":{
            "n_estimators":[i for i in range(50, 200, 25)],
            "criterion":["gini", "entropy"]
        }
    },
    "SVC":{
        "model":SVC(gamma = "auto"),
        "params":{
            "kernel":["rbf", "linear"],
            "C":[i for i in range(1, 10, 2)]
        }
    }
}
score = []
for model_name, mp in model_params.items():
    gs = GridSearchCV(mp["model"], mp["params"], cv=5, return_train_score = False)
    for i in range(1, 5):
        p = PCA(n_components = i)
        X_decomposed = p.fit_transform(X)
        gs.fit(X_decomposed, y)
        score.append({"model_name":model_name, "best_parameters":gs.best_params_, "score":gs.best_score_})
best_results = pd.DataFrame(score)
results = pd.DataFrame(gs.cv_results_)

In [173]:
best_results

Unnamed: 0,model_name,best_parameters,score
0,LogisticRegression,{'C': 1},0.844155
1,LogisticRegression,{'C': 1},0.838691
2,LogisticRegression,{'C': 1},0.826728
3,LogisticRegression,{'C': 1},0.8518
4,RandomForestClassifier,"{'criterion': 'gini', 'n_estimators': 100}",0.783114
5,RandomForestClassifier,"{'criterion': 'gini', 'n_estimators': 125}",0.817997
6,RandomForestClassifier,"{'criterion': 'entropy', 'n_estimators': 50}",0.813679
7,RandomForestClassifier,"{'criterion': 'entropy', 'n_estimators': 175}",0.811499
8,SVC,"{'C': 9, 'kernel': 'rbf'}",0.845242
9,SVC,"{'C': 3, 'kernel': 'linear'}",0.840859


In [176]:
results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.014503,0.003257,0.008003,4e-06,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.929348,0.836957,0.820652,0.84153,0.781421,0.841981,0.04854,4
1,0.014402,0.003194,0.001602,0.003204,1,linear,"{'C': 1, 'kernel': 'linear'}",0.923913,0.836957,0.847826,0.846995,0.73224,0.837586,0.061227,8
2,0.008621,0.001214,0.004669,0.003956,3,rbf,"{'C': 3, 'kernel': 'rbf'}",0.929348,0.842391,0.809783,0.84153,0.765027,0.837616,0.053845,7
3,0.010831,0.003781,0.001564,0.001526,3,linear,"{'C': 3, 'kernel': 'linear'}",0.923913,0.836957,0.847826,0.852459,0.743169,0.840865,0.057688,5
4,0.011193,0.002405,0.007277,0.002608,5,rbf,"{'C': 5, 'kernel': 'rbf'}",0.929348,0.836957,0.809783,0.852459,0.759563,0.837622,0.055682,6
5,0.008801,0.002441,0.002906,0.00359,5,linear,"{'C': 5, 'kernel': 'linear'}",0.923913,0.836957,0.847826,0.863388,0.743169,0.843051,0.05829,1
6,0.011835,0.003717,0.004046,0.002669,7,rbf,"{'C': 7, 'kernel': 'rbf'}",0.929348,0.831522,0.809783,0.846995,0.759563,0.835442,0.055467,10
7,0.010482,0.005392,0.00221,0.003043,7,linear,"{'C': 7, 'kernel': 'linear'}",0.923913,0.836957,0.847826,0.863388,0.743169,0.843051,0.05829,1
8,0.009613,0.002962,0.0059,0.003109,9,rbf,"{'C': 9, 'kernel': 'rbf'}",0.929348,0.831522,0.809783,0.846995,0.765027,0.836535,0.053996,9
9,0.011194,0.004017,0.003177,0.003891,9,linear,"{'C': 9, 'kernel': 'linear'}",0.923913,0.836957,0.847826,0.863388,0.743169,0.843051,0.05829,1


In [177]:
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV


model_params = {
    "LogisticRegression":{
        "model":LogisticRegression(max_iter = 500),
        "params":{
            "C":[i for i in range(1, 10, 2)]
        }
    },
    "RandomForestClassifier":{
        "model":RandomForestClassifier(),
        "params":{
            "n_estimators":[i for i in range(50, 200, 25)],
            "criterion":["gini", "entropy"]
        }
    },
    "SVC":{
        "model":SVC(gamma = "auto"),
        "params":{
            "kernel":["rbf", "linear"],
            "C":[i for i in range(1, 10, 2)]
        }
    }
}
score = []
for model_name, mp in model_params.items():
    gs = GridSearchCV(mp["model"], mp["params"], cv=5, return_train_score = False)
    X_decomposed = p.fit_transform(X)
    gs.fit(X_decomposed, y)
    score.append({"model_name":model_name, "best_parameters":gs.best_params_, "score":gs.best_score_})
best_results = pd.DataFrame(score)
results = pd.DataFrame(gs.cv_results_)

In [178]:
best_results

Unnamed: 0,model_name,best_parameters,score
0,LogisticRegression,{'C': 1},0.8518
1,RandomForestClassifier,"{'criterion': 'entropy', 'n_estimators': 125}",0.813679
2,SVC,"{'C': 5, 'kernel': 'linear'}",0.843051
