In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score, GridSearchCV, cross_validate
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [9]:
import pandas as pd
df = pd.read_csv("../data/dataset.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [10]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

print("Feature Shape:", X.shape)
print("Target Shape :", y.shape)

Feature Shape: (303, 13)
Target Shape : (303,)


In [11]:
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
dt = DecisionTreeClassifier(random_state=42)
dt_scores = cross_val_score(dt, X, y, cv=kfold, scoring='accuracy')
print("Decision Tree K-Fold Accuracy Scores:", dt_scores)
print("Mean Accuracy:", dt_scores.mean())

Decision Tree K-Fold Accuracy Scores: [0.75409836 0.75409836 0.80327869 0.78333333 0.78333333]
Mean Accuracy: 0.7756284153005464


In [12]:
skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
svm_pipeline = Pipeline([('scaler', StandardScaler()), ('svm', SVC())])
svm_scores = cross_val_score(svm_pipeline, X, y, cv=skfold, scoring='accuracy')
print("SVM Stratified K-Fold Accuracy Scores:", svm_scores)
print("Mean Accuracy:", svm_scores.mean())


SVM Stratified K-Fold Accuracy Scores: [0.90163934 0.83606557 0.73770492 0.83333333 0.8       ]
Mean Accuracy: 0.8217486338797816


In [13]:
rf = RandomForestClassifier(random_state=42)
rf_scores = cross_val_score(rf, X, y, cv=skfold, scoring='accuracy')
print("Random Forest Accuracy Scores:", rf_scores)
print("Mean Accuracy:", rf_scores.mean())


Random Forest Accuracy Scores: [0.85245902 0.83606557 0.75409836 0.81666667 0.81666667]
Mean Accuracy: 0.815191256830601


In [14]:
param_grid = {'n_estimators': [100, 200], 'max_depth': [None, 10, 20], 'min_samples_split': [2, 5]}
grid = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=skfold, scoring='accuracy', n_jobs=-1)
grid.fit(X, y)
print("Best Parameters:", grid.best_params_)
print("Best Cross-Validation Accuracy:", grid.best_score_)

Best Parameters: {'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 200}
Best Cross-Validation Accuracy: 0.8383060109289617


In [15]:
best_rf = grid.best_estimator_
best_rf.fit(X, y)
y_pred = best_rf.predict(X)
accuracy = accuracy_score(y, y_pred)
precision = precision_score(y, y_pred, average='weighted')
recall = recall_score(y, y_pred, average='weighted')
f1 = f1_score(y, y_pred, average='weighted')
print("Final Model Performance:")
print("Accuracy :", accuracy)
print("Precision:", precision)
print("Recall   :", recall)
print("F1-Score :", f1)


Final Model Performance:
Accuracy : 0.9801980198019802
Precision: 0.9802590509138973
Recall   : 0.9801980198019802
F1-Score : 0.9801853943199261


In [16]:
scoring = {'accuracy': 'accuracy', 'precision': 'precision_weighted', 'recall': 'recall_weighted', 'f1': 'f1_weighted'}
cv_results = cross_validate(best_rf, X, y, cv=skfold, scoring=scoring)
print("Final Cross-Validated Performance (Optimized Random Forest)")
print("Accuracy :", cv_results['test_accuracy'].mean())
print("Precision:", cv_results['test_precision'].mean())
print("Recall   :", cv_results['test_recall'].mean())
print("F1-Score :", cv_results['test_f1'].mean())


Final Cross-Validated Performance (Optimized Random Forest)
Accuracy : 0.8383060109289617
Precision: 0.8424493510631796
Recall   : 0.8383060109289617
F1-Score : 0.8364514364482819


In [17]:
comparison = pd.DataFrame({
    'Model': ['Decision Tree', 'SVM', 'Random Forest (Tuned)'],
    'CV Accuracy': [dt_scores.mean(), svm_scores.mean(), cv_results['test_accuracy'].mean()]
})
comparison
comparison.to_csv("model_comparison_results.csv", index=False)
print("Comparison saved as 'model_comparison_results.csv'")


Comparison saved as 'model_comparison_results.csv'
