## loading liberaries

In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, precision_recall_curve, roc_curve, auc
from sklearn.preprocessing import StandardScaler
from scipy import stats

In [13]:
# Load your dataset
file_path = '/content/cleaned_hypertension_data-2.csv'
data = pd.read_csv(file_path)

# Outlier detection and removal
z_scores = np.abs(stats.zscore(data))
data = data[(z_scores < 3).all(axis=1)]
# Splitting data into features and target
X = data.drop('Class', axis=1)  # Assuming 'Class' is the target column
y = data['Class']

In [14]:
# Applying Standard Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Define a range of hyperparameters for tuning
n_estimators_range = [50, 100, 150, 200]
learning_rate_range = [0.01, 0.1, 1.0]

best_accuracy = 0
best_params = {}
# Hyperparameter Tuning
for n_estimators in n_estimators_range:
    for learning_rate in learning_rate_range:
        model = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate, random_state=42)
        skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
        cv_results = cross_val_score(model, X_scaled, y, cv=skf, scoring='accuracy')
        max_accuracy_index = np.argmax(cv_results)
        if cv_results[max_accuracy_index] > best_accuracy:
            best_accuracy = cv_results[max_accuracy_index]
            best_params = {'n_estimators': n_estimators, 'learning_rate': learning_rate}

In [15]:
# Training the best model with optimal threshold
best_model = AdaBoostClassifier(**best_params, random_state=42)
X_train_final, X_test_final, y_train_final, y_test_final = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
best_model.fit(X_train_final, y_train_final)
y_pred_proba_final = best_model.predict_proba(X_test_final)[:, 1]

In [16]:
# Optimal Threshold Calculation
precision_final, recall_final, thresholds_final = precision_recall_curve(y_test_final, y_pred_proba_final)
fscore_final = (2 * precision_final * recall_final) / (precision_final + recall_final)
ix_final = np.argmax(fscore_final)
optimal_threshold_final = thresholds_final[ix_final]
y_pred_final = (y_pred_proba_final >= optimal_threshold_final).astype(int)




In [17]:
# Evaluating model performance
accuracy_final = accuracy_score(y_test_final, y_pred_final)  # Correct accuracy calculation
precision_final = precision_score(y_test_final, y_pred_final)
recall_final = recall_score(y_test_final, y_pred_final)
f1_final = f1_score(y_test_final, y_pred_final)
conf_matrix_final = confusion_matrix(y_test_final, y_pred_final)

# ROC Curve and AUC
fpr, tpr, _ = roc_curve(y_test_final, y_pred_proba_final)
roc_auc = auc(fpr, tpr)

# Print results
print(f"Best Parameters: {best_params}")
print(f"Optimal Threshold: {optimal_threshold_final}")
print(f"Accuracy: {accuracy_final}")
print(f"Precision: {precision_final}")
print(f"Recall: {recall_final}")
print(f"F1 Score: {f1_final}")
print(f"Confusion Matrix:\n{conf_matrix_final}")
print(f"ROC AUC: {roc_auc}")

Best Parameters: {'n_estimators': 50, 'learning_rate': 0.1}
Optimal Threshold: 0.47485040925205974
Accuracy: 0.8775
Precision: 0.8168316831683168
Recall: 0.9322033898305084
F1 Score: 0.8707124010554088
Confusion Matrix:
[[186  37]
 [ 12 165]]
ROC AUC: 0.9400699247548834
