## Baseline + Normalization

In [116]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import RidgeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report, confusion_matrix

from imblearn.under_sampling import RandomUnderSampler

In [117]:
# Load the hypertension dataset.
# The path uses '/' rather than a backslash: a backslash followed by 'h' is an
# invalid escape sequence inside a normal string literal (Python warns about
# it) and a backslash separator only resolves on Windows, whereas '/' is
# accepted on Windows, Linux and macOS alike.
df_path = 'Dataset/hypertension_data.csv'
df = pd.read_csv(df_path)

# Fill missing values in the 'sex' column with the column's mode.
# mode() returns a Series; [0] selects its first entry.
df['sex'] = df['sex'].fillna(df['sex'].mode()[0])

In [118]:
# Separate the label column from the feature columns.
y = df['target']
X = df.drop('target', axis=1)

# Balance the classes with random under-sampling (seeded for reproducibility).
# NOTE(review): the under-sampling runs before the train/test split, so the
# held-out test set is drawn from the rebalanced data rather than from the
# original class distribution - confirm this is the intended evaluation setup.
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X, y)

# Hold out 20% of the resampled rows for testing; the remaining 80% is used
# for training.
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled,
    y_resampled,
    test_size=0.2,
    random_state=42,
)

In [119]:
# Min-max normalization. The scaling parameters (per-column minimum and range)
# are computed from the training split only and then reused for the test
# split, so no statistics from the test data enter the preprocessing.
train_min = X_train.min()
train_range = X_train.max() - train_min

# A column that is constant in the training split has a range of 0; dividing
# by it would produce NaN/inf and break the downstream model fits. Use 1 as
# the divisor for such columns so they simply map to (value - min).
train_range = train_range.replace(0, 1)

# Normalize the training data.
X_train_norm = (X_train - train_min) / train_range
# Normalize the test data with the training split's min and range.
X_test_norm = (X_test - train_min) / train_range

## 3. Metode klasifikasi:
1. Support Vector Machine
2. Gaussian Naive Bayes
3. Ridge Classifier
4. Linear Discriminant Analysis
5. Logistic Regression

### 1. Support Vector Machine

In [120]:
# Initialize the Support Vector Machine classifier with a linear kernel.
svm_model = SVC(kernel='linear')

In [121]:
# Fit the SVM on the normalized training features and their labels.
svm_model.fit(X=X_train_norm, y=y_train)

In [122]:
# Predict labels for the normalized test features with the fitted SVM.
svm_pred = svm_model.predict(X=X_test_norm)

### 2. Gaussian Naive Bayes

In [123]:
# Initialize the Gaussian Naive Bayes classifier (no arguments passed).
gnb = GaussianNB()

In [124]:
# Fit Gaussian Naive Bayes on the normalized training features and labels.
gnb.fit(X=X_train_norm, y=y_train)

In [125]:
# Predict labels for the normalized test features with the fitted model.
gnb_pred = gnb.predict(X=X_test_norm)

### 3. Ridge Classifier

In [126]:
# Initialize the Ridge classifier with an explicit regularization strength,
# solver choice and a fixed seed.
ridge_classifier = RidgeClassifier(
    alpha=1.0,
    solver='auto',
    random_state=42,
)

In [127]:
# Fit the Ridge classifier on the normalized training features and labels.
ridge_classifier.fit(X=X_train_norm, y=y_train)

In [128]:
# Predict labels for the normalized test features with the fitted classifier.
rc_pred = ridge_classifier.predict(X=X_test_norm)

### 4. Linear Discriminant Analysis

In [129]:
# Initialize the Linear Discriminant Analysis classifier (no arguments passed).
lda = LinearDiscriminantAnalysis()

In [130]:
# Fit LDA on the normalized training features and their labels.
lda.fit(X=X_train_norm, y=y_train)

In [131]:
# Predict labels for the normalized test features with the fitted LDA model.
lda_pred = lda.predict(X=X_test_norm)

### 5. Logistic Regression

In [132]:
# Initialize logistic regression with a raised iteration cap and a fixed seed.
logistic_regression = LogisticRegression(
    max_iter=1000,
    random_state=42,
)

In [133]:
# Fit logistic regression on the normalized training features and labels.
logistic_regression.fit(X=X_train_norm, y=y_train)

In [134]:
# Predict labels for the normalized test features with the fitted model.
lr_pred = logistic_regression.predict(X=X_test_norm)

## 4. Model evaluator:
1. Precision
2. Recall
3. F1-measure

### 1. SVM

In [135]:
# Evaluate the SVM predictions against the test labels.
# Precision, recall and F1-score are each computed with weighted averaging,
# in that order.
precision, recall, f1 = (
    scorer(y_test, svm_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# Print the three summary scores, formatted to three decimals.
print(f'Precision: {precision:.3f}')
print(f'Recall: {recall:.3f}')
print(f'F1-score: {f1:.3f}')
print()
# Confusion matrix, then the per-class classification report.
print(confusion_matrix(y_test, svm_pred))
print(classification_report(y_test, svm_pred))

Precision: 0.843
Recall: 0.838
F1-score: 0.837

[[1817  535]
 [ 231 2141]]
              precision    recall  f1-score   support

           0       0.89      0.77      0.83      2352
           1       0.80      0.90      0.85      2372

    accuracy                           0.84      4724
   macro avg       0.84      0.84      0.84      4724
weighted avg       0.84      0.84      0.84      4724



### 2. Gaussian Naive Bayes

In [136]:
# Evaluate the Gaussian Naive Bayes predictions against the test labels.
# Precision, recall and F1-score are each computed with weighted averaging,
# in that order.
precision, recall, f1 = (
    scorer(y_test, gnb_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# Print the three summary scores, formatted to three decimals.
print(f'Precision: {precision:.3f}')
print(f'Recall: {recall:.3f}')
print(f'F1-score: {f1:.3f}')
print()
# Confusion matrix, then the per-class classification report.
print(confusion_matrix(y_test, gnb_pred))
print(classification_report(y_test, gnb_pred))

Precision: 0.829
Recall: 0.828
F1-score: 0.828

[[1887  465]
 [ 348 2024]]
              precision    recall  f1-score   support

           0       0.84      0.80      0.82      2352
           1       0.81      0.85      0.83      2372

    accuracy                           0.83      4724
   macro avg       0.83      0.83      0.83      4724
weighted avg       0.83      0.83      0.83      4724



### 3. Ridge Classifier

In [137]:
# Evaluate the Ridge classifier predictions against the test labels.
# Precision, recall and F1-score are each computed with weighted averaging,
# in that order.
precision, recall, f1 = (
    scorer(y_test, rc_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# Print the three summary scores, formatted to three decimals.
print(f'Precision: {precision:.3f}')
print(f'Recall: {recall:.3f}')
print(f'F1-score: {f1:.3f}')
print()
# Confusion matrix, then the per-class classification report.
print(confusion_matrix(y_test, rc_pred))
print(classification_report(y_test, rc_pred))

Precision: 0.858
Recall: 0.850
F1-score: 0.849

[[1827  525]
 [ 183 2189]]
              precision    recall  f1-score   support

           0       0.91      0.78      0.84      2352
           1       0.81      0.92      0.86      2372

    accuracy                           0.85      4724
   macro avg       0.86      0.85      0.85      4724
weighted avg       0.86      0.85      0.85      4724



### 4. Linear Discriminant Analysis

In [138]:
# Evaluate the Linear Discriminant Analysis predictions against the test labels.
# Precision, recall and F1-score are each computed with weighted averaging,
# in that order.
precision, recall, f1 = (
    scorer(y_test, lda_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# Print the three summary scores, formatted to three decimals.
print(f'Precision: {precision:.3f}')
print(f'Recall: {recall:.3f}')
print(f'F1-score: {f1:.3f}')
print()
# Confusion matrix, then the per-class classification report.
print(confusion_matrix(y_test, lda_pred))
print(classification_report(y_test, lda_pred))

Precision: 0.858
Recall: 0.850
F1-score: 0.849

[[1827  525]
 [ 183 2189]]
              precision    recall  f1-score   support

           0       0.91      0.78      0.84      2352
           1       0.81      0.92      0.86      2372

    accuracy                           0.85      4724
   macro avg       0.86      0.85      0.85      4724
weighted avg       0.86      0.85      0.85      4724



### 5. Logistic Regression

In [139]:
# Evaluate the logistic regression predictions against the test labels.
# Precision, recall and F1-score are each computed with weighted averaging,
# in that order.
precision, recall, f1 = (
    scorer(y_test, lr_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# Print the three summary scores, formatted to three decimals.
print(f'Precision: {precision:.3f}')
print(f'Recall: {recall:.3f}')
print(f'F1-score: {f1:.3f}')
print()
# Confusion matrix, then the per-class classification report.
print(confusion_matrix(y_test, lr_pred))
print(classification_report(y_test, lr_pred))

Precision: 0.850
Recall: 0.846
F1-score: 0.846

[[1859  493]
 [ 233 2139]]
              precision    recall  f1-score   support

           0       0.89      0.79      0.84      2352
           1       0.81      0.90      0.85      2372

    accuracy                           0.85      4724
   macro avg       0.85      0.85      0.85      4724
weighted avg       0.85      0.85      0.85      4724

