# Klasifikasi Risiko Penyakit Hipertensi Menggunakan Algoritma AdaBoost

In [None]:
# Impor pustaka yang diperlukan
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Load the hypertension dataset from the Excel workbook.
# NOTE(review): the file name indicates oversampling was applied before this
# notebook performs its train/test split. If so, duplicated or synthetic rows
# may end up on both sides of the split -- confirm how the file was produced.
DATASET_PATH = "dataset after oversampling.xlsx"
dataset = pd.read_excel(DATASET_PATH)

# Show the loaded frame as the cell output
dataset

Unnamed: 0,UMUR(TAHUN),JENIS KELAMIN,MEROKOK,DILAKUKAN PENGUKURAN TEKANAN DARAH,MINUM OBAT HIPERTENSI TERATUR,BERAT BADAN,LINGKAR PERUT,ASAM URAT,GULA DARAH,KOLESTEROL,DI DIAGNOSIS HIPERTENSI
0,35,perempuan,T,N,Y,58.794011,80.627276,4.851437,147.199919,179.074059,Y
1,61,perempuan,T,N,Y,58.794011,80.627276,4.851437,147.199919,179.074059,Y
2,50,perempuan,T,N,Y,58.794011,80.627276,4.851437,147.199919,179.074059,Y
3,43,perempuan,T,N,Y,58.794011,80.627276,4.851437,147.199919,179.074059,Y
4,64,perempuan,T,N,Y,58.794011,80.627276,4.851437,147.199919,179.074059,Y
...,...,...,...,...,...,...,...,...,...,...,...
16345,41,laki-laki,T,Y,N,58.794011,80.627276,4.851437,147.199919,179.074059,T
16346,71,perempuan,T,Y,N,58.794011,80.627276,4.851437,147.199919,179.074059,T
16347,17,perempuan,T,Y,N,58.794011,80.627276,4.851437,147.199919,179.074059,T
16348,46,perempuan,T,Y,N,58.794011,80.627276,4.851437,147.199919,179.074059,T


In [None]:
# Inspect the data: column names, non-null counts and dtypes
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16350 entries, 0 to 16349
Data columns (total 11 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   UMUR(TAHUN)                         16350 non-null  int64  
 1   JENIS KELAMIN                       16350 non-null  object 
 2   MEROKOK                             16350 non-null  object 
 3   DILAKUKAN PENGUKURAN TEKANAN DARAH  16350 non-null  object 
 4   MINUM OBAT HIPERTENSI TERATUR       16350 non-null  object 
 5   BERAT BADAN                         16350 non-null  float64
 6   LINGKAR PERUT                       16350 non-null  float64
 7   ASAM URAT                           16350 non-null  float64
 8   GULA DARAH                          16350 non-null  float64
 9   KOLESTEROL                          16350 non-null  float64
 10  DI DIAGNOSIS HIPERTENSI             16350 non-null  object 
dtypes: float64(5), int64(1), object(5)
memory

In [None]:
# Label-encode the categorical (object-dtype) columns, target included.
columns_to_encode = ['JENIS KELAMIN', 'MEROKOK', 'DILAKUKAN PENGUKURAN TEKANAN DARAH',
                     'MINUM OBAT HIPERTENSI TERATUR', 'DI DIAGNOSIS HIPERTENSI']

# Fit one LabelEncoder per column and keep each of them, so the
# category -> integer mapping of every column stays available afterwards
# through label_encoders[column].classes_ / .inverse_transform().
# Re-fitting a single shared encoder would overwrite its mapping on every
# iteration and leave only the last column's mapping recoverable.
label_encoders = {}
for column in columns_to_encode:
    column_encoder = LabelEncoder()
    dataset[column] = column_encoder.fit_transform(dataset[column])
    label_encoders[column] = column_encoder

# Backward-compatible name: `label_encoder` still refers to the encoder fitted
# on the last column of the list (the target).
label_encoder = label_encoders[columns_to_encode[-1]]

# Preview the encoded columns
print(dataset[columns_to_encode].head())


   JENIS KELAMIN  MEROKOK  DILAKUKAN PENGUKURAN TEKANAN DARAH  \
0              1        0                                   0   
1              1        0                                   0   
2              1        0                                   0   
3              1        0                                   0   
4              1        0                                   0   

   MINUM OBAT HIPERTENSI TERATUR  DI DIAGNOSIS HIPERTENSI  
0                              2                        1  
1                              2                        1  
2                              2                        1  
3                              2                        1  
4                              2                        1  


In [None]:
# Standardize the numeric feature columns with StandardScaler.
# NOTE(review): the scaler is fitted on the whole dataset, i.e. before the
# train/test split performed further down, so test-set statistics influence
# the scaling. Consider fitting on the training portion only -- confirm
# whether that matters for this analysis.
columns_to_scale = [
    'UMUR(TAHUN)',
    'BERAT BADAN',
    'LINGKAR PERUT',
    'ASAM URAT',
    'GULA DARAH',
    'KOLESTEROL',
]

scaler = StandardScaler()
scaled_values = scaler.fit_transform(dataset[columns_to_scale])

# Write the standardized values back over the original columns
dataset[columns_to_scale] = scaled_values

# Preview the standardized columns
print(dataset[columns_to_scale].head())


   UMUR(TAHUN)  BERAT BADAN  LINGKAR PERUT  ASAM URAT  GULA DARAH  KOLESTEROL
0    -0.632266    -0.002831      -0.002743  -0.000067   -0.001125    0.000303
1     0.938166    -0.002831      -0.002743  -0.000067   -0.001125    0.000303
2     0.273753    -0.002831      -0.002743  -0.000067   -0.001125    0.000303
3    -0.149056    -0.002831      -0.002743  -0.000067   -0.001125    0.000303
4     1.119370    -0.002831      -0.002743  -0.000067   -0.001125    0.000303


In [None]:
# Separate the target vector (y) from the feature matrix (X).
# NOTE(review): in the rows displayed when the dataset was loaded,
# 'MINUM OBAT HIPERTENSI TERATUR' and 'DILAKUKAN PENGUKURAN TEKANAN DARAH'
# line up exactly with the diagnosis value -- presumably proxies for the
# label; verify they are legitimate predictors before keeping them in X.
target_column = 'DI DIAGNOSIS HIPERTENSI'
y = dataset[target_column]
X = dataset.drop(columns=[target_column])

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
split_parts = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Shapes of the four parts, shown as the cell output
tuple(part.shape for part in split_parts)

((11445, 10), (4905, 10), (11445,), (4905,))

In [None]:
# Train and evaluate the AdaBoost classifier.
# The train/test partition (X_train, X_test, y_train, y_test) comes from the
# preceding cell (test_size=0.3, random_state=42); it is reused here instead
# of being recomputed with an identical train_test_split call.

# Build the AdaBoost model: 50 boosting rounds, fixed seed for reproducibility
adaboost_model = AdaBoostClassifier(n_estimators=50, random_state=42)

# Fit the model on the training data
adaboost_model.fit(X_train, y_train)

# Predict the held-out test data
y_pred = adaboost_model.predict(X_test)

# Evaluate the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Print the evaluation results.
# NOTE(review): a perfect score here should be treated as a warning sign
# rather than a result -- check for label leakage (features that mirror the
# diagnosis, oversampling done before the split) before reporting it.
print("Accuracy:", accuracy)
print("")
print("Confusion Matrix:")
print(conf_matrix)
print("")
print("Classification Report:")
print(classification_rep)

Accuracy: 1.0

Confusion Matrix:
[[2463    0]
 [   0 2442]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2463
           1       1.00      1.00      1.00      2442

    accuracy                           1.00      4905
   macro avg       1.00      1.00      1.00      4905
weighted avg       1.00      1.00      1.00      4905

