## Baseline + Normalization

In [5]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

In [6]:
# Read Normal Datasets
df_path= 'Dataset/diabetes_binary_5050split_health_indicators_BRFSS2015.csv'
df = pd.read_csv(df_path)

In [7]:
X = df.iloc[:,1:]
y = df['Diabetes_binary']

# Bagi dataset menjadi data latih (training) dan data uji (testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Normalization

X_train_norm = (X_train - X_train.min()) / (X_train.max() - X_train.min())
X_test_norm = (X_test - X_test.min()) / (X_test.max() - X_test.min())

## SVM Model

In [14]:
# Inisialisasi model SVM
from sklearn.svm import SVC
svm_model = SVC(kernel='linear')

In [15]:
# Latih model pada data latih
svm_model.fit(X_train_norm, y_train)

# with open('pickle/svm_model_norm', 'wb') as file:
#     pickle.dump(svm_model, file)

In [22]:
# Lakukan prediksi pada data uji
svm_pred = svm_model.predict(X_test_norm)

In [23]:
from sklearn.metrics import classification_report,confusion_matrix

print(confusion_matrix(y_test, svm_pred))
print(classification_report(y_test, svm_pred))

[[5071 2019]
 [1563 5486]]
              precision    recall  f1-score   support

         0.0       0.76      0.72      0.74      7090
         1.0       0.73      0.78      0.75      7049

    accuracy                           0.75     14139
   macro avg       0.75      0.75      0.75     14139
weighted avg       0.75      0.75      0.75     14139



In [24]:
# Hitung precision, recall, dan F1-score
precision = precision_score(y_test, svm_pred, average='weighted')
recall = recall_score(y_test, svm_pred, average='weighted')
f1 = f1_score(y_test, svm_pred, average='weighted')

# Tampilkan hasil evaluasi
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Precision: 0.7477358924331233
Recall: 0.746658179503501
F1-score: 0.746418154355341


## Random Forest Model

In [9]:
from sklearn.ensemble import RandomForestClassifier
rfc_model = RandomForestClassifier(n_estimators=500)

In [10]:
rfc_model.fit(X_train_norm, y_train)

# with open('pickle/rfc_model_norm', 'wb') as file:
#     pickle.dump(rfc_model, file)

In [11]:
rfc_pred = rfc_model.predict(X_test_norm)

In [12]:
from sklearn.metrics import classification_report,confusion_matrix

print(confusion_matrix(y_test, rfc_pred))
print(classification_report(y_test, rfc_pred))

[[5042 2048]
 [1643 5406]]
              precision    recall  f1-score   support

         0.0       0.75      0.71      0.73      7090
         1.0       0.73      0.77      0.75      7049

    accuracy                           0.74     14139
   macro avg       0.74      0.74      0.74     14139
weighted avg       0.74      0.74      0.74     14139



In [13]:
# Hitung precision, recall, dan F1-score
precision = precision_score(y_test, rfc_pred, average='weighted')
recall = recall_score(y_test, rfc_pred, average='weighted')
f1 = f1_score(y_test, rfc_pred, average='weighted')

# Tampilkan hasil evaluasi
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Precision: 0.7397790483362503
Recall: 0.738949006294646
F1-score: 0.7387563725783748
