In [1]:
import numpy as np
import pandas as pd

# membaca dataset dari file CSV
df = pd.read_csv('data_balita.csv')

In [2]:
# mengkodekan jenis kelamin
df['Jenis Kelamin'] = df['Jenis Kelamin'].map({'laki-laki': 0, 'perempuan': 1})

# mengkodekan status gizi
df['Status Gizi'] = df['Status Gizi'].map({'severely stunted': 0, 'stunted': 1, 'normal': 2, 'tinggi': 3})


In [3]:
from sklearn.model_selection import train_test_split

# memisahkan fitur dan target
X = df.iloc[:, 1:6]
X = df.drop('Status Gizi', axis=1)
y = df['Status Gizi']

# membagi dataset menjadi data latih dan data uji
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=200)

X

Unnamed: 0,Umur (bulan),Jenis Kelamin,Tinggi Badan (cm)
0,0,0,44.591973
1,0,0,56.705203
2,0,0,46.863358
3,0,0,47.508026
4,0,0,42.743494
...,...,...,...
120994,60,1,100.600000
120995,60,1,98.300000
120996,60,1,121.300000
120997,60,1,112.200000


In [4]:
from sklearn.neighbors import KNeighborsClassifier

# membuat objek KNN dengan k=3
knn = KNeighborsClassifier(algorithm='kd_tree', n_neighbors=3)

# melatih model KNN dengan data latih
knn.fit(X_train, y_train)


In [5]:
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
y_pred_gnb = gnb.fit(X_train, y_train).predict(X_test)

In [6]:
from sklearn.metrics import accuracy_score

# membuat prediksi dengan data uji
y_pred = knn.predict(X_test)

# menghitung akurasi model
# accuracy = accuracy_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
print('Akurasi model KNN:', accuracy)


Akurasi model KNN: 0.9974380165289256


In [7]:
accuracy = accuracy_score(y_test, y_pred_gnb)
print('Akurasi model GNB:', accuracy)

Akurasi model GNB: 0.5585950413223141


In [8]:
from sklearn.metrics import classification_report, confusion_matrix

print('confusion_matrix knn:')
confusion_matrix(y_test, y_pred)

confusion_matrix knn:


array([[ 3913,    10,     0,     0],
       [   13,  2827,     6,     0],
       [    0,    12, 13514,    13],
       [    0,     0,     8,  3884]])

In [9]:
print('confusion_matrix gnb:')
confusion_matrix(y_test, y_pred_gnb)

confusion_matrix gnb:


array([[ 1647,     0,  2276,     0],
       [  568,     0,  2278,     0],
       [ 1668,     0, 11871,     0],
       [  707,     0,  3185,     0]])

In [10]:
print('classification_report knn:')
print(classification_report(y_test.values.reshape(-1, 1), y_pred))

classification_report knn:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3923
           1       0.99      0.99      0.99      2846
           2       1.00      1.00      1.00     13539
           3       1.00      1.00      1.00      3892

    accuracy                           1.00     24200
   macro avg       1.00      1.00      1.00     24200
weighted avg       1.00      1.00      1.00     24200



In [11]:
print('classification_report gnb:')
print(classification_report(y_test.values.reshape(-1, 1), y_pred_gnb))

classification_report gnb:
              precision    recall  f1-score   support

           0       0.36      0.42      0.39      3923
           1       0.00      0.00      0.00      2846
           2       0.61      0.88      0.72     13539
           3       0.00      0.00      0.00      3892

    accuracy                           0.56     24200
   macro avg       0.24      0.32      0.28     24200
weighted avg       0.40      0.56      0.46     24200



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [12]:
from sklearn.model_selection import cross_val_score

In [13]:
print('akurasi knn:')
print('Cross val: ', cross_val_score(knn, y_test.values.reshape(-1, 1), y_pred, cv = 5))
print('Accuracy : ', np.mean(cross_val_score(knn, y_test.values.reshape(-1, 1), y_pred)))

akurasi knn:
Cross val:  [0.99628099 0.9981405  0.99834711 0.99772727 0.99669421]
Accuracy :  0.9974380165289256


In [14]:
print('akurasi gnb:')
print('Cross val: ', cross_val_score(knn, y_test.values.reshape(-1, 1), y_pred_gnb, cv = 5))
print('Accuracy : ', np.mean(cross_val_score(knn, y_test.values.reshape(-1, 1), y_pred_gnb)))

akurasi gnb:
Cross val:  [0.78202479 0.81033058 0.78409091 0.81033058 0.78884298]
Accuracy :  0.7951239669421488


In [15]:
test = pd.DataFrame({'Umur (bulan)': [19], 'Jenis Kelamin': [1], 'Tinggi Badan (cm)': [72.9]})

pred = knn.predict(test)
print("Predicted Score knn = {}".format(np.array(pred)))

pred = gnb.predict(test)
print("Predicted Score gnb = {}".format(np.array(pred)))

Predicted Score knn = [1]
Predicted Score gnb = [2]


In [16]:
# Export the model
import joblib

joblib.dump(knn, 'knn.model')

['knn.model']