In [1]:
import numpy as np
import pandas as pd

# membaca dataset dari file CSV
df = pd.read_csv('dataset_berat_balita.csv')
df

Unnamed: 0,Umur (bulan),Berat Badan (kg),Jenis Kelamin,Status Gizi
0,0,0.715441,perempuan,severely underweight
1,0,5.610281,perempuan,overweight
2,0,0.907997,perempuan,severely underweight
3,0,5.509305,perempuan,overweight
4,0,3.565381,perempuan,normal
...,...,...,...,...
122117,60,17.450974,laki-laki,normal
122118,60,20.970287,laki-laki,normal
122119,60,24.600348,laki-laki,overweight
122120,60,19.047613,laki-laki,normal


In [2]:
# mengkodekan jenis kelamin
df['Jenis Kelamin'] = df['Jenis Kelamin'].map({'laki-laki': 0, 'perempuan': 1})

# mengkodekan status gizi
df['Status Gizi'] = df['Status Gizi'].map({'severely underweight': 0, 'underweight': 1, 'normal': 2, 'overweight': 3})
df

Unnamed: 0,Umur (bulan),Berat Badan (kg),Jenis Kelamin,Status Gizi
0,0,0.715441,1,0
1,0,5.610281,1,3
2,0,0.907997,1,0
3,0,5.509305,1,3
4,0,3.565381,1,2
...,...,...,...,...
122117,60,17.450974,0,2
122118,60,20.970287,0,2
122119,60,24.600348,0,3
122120,60,19.047613,0,2


In [3]:
from sklearn.model_selection import train_test_split

# memisahkan fitur dan target
X = df.iloc[:, 1:6]
X = df.drop('Status Gizi', axis=1)
y = df['Status Gizi']

# membagi dataset menjadi data latih dan data uji
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

X

Unnamed: 0,Umur (bulan),Berat Badan (kg),Jenis Kelamin
0,0,0.715441,1
1,0,5.610281,1
2,0,0.907997,1
3,0,5.509305,1
4,0,3.565381,1
...,...,...,...
122117,60,17.450974,0
122118,60,20.970287,0
122119,60,24.600348,0
122120,60,19.047613,0


In [4]:
from sklearn.neighbors import KNeighborsClassifier

# membuat objek KNN dengan k=3
knn = KNeighborsClassifier(algorithm='kd_tree', n_neighbors=1)

# melatih model KNN dengan data latih
knn.fit(X_train, y_train)


In [5]:
from sklearn.metrics import accuracy_score

# membuat prediksi dengan data uji
y_pred = knn.predict(X_test)

# menghitung akurasi model
accuracy = accuracy_score(y_test, y_pred)
print('Akurasi model KNN:', accuracy)


Akurasi model KNN: 0.9981440034936405


In [6]:
from sklearn.metrics import classification_report, confusion_matrix

In [7]:
print(classification_report(y_test.values.reshape(-1, 1), y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      4802
           1       0.99      0.99      0.99      1304
           2       1.00      1.00      1.00      5179
           3       1.00      1.00      1.00      7034

    accuracy                           1.00     18319
   macro avg       1.00      1.00      1.00     18319
weighted avg       1.00      1.00      1.00     18319



In [8]:
from sklearn.model_selection import cross_val_score

In [9]:
print('Cross val: ', cross_val_score(knn, y_test.values.reshape(-1, 1), y_pred, cv = 5))
print('Accuracy : ', np.mean(cross_val_score(knn, y_test.values.reshape(-1, 1), y_pred)))

Cross val:  [0.99836245 0.99781659 0.99863537 0.99808952 0.997816  ]
Accuracy :  0.9981439855894004


In [10]:
test = pd.DataFrame({'Umur (bulan)': [3], 'Berat Badan (kg)': [6], 'Jenis Kelamin': [0]})

pred = knn.predict(test)
print("Predicted Score = {}".format(np.array(pred)))

Predicted Score = [2]


In [11]:
# Export the model
import joblib

joblib.dump(knn, 'knn_berat_status_gizi.model')

['knn_berat_status_gizi.model']