In [None]:
#Import Library
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
import pickle

# Load the raw dataset (change the file name if yours differs) and rename the
# two target columns to the names used by the rest of the script.
data = pd.read_csv("stunting.csv").rename(
    columns={'Stunting': 'kategori', 'Wasting': 'deskripsi'}
)

# Drop rows containing any missing value, then drop exact duplicate rows.
data = data.dropna().drop_duplicates()

# Remove outliers
def remove_outliers(df, column, factor=1.5):
    """Return the rows of *df* whose *column* value lies within the IQR fences.

    The fences are ``Q1 - factor * IQR`` and ``Q3 + factor * IQR``, where Q1
    and Q3 are the 25th and 75th percentiles of ``df[column]`` and
    ``IQR = Q3 - Q1``. Both fences are inclusive.

    Args:
        df: DataFrame to filter; it is not modified.
        column: Name of the numeric column used to compute the fences.
        factor: Multiplier applied to the IQR. The default of 1.5 reproduces
            the previously hard-coded behaviour.

    Returns:
        A new DataFrame holding only the rows inside the fences. The original
        index labels are kept.
    """
    values = df[column]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    lower = q1 - factor * iqr
    upper = q3 + factor * iqr
    # Explicit copy: callers assign new columns to the result, and an
    # independent frame avoids pandas' chained-assignment ambiguity.
    return df[(values >= lower) & (values <= upper)].copy()

# Filter outliers one measurement column at a time. Each pass works on the
# rows that survived the previous pass, so the column order matters.
numeric_columns = ('Umur (bulan)', 'Tinggi Badan (cm)', 'Berat Badan (kg)')
for col in numeric_columns:
    data = remove_outliers(data, col)

# Label encoding: replace each categorical column with integer codes and keep
# the fitted encoder per column so the codes can be mapped back later.
label_encoders = {}
for col in ('Jenis Kelamin', 'kategori', 'deskripsi'):
    encoder = LabelEncoder().fit(data[col])
    data[col] = encoder.transform(data[col])
    label_encoders[col] = encoder
    le = encoder  # keep the module-level name `le` bound as before

# Select the feature columns shared by both models.
feature_columns = ['Jenis Kelamin', 'Umur (bulan)', 'Tinggi Badan (cm)', 'Berat Badan (kg)']
X = data[feature_columns]

# Model for 'kategori': 80/20 train/test split with a fixed seed.
y_kategori = data['kategori']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_kategori,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply it to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 1-nearest-neighbour classifier trained on the scaled features.
knn_kategori = KNeighborsClassifier(n_neighbors=1).fit(X_train_scaled, y_train)
y_pred = knn_kategori.predict(X_test_scaled)

print("=== HASIL PREDIKSI KATEGORI ===")
print(classification_report(y_test, y_pred))

# Model for 'deskripsi'
y_deskripsi = data['deskripsi']
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y_deskripsi, test_size=0.2, random_state=42)

# Reuse the scaler fitted for the 'kategori' model instead of refitting it.
# A single scaler object is pickled for both models, so refitting here would
# silently change the scaling that knn_kategori was trained with if this split
# ever diverged. This split uses the same X, test_size and random_state as the
# 'kategori' split, so the training rows (and therefore the scaling) are
# unchanged by this.
X_train2_scaled = scaler.transform(X_train2)
X_test2_scaled = scaler.transform(X_test2)

knn_deskripsi = KNeighborsClassifier(n_neighbors=1)
knn_deskripsi.fit(X_train2_scaled, y_train2)
y_pred2 = knn_deskripsi.predict(X_test2_scaled)

# Header formatted like the 'kategori' report header for consistent output.
print("=== HASIL PREDIKSI DESKRIPSI ===")
print(classification_report(y_test2, y_pred2))

# Persist the models, the scaler and the label encoders, one pickle file each.
# The dict preserves insertion order, so files are written in the listed order.
artifacts = {
    "knn_kategori_model.pkl": knn_kategori,
    "knn_deskripsi_model.pkl": knn_deskripsi,
    "scaler.pkl": scaler,
    "label_encoders.pkl": label_encoders,
}
for filename, artifact in artifacts.items():
    with open(filename, "wb") as f:
        pickle.dump(artifact, f)

print("✅ Semua model dan encoder berhasil disimpan.")


=== HASIL PREDIKSI KATEGORI ===
              precision    recall  f1-score   support

           0       0.99      0.98      0.98     13500
           1       0.91      0.90      0.91      1031
           2       0.93      0.93      0.93      2951
           3       0.89      0.91      0.90      1057

    accuracy                           0.97     18539
   macro avg       0.93      0.93      0.93     18539
weighted avg       0.97      0.97      0.97     18539

=== HASIL PREDIKSI DESKRIPSI ===
              precision    recall  f1-score   support

           0       0.98      0.99      0.99      6785
           1       0.99      0.99      0.99      7672
           2       0.97      0.98      0.97      2215
           3       0.95      0.93      0.94      1867

    accuracy                           0.98     18539
   macro avg       0.97      0.97      0.97     18539
weighted avg       0.98      0.98      0.98     18539

✅ Semua model dan encoder berhasil disimpan.
