# 1. Menghitung HAZ-Score

## Import Library

In [1]:
import gdown
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [3]:
# Attach Google Drive to the Colab runtime; the model export cells write under this mount.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


## Data Understanding

In [4]:
# Google Drive file id of the raw stunting dataset (CSV).
FILE_ID = '1sNPUBO-vGky4nvJzIRtR6I1U4mbrOIuG'
url = "https://drive.google.com/uc?id=" + FILE_ID

# Local filename the download is saved under.
output = 'stunting_dataset.csv'
gdown.download(url=url, output=output, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1sNPUBO-vGky4nvJzIRtR6I1U4mbrOIuG
To: /content/stunting_dataset.csv
100%|██████████| 2.89M/2.89M [00:00<00:00, 37.9MB/s]


'stunting_dataset.csv'

In [5]:
# Load the downloaded CSV and preview ten randomly chosen rows.
data_stunting = pd.read_csv(filepath_or_buffer="stunting_dataset.csv")
data_stunting.sample(n=10)

Unnamed: 0,Id,Jenis Kelamin,Umur (bulan),Tinggi Badan (cm),Berat Badan (kg)
10726,10726,Laki-laki,7,74.2,10.3
72308,72308,Perempuan,14,77.5,10.8
19933,19933,Perempuan,16,77.2,10.0
97313,97313,Perempuan,5,60.2,4.5
28465,28465,Laki-laki,14,83.6,14.1
6094,6094,Perempuan,6,63.9,8.1
76696,76696,Perempuan,4,66.1,7.2
1108,1108,Perempuan,5,64.6,7.3
47041,47041,Laki-laki,3,58.1,8.6
3845,3845,Laki-laki,18,90.1,10.5


## Data Preparation

### Encoding Data Kategori

In [6]:
# Encode gender as a number: "Laki-laki" (male) -> 0, "Perempuan" (female) -> 1.
# Values that are not keys of the mapping are passed through unchanged, so re-running
# this cell on already-encoded data is a no-op. A plain dict-based `.map` would instead
# replace every unmatched value (including the 0/1 codes themselves) with NaN.
GENDER_CODES = {"Laki-laki": 0, "Perempuan": 1}
data_stunting["Jenis Kelamin"] = data_stunting["Jenis Kelamin"].map(
    lambda value: GENDER_CODES.get(value, value)
)

### Hitung HAZ-Score

In [7]:
# ==== WHO Growth Standards (simplified) ====
# Height-for-age reference values keyed by gender, then by age in months. Each entry holds
# the reference median height and its standard deviation (presumably in cm, matching the
# dataset's height column - TODO confirm).
# NOTE(review): only 24/36/48/60 months are listed, so the nearest-age lookup in
# calculate_haz compares every child aged 30 months or younger with the 24-month
# reference. The sample rows displayed earlier in this notebook are 3-18 months old;
# confirm the table covers the dataset's age range before trusting the scores.
who_growth_standards = {
    "male": {
        24: {"median": 87.1, "sd": 3.1},
        36: {"median": 95.2, "sd": 3.5},
        48: {"median": 102.3, "sd": 3.8},
        60: {"median": 109.2, "sd": 4.2},
    },
    "female": {
        24: {"median": 85.7, "sd": 3.1},
        36: {"median": 94.0, "sd": 3.5},
        48: {"median": 101.6, "sd": 3.8},
        60: {"median": 108.5, "sd": 4.2},
    },
}


def calculate_haz(age_months, height_cm, gender):
    """Compute a height-for-age z-score (HAZ) and its stunting category.

    The reference median/SD is the ``who_growth_standards`` entry whose age is
    nearest to ``age_months``; on a tie the lower reference age is used.

    Args:
        age_months: Age in months.
        height_cm: Measured height in cm.
        gender: 0 for male, 1 for female.

    Returns:
        A ``(haz, status)`` tuple. ``haz`` is rounded to two decimals and ``status``
        is "Severely Stunting" (haz < -3), "Stunting" (-3 <= haz < -2) or "Normal".
        ``(None, None)`` is returned when ``gender`` is not 0/1, when age or height
        is missing, or when no reference ages exist for the gender.
        ``(None, "error: zero standard deviation")`` is returned when the reference
        SD is 0.
    """
    # Anything other than the 0/1 codes (including NaN) cannot be scored.
    if gender not in [0, 1]:
        return None, None

    gender_key = "male" if gender == 0 else "female"

    # Missing age or height: no score and no label.
    if pd.isna(age_months) or pd.isna(height_cm):
        return None, None

    # Find the nearest reference age.
    available_ages = who_growth_standards[gender_key].keys()
    if not available_ages:  # no reference data at all for this gender
        return None, None

    nearest_age = min(available_ages, key=lambda x: abs(x - age_months))
    ref = who_growth_standards[gender_key][nearest_age]

    # Guard against division by zero (not expected with real growth standards).
    if ref["sd"] == 0:
        return None, "error: zero standard deviation"

    haz = (height_cm - ref["median"]) / ref["sd"]

    # WHO cut-offs: below -3 SD is severe, below -2 SD is stunted. The most extreme
    # range must be tested first, otherwise it is swallowed by the `< -2` branch.
    # NOTE(review): the labelled CSV used later in this notebook spells these classes
    # "Severely Stunted" / "Stunted"; the strings here are left as they were.
    if haz < -3:
        status = "Severely Stunting"
    elif haz < -2:
        status = "Stunting"
    else:
        status = "Normal"

    return round(haz, 2), status

# Score every row. The dataset's columns are "Umur (bulan)", "Tinggi Badan (cm)" and
# "Jenis Kelamin"; the previous keys ("age", "height", "gender") do not exist in
# `data_stunting` and raised a KeyError.
haz_results = data_stunting.apply(
    lambda row: calculate_haz(
        row["Umur (bulan)"], row["Tinggi Badan (cm)"], row["Jenis Kelamin"]
    ),
    axis=1,
)

# Unpack the (haz, status) tuples into the column names used by the labelled CSV
# that the classification section reads.
haz_frame = pd.DataFrame(
    haz_results.tolist(),
    index=data_stunting.index,
    columns=["HAZ", "Stunting_Status"],
)
data_stunting["HAZ"] = haz_frame["HAZ"]
data_stunting["Stunting_Status"] = haz_frame["Stunting_Status"]


In [None]:
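# NOTE: this cell is fully commented out, yet the output shown below it and the file
# `stunting_dataset_with_status.csv` loaded in section 2 appear to come from this code.
# It expects gender coded as 1/2, unlike the 0/1 mapping applied earlier in this notebook.
# # Example WHO Height-for-Age Z-score lookup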
# who_haz_lookup = {
#     (0, 1): (49.9, 1.9),
#     (0, 2): (49.1, 1.8),
#     (6, 1): (67.6, 2.6),
#     (6, 2): (65.7, 2.5),
#     (12, 1): (76.1, 2.9),
#     (12, 2): (74.0, 2.8),
#     (24, 1): (87.1, 3.1),
#     (24, 2): (85.7, 3.0),
# }

# def calculate_haz(row):
#     umur = row["Umur (bulan)"]
#     jk = row["Jenis Kelamin"]
#     tinggi = row["Tinggi Badan (cm)"]

#     # Find the closest age group for the given gender
#     closest_key = None
#     min_diff = float('inf')
#     for (lookup_age, lookup_jk) in who_haz_lookup.keys():
#         if lookup_jk == jk:
#             diff = abs(lookup_age - umur)
#             if diff < min_diff:
#                 min_diff = diff
#                 closest_key = (lookup_age, lookup_jk)
#             elif diff == min_diff and lookup_age < umur: # Prefer the closest age less than current if tie
#                  closest_key = (lookup_age, lookup_jk)
#             elif diff == min_diff and lookup_age > umur and closest_key and closest_key[0] > umur: # If closest so far is also > umur, take this one if closer
#                  closest_key = (lookup_age, lookup_jk)


#     if closest_key in who_haz_lookup:
#         median, sd = who_haz_lookup[closest_key]
#         # Handle potential division by zero if SD is 0
#         if sd != 0:
#             return (tinggi - median) / sd
#         else:
#             return None # Return None if SD is 0 to avoid error
#     return None

# data_stunting["HAZ"] = data_stunting.apply(calculate_haz, axis=1)

# # Klasifikasi stunting
# def classify_stunting(haz):
#     if pd.isna(haz):
#         return "Unknown"
#     elif haz < -3:
#         return "Severely Stunted"
#     elif haz < -2:
#         return "Stunted"
#     else:
#         return "Normal"

# data_stunting["Stunting_Status"] = data_stunting["HAZ"].apply(classify_stunting)
# data_stunting.to_csv("stunting_dataset_with_status.csv", index=False)

# data_stunting.head()

Unnamed: 0,Id,Jenis Kelamin,Umur (bulan),Tinggi Badan (cm),Berat Badan (kg),HAZ,Stunting_Status
0,0,1,19,91.6,13.3,1.451613,Normal
1,1,1,20,77.7,8.5,-3.032258,Severely Stunted
2,2,1,10,79.0,10.3,1.0,Normal
3,3,2,2,50.3,8.3,0.666667,Normal
4,4,2,5,56.4,10.9,-3.72,Severely Stunted


# 2. Klasifikasi Stunting

## Data Understanding

In [None]:
# Load the dataset that already carries the HAZ score and the stunting label.
stunting = pd.read_csv(filepath_or_buffer='/content/stunting_dataset_with_status.csv')

# Preview the first five rows.
stunting.head(n=5)

Unnamed: 0,Id,Jenis Kelamin,Umur (bulan),Tinggi Badan (cm),Berat Badan (kg),HAZ,Stunting_Status
0,0,1,19,91.6,13.3,1.451613,Normal
1,1,1,20,77.7,8.5,-3.032258,Severely Stunted
2,2,1,10,79.0,10.3,1.0,Normal
3,3,2,2,50.3,8.3,0.666667,Normal
4,4,2,5,56.4,10.9,-3.72,Severely Stunted


In [None]:
# Print each column's dtype and non-null count (quick check for missing values).
stunting.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 7 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   Id                 100000 non-null  int64  
 1   Jenis Kelamin      100000 non-null  int64  
 2   Umur (bulan)       100000 non-null  int64  
 3   Tinggi Badan (cm)  100000 non-null  float64
 4   Berat Badan (kg)   100000 non-null  float64
 5   HAZ                100000 non-null  float64
 6   Stunting_Status    100000 non-null  object 
dtypes: float64(3), int64(3), object(1)
memory usage: 5.3+ MB


## Encoding Data

In [None]:
# Encode the string classes in `Stunting_Status` as integer ids.
# The fitted encoder is bound to both `label` and `le`, so code that refers to either
# name (e.g. for `classes_` or `inverse_transform`) finds the same object.
# NOTE(review): the column is overwritten in place, so this cell should run only once
# per load; re-fitting on the already-encoded integers would lose the class names.
label = LabelEncoder()
le = label
stunting['Stunting_Status'] = label.fit_transform(stunting['Stunting_Status'])
stunting.head()

Unnamed: 0,Id,Jenis Kelamin,Umur (bulan),Tinggi Badan (cm),Berat Badan (kg),HAZ,Stunting_Status
0,0,1,19,91.6,13.3,1.451613,0
1,1,1,20,77.7,8.5,-3.032258,1
2,2,1,10,79.0,10.3,1.0,0
3,3,2,2,50.3,8.3,0.666667,0
4,4,2,5,56.4,10.9,-3.72,1


## Feature *Scaling*

In [None]:
# Columns to standardise.
fitur = ['Umur (bulan)', 'Tinggi Badan (cm)', 'Berat Badan (kg)', 'Jenis Kelamin']

# NOTE(review): the scaler is fitted on the full dataset, i.e. before the train/test
# split further down, so test rows contribute to the scaling statistics.
scaler = StandardScaler()
scaler.fit(stunting[fitur])
stunting[fitur] = scaler.transform(stunting[fitur])

# Preview ten random rows after scaling.
stunting.sample(10)


Unnamed: 0,Id,Jenis Kelamin,Umur (bulan),Tinggi Badan (cm),Berat Badan (kg),HAZ,Stunting_Status
76448,76448,-0.996426,-0.276761,-0.680644,-0.896537,-3.689655,1
157,157,1.003586,0.417718,1.018181,0.19412,3.821429,0
48642,48642,-0.996426,-0.554553,-0.812677,1.012112,-1.423077,0
25939,25939,1.003586,0.139927,-0.231732,-1.108609,-1.25,0
98191,98191,1.003586,-1.387929,-1.895349,-1.593345,1.388889,0
81582,81582,1.003586,0.973302,0.410829,1.315073,-2.633333,2
73949,73949,1.003586,1.38999,1.194225,1.557441,0.333333,0
11093,11093,1.003586,-0.415657,-0.715853,-0.320912,-0.28,0
62541,62541,1.003586,1.38999,0.137961,2.254249,-3.666667,1
40303,40303,1.003586,1.38999,0.428434,2.193657,-2.566667,2


## Data Splitting

In [None]:
# Feature matrix (four predictor columns) and target (encoded stunting status).
feature_columns = ["Umur (bulan)", "Tinggi Badan (cm)", "Berat Badan (kg)", "Jenis Kelamin"]
X = stunting.loc[:, feature_columns]
y = stunting.loc[:, "Stunting_Status"]

# Hold out 20% of the rows as a test set; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

## Modelling

In [None]:
# Train a random-forest classifier on the training split.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out split. The features were standardised before the split, so
# `X_test` is used directly; `X_test_scaled` is not defined anywhere in the visible notebook.
# The fitted LabelEncoder is bound to `label` (the name `le` used before was undefined).
y_pred = model.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=label.classes_))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Build a frame with the test features plus the decoded actual/predicted labels.
df_prediksi = X_test.copy()
df_prediksi["Actual_Label"] = label.inverse_transform(y_test)
df_prediksi["Predicted_Label"] = label.inverse_transform(y_pred)

# Show the first 10 predictions.
df_prediksi.head(10)

Classification Report:
                   precision    recall  f1-score   support

          Normal       1.00      1.00      1.00     17085
Severely Stunted       1.00      1.00      1.00      1081
         Stunted       1.00      1.00      1.00      1834

        accuracy                           1.00     20000
       macro avg       1.00      1.00      1.00     20000
    weighted avg       1.00      1.00      1.00     20000

Confusion Matrix:
 [[17085     0     0]
 [    0  1081     0]
 [    0     0  1834]]




Unnamed: 0,Umur (bulan),Tinggi Badan (cm),Berat Badan (kg),Jenis Kelamin,Actual_Label,Predicted_Label
75721,-1.526825,-1.631283,-0.684465,1.003586,Normal,Normal
80184,-0.554553,-0.627831,0.890928,1.003586,Normal,Normal
19864,1.112198,1.484698,0.49708,-0.996426,Normal,Normal
76699,0.139927,0.815731,0.103232,-0.996426,Normal,Normal
92991,-0.137865,-0.266941,1.193889,-0.996426,Stunted,Stunted
76434,0.556614,0.604478,0.739448,-0.996426,Normal,Normal
84004,-0.137865,-0.284545,0.769744,1.003586,Normal,Normal
80917,-0.832345,-0.619029,-0.805649,-0.996426,Normal,Normal
60767,-1.110137,-1.173568,-0.139136,-0.996426,Stunted,Stunted
50074,0.834406,0.085148,1.527145,1.003586,Normal,Normal


In [None]:
# Small feed-forward network: 4 input features -> 16 -> 8 -> 3-way softmax.
# Note: this rebinds `model`, which previously held the random-forest classifier.
layer_stack = [
    tf.keras.layers.Input(shape=(4,)),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax'),
]
model = tf.keras.Sequential(layer_stack)

# Integer class ids as targets, hence the sparse categorical cross-entropy loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 20 epochs, holding out 10% of the training data for validation.
model.fit(x=X_train, y=y_train, batch_size=16, epochs=20, validation_split=0.1)

# Loss and accuracy on the test split.
model.evaluate(x=X_test, y=y_test)

Epoch 1/20
[1m4500/4500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - accuracy: 0.8576 - loss: 0.4271 - val_accuracy: 0.8783 - val_loss: 0.2838
Epoch 2/20
[1m4500/4500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3ms/step - accuracy: 0.8773 - loss: 0.2757 - val_accuracy: 0.8929 - val_loss: 0.2428
Epoch 3/20
[1m2980/4500[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m4s[0m 3ms/step - accuracy: 0.9001 - loss: 0.2313

In [None]:
def predict_stunting(model, scaler, input_data, label_encoder=None):
    """Predict the stunting class for a single child.

    Args:
        model: Fitted classifier whose ``predict`` returns one row of class
            probabilities per input row (e.g. the Keras softmax model).
        scaler: Fitted scaler used on the training features.
        input_data: dict of raw feature values keyed by the training column
            names, e.g.
                {
                    "Umur (bulan)": 24,
                    "Tinggi Badan (cm)": 85.0,
                    "Berat Badan (kg)": 12.5,
                    "Jenis Kelamin": 1
                }
            "Jenis Kelamin" must use the same numeric coding as the training data.
        label_encoder: Fitted encoder used to turn the class id back into its
            name. Defaults to the notebook-level ``label`` encoder.

    Returns:
        The predicted class name as produced by ``label_encoder.inverse_transform``.

    Raises:
        KeyError: If ``input_data`` lacks a column the scaler was fitted on.
    """
    # Local import: numpy is not among the notebook's visible top-level imports.
    import numpy as np

    encoder = label if label_encoder is None else label_encoder

    df_input = pd.DataFrame([input_data])

    # Put the columns in the order the scaler was fitted on, so the result does not
    # depend on the key order of `input_data`.
    expected_columns = getattr(scaler, "feature_names_in_", None)
    if expected_columns is not None:
        df_input = df_input[list(expected_columns)]

    scaled_input = scaler.transform(df_input)
    pred_probs = model.predict(scaled_input)

    # Index of the most probable class for the single input row.
    pred_class = int(np.argmax(pred_probs, axis=1)[0])
    return encoder.inverse_transform([pred_class])[0]

In [None]:
# Export the current `model` as a TensorFlow SavedModel on the mounted Drive.
save_path = '/content/drive/MyDrive/model'
tf.saved_model.save(obj=model, export_dir=save_path)

In [None]:
!pip install tensorflowjs

In [None]:
# Convert the SavedModel in /content/drive/MyDrive/model into a TensorFlow.js graph model.
# The converted files are written to the `tfjs` subfolder of that same directory.
!tensorflowjs_converter \
  --input_format=tf_saved_model \
  --output_format=tfjs_graph_model \
  "/content/drive/MyDrive/model" \
  "/content/drive/MyDrive/model/tfjs"