In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder

In [2]:
df = pd.read_csv('AsuransiKesehatan.csv')

In [3]:
df.info()
df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 986 entries, 0 to 985
Data columns (total 11 columns):
 #   Column                   Non-Null Count  Dtype
---  ------                   --------------  -----
 0   Age                      986 non-null    int64
 1   Diabetes                 986 non-null    int64
 2   BloodPressureProblems    986 non-null    int64
 3   AnyTransplants           986 non-null    int64
 4   AnyChronicDiseases       986 non-null    int64
 5   Height                   986 non-null    int64
 6   Weight                   986 non-null    int64
 7   KnownAllergies           986 non-null    int64
 8   HistoryOfCancerInFamily  986 non-null    int64
 9   NumberOfMajorSurgeries   986 non-null    int64
 10  PremiumPrice             986 non-null    int64
dtypes: int64(11)
memory usage: 84.9 KB


Unnamed: 0,Age,Diabetes,BloodPressureProblems,AnyTransplants,AnyChronicDiseases,Height,Weight,KnownAllergies,HistoryOfCancerInFamily,NumberOfMajorSurgeries,PremiumPrice
0,45,0,0,0,0,155,57,0,0,0,25000
1,60,1,0,0,0,180,73,0,0,0,29000
2,36,1,1,0,0,158,59,0,0,1,23000
3,52,1,1,0,1,183,93,0,0,2,28000
4,38,0,0,0,1,166,88,0,0,1,23000
...,...,...,...,...,...,...,...,...,...,...,...
981,18,0,0,0,0,169,67,0,0,0,15000
982,64,1,1,0,0,153,70,0,0,3,28000
983,56,0,1,0,0,155,71,0,0,1,29000
984,47,1,1,0,0,158,73,1,0,1,39000


In [4]:
df.rename(columns={
    "Age": "Usia",
    "Diabetes": "Diabetes",
    "BloodPressureProblems": "Masalah Tekanan Darah",
    "AnyTransplants": "Riwayat Transplantasi",
    "AnyChronicDiseases": "Penyakit Kronis",
    "Height": "Tinggi Badan cm",
    "Weight": "Berat Badan kg",
    "KnownAllergies": "Alergi",
    "HistoryOfCancerInFamily": "Riwayat Kanker Keluarga",
    "NumberOfMajorSurgeries": "Jumlah Operasi Besar",
    "PremiumPrice": "Premi Asuransi"
}, inplace=True)

df["Premi Asuransi"] = df["Premi Asuransi"] * 12
df


Unnamed: 0,Usia,Diabetes,Masalah Tekanan Darah,Riwayat Transplantasi,Penyakit Kronis,Tinggi Badan cm,Berat Badan kg,Alergi,Riwayat Kanker Keluarga,Jumlah Operasi Besar,Premi Asuransi
0,45,0,0,0,0,155,57,0,0,0,300000
1,60,1,0,0,0,180,73,0,0,0,348000
2,36,1,1,0,0,158,59,0,0,1,276000
3,52,1,1,0,1,183,93,0,0,2,336000
4,38,0,0,0,1,166,88,0,0,1,276000
...,...,...,...,...,...,...,...,...,...,...,...
981,18,0,0,0,0,169,67,0,0,0,180000
982,64,1,1,0,0,153,70,0,0,3,336000
983,56,0,1,0,0,155,71,0,0,1,348000
984,47,1,1,0,0,158,73,1,0,1,468000


In [5]:
def buat_label_risiko(premi):
    if premi < 2000000:
        return 0  # risiko rendah
    elif premi < 4000000:
        return 1  # risiko sedang
    else:
        return 2  # risiko tinggi

df['Risiko'] = df['Premi Asuransi'].apply(buat_label_risiko)
df

Unnamed: 0,Usia,Diabetes,Masalah Tekanan Darah,Riwayat Transplantasi,Penyakit Kronis,Tinggi Badan cm,Berat Badan kg,Alergi,Riwayat Kanker Keluarga,Jumlah Operasi Besar,Premi Asuransi,Risiko
0,45,0,0,0,0,155,57,0,0,0,300000,0
1,60,1,0,0,0,180,73,0,0,0,348000,0
2,36,1,1,0,0,158,59,0,0,1,276000,0
3,52,1,1,0,1,183,93,0,0,2,336000,0
4,38,0,0,0,1,166,88,0,0,1,276000,0
...,...,...,...,...,...,...,...,...,...,...,...,...
981,18,0,0,0,0,169,67,0,0,0,180000,0
982,64,1,1,0,0,153,70,0,0,3,336000,0
983,56,0,1,0,0,155,71,0,0,1,348000,0
984,47,1,1,0,0,158,73,1,0,1,468000,0


In [6]:
X = df.drop(['Premi Asuransi', 'Risiko'], axis=1).values
y_reg = df['Premi Asuransi'].values
y_clf = df['Risiko'].values

In [7]:
from sklearn.preprocessing import StandardScaler

scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)

scaler_y_reg = StandardScaler()
y_reg_scaled = scaler_y_reg.fit_transform(y_reg.reshape(-1,1))

In [8]:
X_train, X_test, y_train_reg, y_test_reg, y_train_clf, y_test_clf = train_test_split(
    X_scaled, y_reg_scaled.flatten(), y_clf, test_size=0.2, random_state=42
)

In [9]:
import tensorflow as tf
from tensorflow.keras import layers, models
import keras_tuner as kt

def build_model(hp):
    inputs = layers.Input(shape=(X_train.shape[1],))
    x = layers.Dense(units=hp.Int('units', 64, 128, step=32), activation='relu')(inputs)
    x = layers.Dropout(rate=hp.Float('dropout', 0.2, 0.4, step=0.1))(x)
    
    risk_output = layers.Dense(3, activation='softmax', name='risk_output')(x)
    premium_output = layers.Dense(1, activation='linear', name='premium_output')(x)

    model = models.Model(inputs=inputs, outputs=[risk_output, premium_output])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hp.Float('lr', 1e-4, 1e-3, sampling='log')
        ),
        loss={
            'risk_output': 'sparse_categorical_crossentropy',
            'premium_output': 'mse'
        },
        metrics={
            'risk_output': 'accuracy',
            'premium_output': 'mae'
        }
    )
    return model

In [10]:
from keras_tuner import Objective

tuner = kt.RandomSearch(
    build_model,
    objective=Objective("val_risk_output_accuracy", direction="max"),  # ← tambahkan direction
    max_trials=10,
    executions_per_trial=1,
    directory='asuransi_tuning',
    project_name='multioutput_model'
)

early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)

Reloading Tuner from asuransi_tuning\multioutput_model\tuner0.json


In [12]:
tuner.search(
    X_train,
    {'risk_output': y_train_clf, 'premium_output': y_train_reg},
    validation_data=(X_test, {'risk_output': y_test_clf, 'premium_output': y_test_reg}),
    epochs=50,
    batch_size=64,
    callbacks=[early_stop],
    verbose=2
)

In [13]:
# Ambil hyperparameter terbaik
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build model dengan hyperparameter terbaik
best_model = build_model(best_hp)

# Latih model terbaik dengan data training (optional, tapi direkomendasikan)
best_model.fit(
    X_train,
    {'risk_output': y_train_clf, 'premium_output': y_train_reg},
    validation_data=(X_test, {'risk_output': y_test_clf, 'premium_output': y_test_reg}),
    epochs=50,
    batch_size=64,
    callbacks=[early_stop],
    verbose=2
)

Epoch 1/50
13/13 - 2s - 151ms/step - loss: 2.0005 - premium_output_loss: 1.0677 - premium_output_mae: 0.7455 - risk_output_accuracy: 0.6485 - risk_output_loss: 0.9085 - val_loss: 1.6870 - val_premium_output_loss: 0.9630 - val_premium_output_mae: 0.7185 - val_risk_output_accuracy: 0.9293 - val_risk_output_loss: 0.7138
Epoch 2/50
13/13 - 0s - 20ms/step - loss: 1.5077 - premium_output_loss: 0.8819 - premium_output_mae: 0.6749 - risk_output_accuracy: 0.9137 - risk_output_loss: 0.6114 - val_loss: 1.2386 - val_premium_output_loss: 0.7513 - val_premium_output_mae: 0.6431 - val_risk_output_accuracy: 1.0000 - val_risk_output_loss: 0.4808
Epoch 3/50
13/13 - 0s - 14ms/step - loss: 1.1625 - premium_output_loss: 0.7590 - premium_output_mae: 0.6238 - risk_output_accuracy: 0.9962 - risk_output_loss: 0.4091 - val_loss: 0.9484 - val_premium_output_loss: 0.6167 - val_premium_output_mae: 0.5926 - val_risk_output_accuracy: 1.0000 - val_risk_output_loss: 0.3299
Epoch 4/50
13/13 - 0s - 16ms/step - loss: 0.9

<keras.src.callbacks.history.History at 0x1564fb7f190>

In [14]:
# Evaluasi model terbaik
results = best_model.evaluate(
    X_test,
    {'risk_output': y_test_clf, 'premium_output': y_test_reg},
    batch_size=128,
    verbose=1
)
print("Evaluasi model terbaik:", results)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 0.3133 - premium_output_loss: 0.2911 - premium_output_mae: 0.3895 - risk_output_accuracy: 1.0000 - risk_output_loss: 0.0067
Evaluasi model terbaik: [0.294593870639801, 0.006829240825027227, 0.2645323574542999, 0.377778559923172, 1.0]


- Total loss: 0.295 (gabungan klasifikasi & regresi)
- Loss regresi (MSE): 0.0068 (sangat kecil)
- MAE regresi: 0.265 (rata-rata selisih absolut prediksi premi di skala standar)
- Akurasi klasifikasi risiko: 100% (prediksi kelas risiko sempurna pada data test)

Kesimpulan:

Model sangat baik untuk klasifikasi risiko dan cukup presisi untuk prediksi premi asuransi.

In [15]:
best_model.save("ModelPremiKesehatan.h5")



In [16]:
import joblib

# Simpan scaler fitur
joblib.dump(scaler_X, "scaler_Kesehatan_X.pkl")

# Simpan scaler target regresi
joblib.dump(scaler_y_reg, "scaler_Kesehatan_Y.pkl")

['scaler_Kesehatan_Y.pkl']