In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

In [2]:
# Generaset Dataset Dummy
np.random.seed(42)
n_samples = 5000

umur = np.random.randint(18, 70, size=n_samples)
penghasilan = np.random.randint(3_000_000, 15_000_000, size=n_samples)
merokok = np.random.choice([0, 1], size=n_samples)
bmi = np.round(np.random.normal(25, 5, size=n_samples), 1)
penyakit_kronis = np.random.choice([0, 1], size=n_samples)

In [3]:
# Gabung ke DataFrame
df = pd.DataFrame({
    'Umur': umur,
    'Penghasilan': penghasilan,
    'Merokok': merokok,
    'BMI': bmi,
    'Penyakit Kronis': penyakit_kronis
})
df

Unnamed: 0,Umur,Penghasilan,Merokok,BMI,Penyakit Kronis
0,56,5361216,1,28.3,1
1,69,10446153,0,28.9,1
2,46,5567587,1,33.3,0
3,32,3264237,1,22.9,1
4,60,7469353,1,30.0,0
...,...,...,...,...,...
4995,24,7949331,0,26.3,0
4996,66,8280156,0,22.1,1
4997,26,13802519,0,24.8,1
4998,53,11399122,1,23.5,0


In [4]:
# Membuat Label Risiko dan Premi
risk_score = (
    (umur > 50).astype(int) +
    (bmi > 30).astype(int) +
    merokok +
    penyakit_kronis
)
risk_label = pd.cut(risk_score, bins=[-1, 1, 2, 4], labels=[0, 1, 2]).astype(int)

base_premi = 100_000
premi = base_premi + (risk_label * 100_000) + (penghasilan * 0.01)
premi = premi.astype(int)

In [5]:
#  Membuat DataFrame dan Membagi Data
df = pd.DataFrame({
    'umur': umur,
    'penghasilan': penghasilan,
    'merokok': merokok,
    'bmi': bmi,
    'penyakit_kronis': penyakit_kronis,
    'risk_label': risk_label,
    'premi': premi
})


features = df[['umur', 'penghasilan', 'merokok', 'bmi', 'penyakit_kronis']]
target_risk = df['risk_label']
target_premi = df['premi']

X_train, X_test, y_risk_train, y_risk_test, y_premi_train, y_premi_test = train_test_split(
    features, target_risk, target_premi, test_size=0.2, random_state=42
)

In [6]:
# Standarisasi Fitur Numerik
scaler = StandardScaler()
X_train_scaled = X_train.copy()
X_test_scaled = X_test.copy()

X_train_scaled[['umur', 'penghasilan', 'bmi']] = scaler.fit_transform(X_train[['umur', 'penghasilan', 'bmi']])
X_test_scaled[['umur', 'penghasilan', 'bmi']] = scaler.transform(X_test[['umur', 'penghasilan', 'bmi']])


In [7]:
# Membangun Model TensorFlow
input_layer = tf.keras.Input(shape=(5,))

x = tf.keras.layers.Dense(64, activation='relu')(input_layer)
x = tf.keras.layers.Dense(32, activation='relu')(x)

risk_output = tf.keras.layers.Dense(3, activation='softmax', name='risk_label')(x)
premi_output = tf.keras.layers.Dense(1, activation='linear', name='premi')(x)

model = tf.keras.Model(inputs=input_layer, outputs=[risk_output, premi_output])
model.compile(
    optimizer='adam',
    loss={
        'risk_label': 'sparse_categorical_crossentropy',  # klasifikasi multi kelas
        'premi': 'mse'  # regresi nilai kontinu
    },
    metrics={
        'risk_label': 'accuracy',
        'premi': 'mae'  # mean absolute error, supaya mudah dimengerti error-nya
    }
)

In [8]:
# Melatih Model
history = model.fit(
    X_train_scaled,
    {'risk_label': y_risk_train, 'premi': y_premi_train},
    validation_data=(X_test_scaled, {'risk_label': y_risk_test, 'premi': y_premi_test}),
    epochs=30,
    batch_size=32,
    verbose=2
)

Epoch 1/30
125/125 - 2s - 20ms/step - loss: 71859978240.0000 - premi_loss: 71859978240.0000 - premi_mae: 255667.3750 - risk_label_accuracy: 0.3970 - risk_label_loss: 1.1211 - val_loss: 71007821824.0000 - val_premi_loss: 71213383680.0000 - val_premi_mae: 253760.6094 - val_risk_label_accuracy: 0.4490 - val_risk_label_loss: 1.1048
Epoch 2/30
125/125 - 0s - 3ms/step - loss: 71810334720.0000 - premi_loss: 71810334720.0000 - premi_mae: 255578.1719 - risk_label_accuracy: 0.4420 - risk_label_loss: 1.1247 - val_loss: 70903480320.0000 - val_premi_loss: 71108927488.0000 - val_premi_mae: 253574.9531 - val_risk_label_accuracy: 0.4670 - val_risk_label_loss: 1.1285
Epoch 3/30
125/125 - 0s - 3ms/step - loss: 71608442880.0000 - premi_loss: 71608442880.0000 - premi_mae: 255224.0625 - risk_label_accuracy: 0.4383 - risk_label_loss: 1.1818 - val_loss: 70588948480.0000 - val_premi_loss: 70793961472.0000 - val_premi_mae: 253026.2500 - val_risk_label_accuracy: 0.4090 - val_risk_label_loss: 1.2083
Epoch 4/30
1

In [9]:
# Evaluasi Model
eval_results = model.evaluate(X_test_scaled, {'risk_label': y_risk_test, 'premi': y_premi_test}, verbose=2)
print(f"Test Loss dan Metrics: {eval_results}")

32/32 - 0s - 3ms/step - loss: 3100734208.0000 - premi_loss: 3117381888.0000 - premi_mae: 43909.3711 - risk_label_accuracy: 0.5110 - risk_label_loss: 6.5766
Test Loss dan Metrics: [3100734208.0, 6.576597213745117, 3117381888.0, 43909.37109375, 0.5109999775886536]


In [10]:
# Menyimpan Model
model.save('model_asuransi_kesehatan.h5')

import joblib
joblib.dump(scaler, 'scaler_asuransi_kesehatan.pkl')



['scaler_asuransi_kesehatan.pkl']

In [11]:
# Load Model dan Scaler untuk Prediksi

from tensorflow.keras.models import load_model
import joblib

# Load model dan scaler
model = load_model('model_asuransi_kesehatan.h5', compile=False)

scaler = joblib.load('scaler_asuransi_kesehatan.pkl')

# Fungsi prediksi risiko dan premi baru
def prediksi_risiko_premi(umur, penghasilan, merokok, bmi, penyakit_kronis, base_premi=100_000):
    input_df = pd.DataFrame({
        'umur': [umur],
        'penghasilan': [penghasilan],
        'merokok': [1 if merokok else 0],
        'bmi': [bmi],
        'penyakit_kronis': [1 if penyakit_kronis else 0]
    })
    
    # Standarisasi fitur numerik
    input_scaled = input_df.copy()
    input_scaled[['umur','penghasilan','bmi']] = scaler.transform(input_df[['umur','penghasilan','bmi']])
    
    # Prediksi
    prediksi = model.predict(input_scaled)
    kelas_risiko = np.argmax(prediksi[0], axis=1)[0]
    
    # Hitung premi akhir
    premi_final = base_premi + (kelas_risiko * 100_000) + (penghasilan * 0.01)
    
    return kelas_risiko, int(premi_final)


In [12]:
# Contoh input user
umur = 18
penghasilan = 9000000
merokok = True
bmi = 28
penyakit_kronis = False

kelas_risiko, premi = prediksi_risiko_premi(umur, penghasilan, merokok, bmi, penyakit_kronis)

print(f'Kelas Risiko: {kelas_risiko}')   # 0 = rendah, 1 = sedang, 2 = tinggi (sesuai label)
print(f'Perkiraan Premi: Rp {premi}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
Kelas Risiko: 0
Perkiraan Premi: Rp 190000
