## **Library**

In [15]:
import time
import pickle
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from xgboost import XGBClassifier


## **Load Data**

In [16]:
# Load the raw insurance dataset and encode its text-valued columns as integers.
# Values missing from a column's lookup table become NaN (pandas `Series.map` semantics).
category_codes = {
    'Gender': {'Male': 0, 'Female': 1},
    'Vehicle_Age': {'< 1 Year': 0, '1-2 Year': 1, '> 2 Years': 2},
    'Vehicle_Damage': {'No': 0, 'Yes': 1},
}

data = pd.read_csv('Prediction Insurance.csv')
for column, codes in category_codes.items():
    data[column] = data[column].map(codes)

# Target is the `Response` column; every other column is used as a feature.
# NOTE(review): `id` looks like a row identifier and is still part of `x` — confirm
# whether it should be a model input before changing the feature set.
y = data['Response']
x = data.drop(columns='Response')

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)


In [21]:
# Only the last expression of a cell is rendered, so the dtypes are printed
# explicitly; otherwise their value would be silently discarded.
print(data.dtypes)

# Preview the first rows of the encoded frame.
data.head()

Unnamed: 0,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,0,44,1,28,0,2,1,40454,26,217,1
1,2,0,76,1,3,0,1,0,33536,26,183,0
2,3,0,47,1,28,0,2,1,38294,26,27,1
3,4,0,21,1,11,1,0,0,28619,152,203,0
4,5,1,29,1,41,1,0,0,27496,152,39,0


In [17]:
# Compute scale_pos_weight as (number of class-0 rows) / (number of class-1 rows)
# in the training split; it is passed to the classifier to offset class imbalance.
count_class_0, count_class_1 = (y_train.eq(label).sum() for label in (0, 1))
scale_pos_weight = count_class_0 / count_class_1
print(scale_pos_weight)

7.166470241602829


In [22]:
# Configure the XGBoost classifier.
model = XGBClassifier(
    n_estimators=100,          # Number of boosted trees
    learning_rate=0.1,         # Learning rate
    max_depth=6,               # Maximum depth of each tree
    scale_pos_weight=scale_pos_weight,  # Class-imbalance handling
    random_state=0
)

# Train the model and time only the fit itself. perf_counter() is a monotonic,
# high-resolution clock, so the measured interval cannot be skewed by system
# clock adjustments the way time.time() can.
start = time.perf_counter()
model.fit(x_train, y_train)
stop = time.perf_counter()
print(f"Training Time: {stop - start:.2f} seconds")


Training Time: 6.99 seconds


In [23]:
# Predict labels for the held-out split and print the classification report
# comparing them against the true test labels.
y_predict = model.predict(x_test)
evaluation_report = classification_report(y_test, y_predict)
print(evaluation_report)


              precision    recall  f1-score   support

           0       0.99      0.68      0.80     66846
           1       0.29      0.93      0.44      9376

    accuracy                           0.71     76222
   macro avg       0.64      0.80      0.62     76222
weighted avg       0.90      0.71      0.76     76222



In [24]:
# Persist the fitted classifier to a pickle file in the working directory.
# NOTE(review): a pickle is presumably tied to the library versions used to
# create it — confirm the loading environment matches.
with open('modelXGBoost.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)
