## **Import Libraries**

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
import pickle

## **Load Data**

In [2]:
# Read the insurance CSV from the notebook's working directory.
DATA_FILE = 'Homework - Prediction Insurance.csv'
data = pd.read_csv(DATA_FILE)

## **Preprocess**

In [3]:
# Integer encodings for the text-valued columns so the tree receives numeric inputs.
CATEGORY_ENCODINGS = {
    'Gender': {'Male': 0, 'Female': 1},
    'Vehicle_Damage': {'Yes': 1, 'No': 0},
    'Vehicle_Age': {'< 1 Year': 0, '1-2 Year': 1, '> 2 Years': 2},
}

for column, encoding in CATEGORY_ENCODINGS.items():
    observed = data[column].dropna()

    # Series.map turns every value that is not a key of the dict into NaN,
    # which includes integers produced by a previous run of this cell.
    # Skip columns that already hold only encoded values so that re-running
    # the cell does not wipe them.
    if observed.isin(list(encoding.values())).all():
        continue

    # Fail loudly on labels the encoding does not cover instead of letting
    # Series.map silently replace them with NaN.
    unexpected = sorted(str(value) for value in set(observed.unique()) - set(encoding))
    if unexpected:
        raise ValueError(f"Unexpected values in column {column!r}: {unexpected}")

    data[column] = data[column].map(encoding)

## **Separate Features and Target**

In [4]:
# The label column is the prediction target; everything else becomes a feature.
# NOTE(review): any identifier-like column in the CSV is kept as a feature too — confirm that is intended.
target_column = 'Response'
y = data[target_column]
X = data.drop(columns=target_column)

## **Split into Training and Test Sets**

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed pins the shuffle.
split_options = {'test_size': 0.2, 'random_state': 0}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

## **Model with Balancing**

In [6]:
# Depth-limited decision tree with balanced class weights and a fixed seed.
tree_params = dict(
    max_depth=5,
    min_samples_split=10,
    class_weight='balanced',
    random_state=0,
)
balanced_decision_tree_model = DecisionTreeClassifier(**tree_params)

# Kept as the last expression so the notebook still displays the fitted estimator.
balanced_decision_tree_model.fit(X_train, y_train)

## **Make Predictions**

In [7]:
# Predict class labels for the held-out test rows with the fitted tree.
y_pred_balanced_tree = balanced_decision_tree_model.predict(
    X_test,
)

## **Evaluation Report**

In [8]:
# Build the per-class precision / recall / F1 summary for the test set and print it.
balanced_decision_tree_report = classification_report(
    y_true=y_test,
    y_pred=y_pred_balanced_tree,
)
print(balanced_decision_tree_report)

              precision    recall  f1-score   support

           0       0.98      0.67      0.80     66846
           1       0.28      0.91      0.43      9376

    accuracy                           0.70     76222
   macro avg       0.63      0.79      0.61     76222
weighted avg       0.90      0.70      0.75     76222



## **Save the Model**

In [11]:
# Serialize the fitted tree to disk with pickle.
MODEL_FILE = 'balanced_decision_tree_model_final.pkl'
with open(MODEL_FILE, mode='wb') as model_file:
    pickle.dump(balanced_decision_tree_model, model_file)