In [61]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler,StandardScaler

from imblearn.over_sampling import RandomOverSampler
from collections import Counter
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.tree import DecisionTreeClassifier
import pickle
#import warnings
import warnings
warnings.filterwarnings("ignore")

In [62]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [63]:
df=pd.read_csv('/content/drive/MyDrive/Health.csv')

In [64]:
# Preprocess the data
df['Gender'] = df['Gender'].map({'Female': 1, 'Male': 0})
df['Vehicle_Damage'] = df['Vehicle_Damage'].map({'Yes': 1, 'No': 0})
df['Vehicle_Age'] = df['Vehicle_Age'].map({'< 1 Year': 0, '1-2 Year': 1, '> 2 Years': 2})

In [65]:
# Select features and target variable
x = df[['Gender', 'Age', 'Driving_License', 'Region_Code', 'Previously_Insured', 'Vehicle_Age', 'Vehicle_Damage', 'Annual_Premium', 'Policy_Sales_Channel', 'Vintage']]
y = df['Response']

In [66]:
# Resampling
ros = RandomOverSampler(random_state=0)
x_resampled, y_resampled = ros.fit_resample(x, y)

In [67]:
print("After Random Over Sampling Of Minor Class, Total Samples are:", len(y_resampled))
print('Original dataset shape:', Counter(y))
print('Resampled dataset shape:', Counter(y_resampled))# Train the model

After Random Over Sampling Of Minor Class, Total Samples are: 668798
Original dataset shape: Counter({0: 334399, 1: 46710})
Resampled dataset shape: Counter({1: 334399, 0: 334399})


In [68]:
# Split the data into training and testing sets
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(x_resampled, y_resampled, random_state=42, test_size=0.3)

In [69]:
# Normalizing the Dataset using Standard Scaling Technique
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [70]:
# Train the model
dt_model = DecisionTreeClassifier(random_state=30)
dt_model.fit(X_train_scaled, y_train)

In [71]:
# Make predictions on the test set
y_pred = dt_model.predict(X_test_scaled)

In [72]:
# Make predictions on the test set
y_pred = dt_model.predict(X_test_scaled)

# Evaluation
RS_dt = recall_score(y_test, y_pred)
print("Recall Score:", RS_dt)

PS_dt = precision_score(y_test, y_pred)
print("Precision Score:", PS_dt)

f1S_dt = f1_score(y_test, y_pred)
print("F1 Score:", f1S_dt)

AS_dt = accuracy_score(y_test, y_pred)
print("Accuracy Score:", AS_dt)

acu_dt = roc_auc_score(y_test, y_pred)
print("ROC AUC Score:", acu_dt)


Recall Score: 0.9961096075778079
Precision Score: 0.8921852581297733
F1 Score: 0.9412876383893941
Accuracy Score: 0.9377541866028708
ROC AUC Score: 0.9376469584585532


In [73]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.88      0.93    100136
           1       0.89      1.00      0.94    100504

    accuracy                           0.94    200640
   macro avg       0.94      0.94      0.94    200640
weighted avg       0.94      0.94      0.94    200640



In [74]:
# Save the trained model
import pickle
with open('model.pkl', 'wb') as file:
    pickle.dump(dt_model, file)