In [32]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier  # Importing KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pickle

In [10]:
# Loading the dataset
data = pd.read_csv('Prediction Insurance.csv')

In [12]:
# Displaying the first few rows of the dataset
print(data.head())

   id  Gender  Age  Driving_License  Region_Code  Previously_Insured  \
0   1    Male   44                1           28                   0   
1   2    Male   76                1            3                   0   
2   3    Male   47                1           28                   0   
3   4    Male   21                1           11                   1   
4   5  Female   29                1           41                   1   

  Vehicle_Age Vehicle_Damage  Annual_Premium  Policy_Sales_Channel  Vintage  \
0   > 2 Years            Yes           40454                    26    217.0   
1    1-2 Year             No           33536                    26    183.0   
2   > 2 Years            Yes           38294                    26     27.0   
3    < 1 Year             No           28619                   152    203.0   
4    < 1 Year             No           27496                   152     39.0   

   Response  
0       1.0  
1       0.0  
2       1.0  
3       0.0  
4       0.0  


In [16]:
# Preprocessing the data
# Encoding categorical variables
data['Gender'] = data['Gender'].map({'Male': 0, 'Female': 1})
data['Vehicle_Age'] = data['Vehicle_Age'].map({'< 1 Year': 0, '1-2 Year': 1, '> 2 Years': 2})
data['Vehicle_Damage'] = data['Vehicle_Damage'].map({'No': 0, 'Yes': 1})

In [24]:
data

Unnamed: 0,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,0,44,1,28,0,2,1,40454,26,217.0,1.0
1,2,0,76,1,3,0,1,0,33536,26,183.0,0.0
2,3,0,47,1,28,0,2,1,38294,26,27.0,1.0
3,4,0,21,1,11,1,0,0,28619,152,203.0,0.0
4,5,1,29,1,41,1,0,0,27496,152,39.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
327738,327739,0,37,1,37,0,1,1,23059,160,221.0,0.0
327739,327740,1,47,1,28,0,1,1,35201,3,135.0,1.0
327740,327741,0,22,1,47,0,0,1,27342,152,19.0,0.0
327741,327742,0,40,1,38,0,1,1,27525,26,59.0,0.0


In [23]:
# Handling missing values if any
data = data.dropna()

In [25]:
# Splitting the dataset into features and target variable
X = data.drop('Response', axis=1)
y = data['Response']

In [27]:
# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [28]:
# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [33]:
# Training the model
model = KNeighborsClassifier(n_neighbors=5)  # Adjust n_neighbors as needed
model.fit(X_train, y_train)

In [34]:
# Making predictions
y_pred = model.predict(X_test)

In [35]:
# Evaluating the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

In [36]:
print(f"Accuracy: {accuracy}")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)

Accuracy: 0.8549482066850753
Confusion Matrix:
[[54710  2794]
 [ 6714  1331]]
Classification Report:
              precision    recall  f1-score   support

         0.0       0.89      0.95      0.92     57504
         1.0       0.32      0.17      0.22      8045

    accuracy                           0.85     65549
   macro avg       0.61      0.56      0.57     65549
weighted avg       0.82      0.85      0.83     65549



In [37]:
# Saving the model to a pickle file
with open('insurance_model.pkl', 'wb') as file:
    pickle.dump(model, file)
# Can't upload this pickle file as the size is exceed 25MB (Can't upload on github)

In [40]:
# Saving the model to a pickle file that zipped (Alternative)
import gzip

with gzip.open('insurance_model_knn.pkl.gz', 'wb') as file:
    pickle.dump(model, file)

In [38]:
# Saving the scaler to a pickle file
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)