In [71]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load data
file = pd.read_csv("car_insurance.csv")

# Fill missing values with the column mean.
# The filled Series is assigned back to the column rather than calling
# `file[col].fillna(..., inplace=True)`: that form mutates a temporary Series
# (chained assignment), emits a FutureWarning on pandas 2.x, and leaves `file`
# unchanged once copy-on-write is enabled (pandas 3.0).
# NOTE(review): the mean is computed over all rows, i.e. before the
# train/test split performed later in this cell.
file['credit_score'] = file['credit_score'].fillna(file['credit_score'].mean())
file['annual_mileage'] = file['annual_mileage'].fillna(file['annual_mileage'].mean())

# Convert categorical variables to integer codes.
# `.cat.codes` numbers the distinct values in sorted order and encodes
# missing values as -1.
# NOTE(review): sorted order is not necessarily the real-world order of
# ordinal columns such as income or education -- verify against the data.
file['driving_experience'] = file['driving_experience'].astype('category').cat.codes
file['education'] = file['education'].astype('category').cat.codes
file['income'] = file['income'].astype('category').cat.codes
file['vehicle_year'] = file['vehicle_year'].astype('category').cat.codes
file['vehicle_type'] = file['vehicle_type'].astype('category').cat.codes

# Model inputs (already numeric or integer-encoded above) and the label column
features = [
    'age',
    'gender',
    'driving_experience',
    'income',
    'vehicle_ownership',
    'vehicle_year',
    'speeding_violations',
]
X, y = file[features], file["outcome"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Build the 13-nearest-neighbours classifier and train it in one step
# (fit() returns the fitted estimator itself)
knn = KNeighborsClassifier(n_neighbors=13).fit(X_train, y_train)

# Class predictions for the held-out rows
y_predict = knn.predict(X_test)

# Fraction of held-out rows predicted correctly, reported as a percentage
accuracy = accuracy_score(y_test, y_predict)
print("Accuracy:", accuracy * 100)

# Per-class precision / recall / F1 for the held-out rows
print(f"\nClassification Report:\n {classification_report(y_test, y_predict)}")


# Side-by-side table of true vs. predicted labels; it keeps y_test's index,
# and the predictions are attached positionally
results = y_test.to_frame('True Outcome').assign(**{'Predicted Outcome': y_predict})

# Preview the first rows of the comparison table
print(f"\nTest set with True and Predicted Outcomes:\n {results.head()}")

Accuracy: 82.8

Classification Report:
               precision    recall  f1-score   support

         0.0       0.85      0.90      0.88      1345
         1.0       0.76      0.69      0.72       655

    accuracy                           0.83      2000
   macro avg       0.81      0.79      0.80      2000
weighted avg       0.82      0.83      0.83      2000


Test set with True and Predicted Outcomes:
       True Outcome  Predicted Outcome
9394           1.0                1.0
898            0.0                0.0
2398           0.0                0.0
5906           0.0                0.0
2343           1.0                1.0
