# Classification
In this dataset you have to predict that whether we have to give ATTORNEY to the clients or not (0=Yes, 1=No) based on below factors.
CLMAGE - Age of Client
CLMSEX - Sex of client (0=Male, 1= Female)
SEATBELT - Whether client has Seatbelt or not (0=Yes, 1=No)
CLMINSUR - Whether Client give Insurance or not (0=Yes, 1=No)
LOSS - Loss that client faced

Prepare model by using Naive Bayes,KNN and SVM & Also calculate Accuracy by confusion matrix, Calculate accuracy score, precision score, recall score, f1 score.

Dataset : Claimants.csv




In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### load the data

In [5]:
df = pd.read_csv('./claimants.csv')

### EDA

In [6]:

df.head()

Unnamed: 0,ATTORNEY,CLMSEX,CLMINSUR,SEATBELT,CLMAGE,LOSS
0,0,0.0,1.0,0.0,50.0,34.94
1,1,1.0,0.0,0.0,18.0,0.891
2,1,0.0,1.0,0.0,5.0,0.33
3,0,0.0,1.0,1.0,31.0,0.037
4,1,0.0,1.0,0.0,30.0,0.038


In [7]:
df.shape

(1340, 6)

In [8]:
df.isna().sum()

ATTORNEY      0
CLMSEX       12
CLMINSUR     41
SEATBELT     48
CLMAGE      189
LOSS          0
dtype: int64

In [9]:
df_cleaned = df.dropna()

In [10]:
df_cleaned.isna().sum()

ATTORNEY    0
CLMSEX      0
CLMINSUR    0
SEATBELT    0
CLMAGE      0
LOSS        0
dtype: int64

In [11]:
df_cleaned.corr()

Unnamed: 0,ATTORNEY,CLMSEX,CLMINSUR,SEATBELT,CLMAGE,LOSS
ATTORNEY,1.0,0.097475,0.084609,-0.060795,0.001132,-0.225769
CLMSEX,0.097475,1.0,0.058164,-0.017814,-0.017942,-0.029742
CLMINSUR,0.084609,0.058164,1.0,0.021217,0.048166,0.019502
SEATBELT,-0.060795,-0.017814,0.021217,1.0,-0.028343,0.131182
CLMAGE,0.001132,-0.017942,0.048166,-0.028343,1.0,0.065513
LOSS,-0.225769,-0.029742,0.019502,0.131182,0.065513,1.0


### split the data into x and y

In [12]:
x = df_cleaned.drop(['CLMINSUR','ATTORNEY'], axis=1)
y = df_cleaned['ATTORNEY']

### split the data into train and test sets

In [13]:
from sklearn.model_selection import train_test_split

x_train , x_test , y_train , y_test = train_test_split(x,y, random_state=12345)


### Naive Bayes Model training

In [14]:
from sklearn.naive_bayes import GaussianNB

model_nb = GaussianNB()
model_nb.fit(x_train, y_train)

### KNN Modeltraining

In [15]:
from sklearn.neighbors import KNeighborsClassifier

model_knn =KNeighborsClassifier(n_neighbors=5)
model_knn.fit(x_train , y_train)

### SVM Model training

In [16]:
from sklearn.svm import SVC 

model_svc = SVC(C = 2.0)
model_svc.fit(x_train , y_train)

## model evaluation 

In [17]:
from sklearn.metrics import confusion_matrix , accuracy_score , precision_score , recall_score ,f1_score

def evaluate_model(model , model_name):
    y_pred = model.predict(x_test)

    y_true = y_test

    print(f"---- {model_name} evaluation ----")

    cm = confusion_matrix(y_true , y_pred)
    print(f"confusion matrix: ")
    print(cm)
    print(f"accuracy : {accuracy_score(y_true , y_pred)* 100:.2f}%")
    print(f"precision: {precision_score(y_true, y_pred):.2f}")
    print(f"recall : {recall_score(y_true , y_pred):.2f}")
    print(f"F1 Score: {f1_score(y_true, y_pred):.2f}")
    
    



    

In [18]:
evaluate_model(model_nb ,"Naive Bayes")

---- Naive Bayes evaluation ----
confusion matrix: 
[[ 30 124]
 [  4 116]]
accuracy : 53.28%
precision: 0.48
recall : 0.97
F1 Score: 0.64


In [19]:
evaluate_model(model_knn , "KNN")

---- KNN evaluation ----
confusion matrix: 
[[107  47]
 [ 44  76]]
accuracy : 66.79%
precision: 0.62
recall : 0.63
F1 Score: 0.63


In [20]:
evaluate_model(model_svc, "SVM")

---- SVM evaluation ----
confusion matrix: 
[[84 70]
 [23 97]]
accuracy : 66.06%
precision: 0.58
recall : 0.81
F1 Score: 0.68
