In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [2]:
h = 0.02  # step size in the mesh

names = [
    "Nearest Neighbors",
    "Linear SVM",
    "Decision Tree",
    "Random Forest",
    "Neural Net",
    "AdaBoost",
    "Naive Bayes",
    "QDA",
]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]

In [3]:
import pandas as pd
df_final = pd.read_csv("df_final.csv")

In [4]:
df_final

Unnamed: 0,Claim,Duration,Net Sales,Commision (in value),Agency_ADM,Agency_ART,Agency_C2B,Agency_CBH,Agency_CCR,Agency_CSR,...,Product Name_Single Trip Travel Protect Silver,Product Name_Spouse or Parents Comprehensive Plan,Product Name_Ticket Protector,Product Name_Travel Cruise Protect,Product Name_Travel Cruise Protect Family,Product Name_Value Plan,Distribution Channel_Offline,Distribution Channel_Online,Agency Type_Airlines,Agency Type_Travel Agency
0,No,8,-29.0,9.57,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,1
1,No,8,-29.0,9.57,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,1
2,No,3,-49.5,29.70,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,1
3,No,3,-39.6,23.76,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,1
4,No,4,-19.8,11.88,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63321,No,5,35.0,12.25,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
63322,No,3,40.0,14.00,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
63323,No,1,18.0,6.30,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
63324,No,1,18.0,6.30,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0


In [5]:
from imblearn.over_sampling import SMOTE 

In [6]:
y = [1 if i=="Yes" else 0 for i in df_final["Claim"]]
X = df_final.drop(columns=["Claim"])

In [7]:
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X, y)

In [8]:
X = StandardScaler().fit_transform(X_res)

In [9]:
from collections import Counter

In [10]:
y = y_res

In [11]:
from sklearn.metrics import classification_report

In [12]:
X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=42
    )

In [13]:
X_train.shape

(74878, 52)

In [14]:
for name, clf in zip(names, classifiers):
    
    print(name)
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred))
    print(score)
    print("*"*10)



Nearest Neighbors
              precision    recall  f1-score   support

           0       0.86      0.85      0.86     24922
           1       0.85      0.87      0.86     24998

    accuracy                           0.86     49920
   macro avg       0.86      0.86      0.86     49920
weighted avg       0.86      0.86      0.86     49920

0.8576522435897436
**********
Linear SVM
              precision    recall  f1-score   support

           0       0.79      0.88      0.83     24922
           1       0.86      0.77      0.81     24998

    accuracy                           0.82     49920
   macro avg       0.83      0.82      0.82     49920
weighted avg       0.83      0.82      0.82     49920

0.8224158653846154
**********
Decision Tree
              precision    recall  f1-score   support

           0       0.76      0.82      0.79     24922
           1       0.80      0.74      0.77     24998

    accuracy                           0.78     49920
   macro avg       0.78  



              precision    recall  f1-score   support

           0       0.58      0.99      0.73     24922
           1       0.97      0.29      0.44     24998

    accuracy                           0.64     49920
   macro avg       0.77      0.64      0.59     49920
weighted avg       0.77      0.64      0.59     49920

0.6379407051282051
**********
