# INSURANCE FRAUD DETECTION

*Problem: Whether the claim was fraudulent or not?*

***IMPORT LIBRARIES***

In [1]:
import pandas as pd
from sklearn.preprocessing import label_binarize
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation

import warnings
warnings.filterwarnings("ignore")
#Please ignore the warnings with version change

***IMPORT TRAIN AND TEST DATASET***

In [2]:
trainfile = r'C:\Users\Shehjar Raina\Desktop\CIS 508 Data Mining\IA2\FraudTrain.csv'
trainData = pd.read_csv(trainfile) #creates a dataframe
testfile = r'C:\Users\Shehjar Raina\Desktop\CIS 508 Data Mining\IA2\FraudTest.csv'
testData = pd.read_csv(testfile)

print(trainData.shape)
print(testData.shape)

(2999, 32)
(12918, 32)


***READ TRAIN DATA***

In [3]:
trainData.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2999 entries, 0 to 2998
Data columns (total 32 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   MONTH                 2999 non-null   object
 1   WEEKOFMONTH           2999 non-null   int64 
 2   DAYOFWEEK             2999 non-null   object
 3   MAKE                  2999 non-null   object
 4   ACCIDENTAREA          2999 non-null   object
 5   DAYOFWEEKCLAIMED      2999 non-null   object
 6   MONTHCLAIMED          2999 non-null   object
 7   WEEKOFMONTHCLAIMED    2999 non-null   int64 
 8   SEX                   2999 non-null   object
 9   MARITALSTATUS         2999 non-null   object
 10  AGE                   2999 non-null   int64 
 11  FAULT                 2999 non-null   object
 12  POLICYTYPE            2999 non-null   object
 13  VEHICLECATEGORY       2999 non-null   object
 14  VEHICLEPRICE          2999 non-null   object
 15  REPNUMBER             2999 non-null   

 ***READ TEST DATASET***

In [4]:
testData.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12918 entries, 0 to 12917
Data columns (total 32 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   MONTH                 12918 non-null  object
 1   WEEKOFMONTH           12918 non-null  int64 
 2   DAYOFWEEK             12918 non-null  object
 3   MAKE                  12918 non-null  object
 4   ACCIDENTAREA          12918 non-null  object
 5   DAYOFWEEKCLAIMED      12918 non-null  object
 6   MONTHCLAIMED          12918 non-null  object
 7   WEEKOFMONTHCLAIMED    12918 non-null  int64 
 8   SEX                   12918 non-null  object
 9   MARITALSTATUS         12918 non-null  object
 10  AGE                   12918 non-null  int64 
 11  FAULT                 12918 non-null  object
 12  POLICYTYPE            12918 non-null  object
 13  VEHICLECATEGORY       12918 non-null  object
 14  VEHICLEPRICE          12918 non-null  object
 15  REPNUMBER             12918 non-null

***CHECK FOR MISSING VALUES***

In [5]:
trainData.isnull().sum().sort_values(ascending=False)

MONTH                   0
WEEKOFMONTH             0
BASEPOLICY              0
YEAR                    0
NUMBEROFCARS            0
ADDRESSCHANGE_CLAIM     0
NUMBEROFSUPPLIMENTS     0
AGENTTYPE               0
WITNESSPRESENT          0
POLICEREPORTFILED       0
AGEOFPOLICYHOLDER       0
AGEOFVEHICLE            0
PASTNUMBEROFCLAIMS      0
DAYS_POLICY_CLAIM       0
DAYS_POLICY_ACCIDENT    0
DRIVERRATING            0
DEDUCTIBLE              0
REPNUMBER               0
VEHICLEPRICE            0
VEHICLECATEGORY         0
POLICYTYPE              0
FAULT                   0
AGE                     0
MARITALSTATUS           0
SEX                     0
WEEKOFMONTHCLAIMED      0
MONTHCLAIMED            0
DAYOFWEEKCLAIMED        0
ACCIDENTAREA            0
MAKE                    0
DAYOFWEEK               0
FRAUDFOUND              0
dtype: int64

***DESCRIBE***

In [6]:
trainData.describe()

Unnamed: 0,WEEKOFMONTH,WEEKOFMONTHCLAIMED,AGE,REPNUMBER,DEDUCTIBLE,DRIVERRATING,YEAR
count,2999.0,2999.0,2999.0,2999.0,2999.0,2999.0,2999.0
mean,2.78126,2.671224,40.055352,8.511837,407.302434,2.496832,1995.114038
std,1.286055,1.261614,13.497026,4.601437,41.847258,1.118365,0.606007
min,1.0,1.0,0.0,1.0,300.0,1.0,1994.0
25%,2.0,2.0,31.0,4.0,400.0,1.0,1995.0
50%,3.0,3.0,38.0,9.0,400.0,2.0,1995.0
75%,4.0,4.0,49.0,12.0,400.0,3.0,1995.0
max,5.0,5.0,80.0,16.0,700.0,4.0,1996.0


In [7]:
TrainCols = list(trainData.columns.values)
TestCols = list(testData.columns.values)
print("Train Data Columns")
print(TrainCols)
print()
print("Test Data Columns")
print(TestCols)

Train Data Columns
['MONTH', 'WEEKOFMONTH', 'DAYOFWEEK', 'MAKE', 'ACCIDENTAREA', 'DAYOFWEEKCLAIMED', 'MONTHCLAIMED', 'WEEKOFMONTHCLAIMED', 'SEX', 'MARITALSTATUS', 'AGE', 'FAULT', 'POLICYTYPE', 'VEHICLECATEGORY', 'VEHICLEPRICE', 'REPNUMBER', 'DEDUCTIBLE', 'DRIVERRATING', 'DAYS_POLICY_ACCIDENT', 'DAYS_POLICY_CLAIM', 'PASTNUMBEROFCLAIMS', 'AGEOFVEHICLE', 'AGEOFPOLICYHOLDER', 'POLICEREPORTFILED', 'WITNESSPRESENT', 'AGENTTYPE', 'NUMBEROFSUPPLIMENTS', 'ADDRESSCHANGE_CLAIM', 'NUMBEROFCARS', 'YEAR', 'BASEPOLICY', 'FRAUDFOUND']

Test Data Columns
['MONTH', 'WEEKOFMONTH', 'DAYOFWEEK', 'MAKE', 'ACCIDENTAREA', 'DAYOFWEEKCLAIMED', 'MONTHCLAIMED', 'WEEKOFMONTHCLAIMED', 'SEX', 'MARITALSTATUS', 'AGE', 'FAULT', 'POLICYTYPE', 'VEHICLECATEGORY', 'VEHICLEPRICE', 'REPNUMBER', 'DEDUCTIBLE', 'DRIVERRATING', 'DAYS_POLICY_ACCIDENT', 'DAYS_POLICY_CLAIM', 'PASTNUMBEROFCLAIMS', 'AGEOFVEHICLE', 'AGEOFPOLICYHOLDER', 'POLICEREPORTFILED', 'WITNESSPRESENT', 'AGENTTYPE', 'NUMBEROFSUPPLIMENTS', 'ADDRESSCHANGE_CLAIM', 'N

***SEPARATE TARGET COLUMN FROM TRAIN DATASET***

In [8]:
# Seperate Target column from Train Data
Xtrain = trainData[TrainCols[0:len(TrainCols)-1]].copy()
Ytrain = trainData[['FRAUDFOUND']].copy()
print("Train Set shape:")
print(Xtrain.shape)
print(Ytrain.shape)
Xtest = testData[TestCols[0:len(TestCols)-1]].copy()
Ytest = testData[['FRAUDFOUND']].copy()
print("Test Set shape:")
print(Xtest.shape)
print(Ytest.shape)

Train Set shape:
(2999, 31)
(2999, 1)
Test Set shape:
(12918, 31)
(12918, 1)


***LIST ALL CATEGORICAL FEATURES***

In [9]:
#List of Categorical Features
categoricalFeatures = ['MONTH','DAYOFWEEK', 'MAKE', 'ACCIDENTAREA', 'DAYOFWEEKCLAIMED', 'MONTHCLAIMED', 'SEX', 
                       'MARITALSTATUS','FAULT', 'POLICYTYPE', 'VEHICLECATEGORY', 'VEHICLEPRICE', 'DEDUCTIBLE', 
                       'DRIVERRATING', 'DAYS_POLICY_ACCIDENT', 'DAYS_POLICY_CLAIM', 'PASTNUMBEROFCLAIMS', 
                       'AGEOFVEHICLE', 'AGEOFPOLICYHOLDER', 'POLICEREPORTFILED', 'WITNESSPRESENT', 'AGENTTYPE', 
                       'NUMBEROFSUPPLIMENTS', 'ADDRESSCHANGE_CLAIM', 'NUMBEROFCARS', 'BASEPOLICY']

***ONE HOT ENCODING ON TRAIN DATASET***

In [10]:
# OneHotEncoding on Train (fit & transform)
# OneHotEncoding is to be done on Categorical variables.
ohe = OneHotEncoder(handle_unknown='ignore',sparse=False)
Xcat = pd.DataFrame(ohe.fit_transform(Xtrain[categoricalFeatures]),columns=ohe.get_feature_names(),index=Xtrain.index)
Xtrain = pd.concat([Xtrain,Xcat],axis=1)
Xtrain.drop(labels=categoricalFeatures,axis=1,inplace=True)
Xtrain.sample(5)

Unnamed: 0,WEEKOFMONTH,WEEKOFMONTHCLAIMED,AGE,REPNUMBER,YEAR,x0_Apr,x0_Aug,x0_Dec,x0_Feb,x0_Jan,...,x23_4_to_8_years,x23_no_change,x23_under_6_months,x24_1-vehicle,x24_2-vehicles,x24_3_to_4,x24_5_to_8,x25_All_Perils,x25_Collision,x25_Liability
2588,1,1,53,16,1996,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
930,3,3,56,7,1994,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
1941,1,3,45,11,1995,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
432,1,1,31,1,1994,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
1558,3,4,46,1,1995,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


***ONE HOT ENCODING ON TEST DATASET***

In [11]:
# OneHotEncoding on Test (only transform)
# OneHotEncoding is to be done on Categorical variables.
Xcat = pd.DataFrame(ohe.transform(Xtest[categoricalFeatures]),columns=ohe.get_feature_names(),index=Xtest.index)
Xtest = pd.concat([Xtest,Xcat],axis=1)
Xtest.drop(labels=categoricalFeatures,axis=1,inplace=True)
Xtest.sample(5)

Unnamed: 0,WEEKOFMONTH,WEEKOFMONTHCLAIMED,AGE,REPNUMBER,YEAR,x0_Apr,x0_Aug,x0_Dec,x0_Feb,x0_Jan,...,x23_4_to_8_years,x23_no_change,x23_under_6_months,x24_1-vehicle,x24_2-vehicles,x24_3_to_4,x24_5_to_8,x25_All_Perils,x25_Collision,x25_Liability
8282,3,4,0,7,1996,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
4447,5,1,53,5,1995,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
11709,1,1,30,16,1994,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
7362,3,4,30,7,1994,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
3814,2,3,54,3,1996,0.0,0.0,0.0,0.0,1.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0


***INITIALIZE DECISION TREE CLASSIFIER***

In [12]:
dt = DecisionTreeClassifier()
dt.fit(Xtrain, Ytrain)

DecisionTreeClassifier()

In [13]:
X_Pred = dt.predict(Xtest)
XPred = dt.predict(Xtrain)
#Model Accuracy
print("Train Accuracy:", metrics.accuracy_score(Ytrain,XPred))
print("Test Accuracy:", metrics.accuracy_score(Ytest,X_Pred))
print("Confusion Matrix for Decision Tree:")
print(confusion_matrix(Ytest,X_Pred))
print("Max Depth",dt.get_depth())
print("Leaf",dt.get_n_leaves())
print('Printing the precision and recall, among other metrics')
print(metrics.classification_report(Ytest, X_Pred))
clf_cv_score = cross_val_score(dt, Xtrain, Ytrain, cv=10, scoring="accuracy")
print("Accuracy of Model with Cross Validation is:",clf_cv_score.mean() * 100)

Train Accuracy: 1.0
Test Accuracy: 0.8814832017340145
Confusion Matrix for Decision Tree:
[[10943  1477]
 [   54   444]]
Max Depth 23
Leaf 286
Printing the precision and recall, among other metrics
              precision    recall  f1-score   support

          No       1.00      0.88      0.93     12420
         Yes       0.23      0.89      0.37       498

    accuracy                           0.88     12918
   macro avg       0.61      0.89      0.65     12918
weighted avg       0.97      0.88      0.91     12918

Accuracy of Model with Cross Validation is: 81.39319955406911


***INITIALIZE RANDOM FOREST CLASSIFIER***

In [14]:
rf = RandomForestClassifier()
rf.fit(Xtrain, Ytrain)

RandomForestClassifier()

In [15]:
X_Pred1 = rf.predict(Xtest)
XPred1 = rf.predict(Xtrain)
#Model Accuracy
print("Train Accuracy:", metrics.accuracy_score(Ytrain,XPred1))
print("Test Accuracy:", metrics.accuracy_score(Ytest,X_Pred1))
print("Confusion Matrix for Random Forest:")
print(confusion_matrix(Ytest,X_Pred1))
print('Printing the precision and recall, among other metrics')
print(metrics.classification_report(Ytest, X_Pred1))
clf_cv_score = cross_val_score(rf ,Xtrain, Ytrain, cv=10, scoring="accuracy")
print("Accuracy of Model with Cross Validation is:",clf_cv_score.mean() * 100)

Train Accuracy: 1.0
Test Accuracy: 0.9664034680291067
Confusion Matrix for Random Forest:
[[12065   355]
 [   79   419]]
Printing the precision and recall, among other metrics
              precision    recall  f1-score   support

          No       0.99      0.97      0.98     12420
         Yes       0.54      0.84      0.66       498

    accuracy                           0.97     12918
   macro avg       0.77      0.91      0.82     12918
weighted avg       0.98      0.97      0.97     12918

Accuracy of Model with Cross Validation is: 88.19654403567448


# ***Decision Tree: Random & Grid Search***

***Hyperparameter tuning done for decision tree classifier***

***RANDOM SEARCH***

In [16]:
import time
start_time = time.time()

print("Randomized Search CV for Decision tree")
parameters={'min_samples_leaf' : range(10,300,10),'max_depth': 
            range(5,30,2),'criterion':['gini','entropy']}
dt_random = RandomizedSearchCV(dt,parameters,n_iter=25,cv=5)
dt_random.fit(Xtrain, Ytrain)
grid_parm=dt_random.best_params_
print(grid_parm)
print("Accuracy Score for Decision Tree:{0:6f}".
      format(dt_random.score(Xtest,Ytest)))

print("--- %s seconds ---" % (time.time() - start_time))

Randomized Search CV for Decision tree
{'min_samples_leaf': 30, 'max_depth': 15, 'criterion': 'gini'}
Accuracy Score for Decision Tree:0.897430
--- 3.0424840450286865 seconds ---


***GRID SEARCH***

In [17]:
import time
start_time = time.time()

print("Grid Search CV for Decision tree")
dt_grid = GridSearchCV(dt,parameters)
dt_grid.fit(Xtrain, Ytrain)
grid_parm1=dt_grid.best_params_
print(grid_parm1)
print("Accuracy Score for Decision Tree:{0:6f}".
      format(dt_grid.score(Xtest,Ytest)))

print("--- %s seconds ---" % (time.time() - start_time))

Grid Search CV for Decision tree
{'criterion': 'entropy', 'max_depth': 7, 'min_samples_leaf': 30}
Accuracy Score for Decision Tree:0.897430
--- 74.73814749717712 seconds ---


In [18]:
#Using the parameters obtained from HyperParameterTuning in the DecisionTreeClassifier 
dtRand = DecisionTreeClassifier(**grid_parm)
dtGrid = DecisionTreeClassifier(**grid_parm1)

dtRand.fit(Xtrain,Ytrain)
dtRand_predict = dtRand.predict(Xtest)
dtGrid.fit(Xtrain,Ytrain)
dtGrid_predict = dtGrid.predict(Xtest)

***Accuracy for Decision Tree using Random Search CV for Hyperparameter Tuning***

In [19]:
# Accuracy for Decision Tree using Random Search CV for Hyperparameter Tuning

print("Test Accuracy:", metrics.accuracy_score(Ytest,dtRand_predict))
print("Confusion Matrix for Decision Tree:")
print(confusion_matrix(Ytest,dtRand_predict))
print('Printing the precision and recall, among other metrics')
print(metrics.classification_report(Ytest, dtRand_predict))
clf_cv_score = cross_val_score(dtRand, Xtrain, Ytrain, cv=10, scoring="accuracy")
print("Accuracy of Model with Cross Validation is:",clf_cv_score.mean() * 100)

Test Accuracy: 0.8974299427155906
Confusion Matrix for Decision Tree:
[[11427   993]
 [  332   166]]
Printing the precision and recall, among other metrics
              precision    recall  f1-score   support

          No       0.97      0.92      0.95     12420
         Yes       0.14      0.33      0.20       498

    accuracy                           0.90     12918
   macro avg       0.56      0.63      0.57     12918
weighted avg       0.94      0.90      0.92     12918

Accuracy of Model with Cross Validation is: 87.62976588628761


***Accuracy for Decision Tree using Grid Search for Hyperparameter Tuning***

In [20]:
# Accuracy for Decision Tree using Grid Search for Hyperparameter Tuning

print("Test Accuracy:", metrics.accuracy_score(Ytest,dtGrid_predict))
print("Confusion Matrix for Decision Tree:")
print(confusion_matrix(Ytest,dtGrid_predict))
print('Printing the precision and recall, among other metrics')
print(metrics.classification_report(Ytest, dtGrid_predict))
clf_cv_score = cross_val_score(dtGrid, Xtrain, Ytrain, cv=10, scoring="accuracy")
print("Accuracy of Model with Cross Validation is:",clf_cv_score.mean() * 100)

Test Accuracy: 0.8974299427155906
Confusion Matrix for Decision Tree:
[[11427   993]
 [  332   166]]
Printing the precision and recall, among other metrics
              precision    recall  f1-score   support

          No       0.97      0.92      0.95     12420
         Yes       0.14      0.33      0.20       498

    accuracy                           0.90     12918
   macro avg       0.56      0.63      0.57     12918
weighted avg       0.94      0.90      0.92     12918

Accuracy of Model with Cross Validation is: 87.8964325529543


# ***Random Forest: Random & Grid Search***

***Hyperparameter tuning done for random forest classifier***

***RANDOM SEARCH***

In [21]:
import time
start_time = time.time()

print("Randomized Search CV for Random Forest")
rand_parameters={'min_samples_leaf' : range(10,100,10),'max_depth': 
            range(1,10,2),'max_features':[10,20,30],'n_estimators':[20,30,40]}
rf_random = RandomizedSearchCV(rf,rand_parameters,n_iter=25,cv=5)
rf_random.fit(Xtrain, Ytrain)
grid_parm=rf_random.best_params_
print(grid_parm)
print("Accuracy Score for Random Forest:{0:6f}".
      format(rf_random.score(Xtest,Ytest)))

print("--- %s seconds ---" % (time.time() - start_time))

Randomized Search CV for Random Forest
{'n_estimators': 20, 'min_samples_leaf': 10, 'max_features': 30, 'max_depth': 9}
Accuracy Score for Random Forest:0.932962
--- 8.791207075119019 seconds ---


**GRID SEARCH**

In [22]:
import time
start_time = time.time()

print("Grid Search CV for Random Forest")
rf_grid = GridSearchCV(rf,rand_parameters)
rf_grid.fit(Xtrain, Ytrain)
grid_parm1=rf_grid.best_params_
print(grid_parm1)
print("Accuracy Score for Random Forest:{0:6f}".
      format(rf_grid.score(Xtest,Ytest)))

print("--- %s seconds ---" % (time.time() - start_time))

Grid Search CV for Random Forest
{'max_depth': 9, 'max_features': 30, 'min_samples_leaf': 20, 'n_estimators': 40}
Accuracy Score for Random Forest:0.932730
--- 134.47368168830872 seconds ---


In [23]:
#Using the parameters obtained from HyperParameterTuning in the RandomForestClassifier 
rfRand = RandomForestClassifier(**grid_parm)
rfGrid = RandomForestClassifier(**grid_parm1)

rfRand.fit(Xtrain,Ytrain)
rfRand_predict = rfRand.predict(Xtest)
rfGrid.fit(Xtrain,Ytrain)
rfGrid_predict = rfGrid.predict(Xtest)

***Accuracy for Random Forest using Random Search CV for Hyperparameter Tuning***

In [24]:
# Accuracy for Random Forest using Random Search CV for Hyperparameter Tuning

print("Test Accuracy:", metrics.accuracy_score(Ytest,rfRand_predict))
print("Confusion Matrix for Random Forest:")
print(confusion_matrix(Ytest,rfRand_predict))
print('Printing the precision and recall, among other metrics')
print(metrics.classification_report(Ytest, rfRand_predict))
clf_cv_score = cross_val_score(rfRand, Xtrain, Ytrain, cv=5, scoring="accuracy")
print("Accuracy of Model with Cross Validation is:",clf_cv_score.mean() * 100)

Test Accuracy: 0.9326521133302369
Confusion Matrix for Random Forest:
[[11925   495]
 [  375   123]]
Printing the precision and recall, among other metrics
              precision    recall  f1-score   support

          No       0.97      0.96      0.96     12420
         Yes       0.20      0.25      0.22       498

    accuracy                           0.93     12918
   macro avg       0.58      0.60      0.59     12918
weighted avg       0.94      0.93      0.94     12918

Accuracy of Model with Cross Validation is: 88.92938230383972


***Accuracy for Random Forest using Grid Search for Hyperparameter Tuning***

In [25]:
# Accuracy for Random Forest using Grid Search for Hyperparameter Tuning

print("Test Accuracy:", metrics.accuracy_score(Ytest,rfGrid_predict))
print("Confusion Matrix for Random Forest:")
print(confusion_matrix(Ytest,rfGrid_predict))
print('Printing the precision and recall, among other metrics')
print(metrics.classification_report(Ytest, rfGrid_predict))
clf_cv_score = cross_val_score(rfGrid, Xtrain, Ytrain, cv=10, scoring="accuracy")
print("Accuracy of Model with Cross Validation is:",clf_cv_score.mean() * 100)

Test Accuracy: 0.9285493110388605
Confusion Matrix for Random Forest:
[[11877   543]
 [  380   118]]
Printing the precision and recall, among other metrics
              precision    recall  f1-score   support

          No       0.97      0.96      0.96     12420
         Yes       0.18      0.24      0.20       498

    accuracy                           0.93     12918
   macro avg       0.57      0.60      0.58     12918
weighted avg       0.94      0.93      0.93     12918

Accuracy of Model with Cross Validation is: 88.72954292084727
