# Decision modeling with Naive Bayes

## Loading customer delinquency data

In [8]:
import pandas as pd

# Load the raw customer delinquency data set from the local data directory.
df = pd.read_csv('./data/Delinquent_customer-Insurance_Premium.csv')

# Remove unrelated data
df = df.drop(columns=["APPLICATION_SUBMISSION_TYPE",
                      "POSTAL_ADDRESS_TYPE",
                      "RESIDENTIAL_PHONE",
                      "EMAIL",
                      "PROFESSIONAL_PHONE",
                      "MONTHS_IN_RESIDENCE",
                      "OTHER_INCOMES",
                      "PERSONAL_ASSETS_VALUE",
                      "QUANT_CARS",
                      "MONTHS_IN_THE_JOB",
                      "QUANT_ADDITIONAL_CARDS"])

# Fill all missing data with the most frequent value of each column.
# This has to happen BEFORE the categorical encoding below:
# `pd.Categorical(...).codes` encodes a missing value as -1, so calling
# fillna() on the already-encoded column would never find a NaN to replace.
for column in ('GENDER', 'RESIDENCE_TYPE', 'PROFESSION_CODE', 'OCCUPATION_TYPE'):
    df[column] = df[column].fillna(df[column].mode()[0])

# Converting categorical data to numeric
# NOTE(review): MARITAL_STATUS is not imputed above, so any missing value in
# it is encoded as -1 here -- confirm whether that is intended.
df['MARITAL_STATUS'] = pd.Categorical(df['MARITAL_STATUS']).codes
df['GENDER'] = pd.Categorical(df['GENDER']).codes

display(df)

Unnamed: 0,GENDER,MARITAL_STATUS,RESIDENCE_TYPE,PROFESSION_CODE,OCCUPATION_TYPE,PRODUCT,QUANT_DEPENDANTS,PERSONAL_MONTHLY_INCOME,QUANT_BANKING_ACCOUNTS,AGE,TARGET_LABEL
0,0,6,1.0,9.0,4.0,1,1,900.00,0,32,1
1,0,2,1.0,11.0,4.0,1,0,750.00,0,34,1
2,0,2,1.0,11.0,2.0,1,0,500.00,0,27,0
3,0,2,1.0,9.0,2.0,1,0,500.00,0,61,0
4,1,2,1.0,9.0,5.0,1,0,1200.00,0,48,1
...,...,...,...,...,...,...,...,...,...,...,...
49995,0,1,1.0,9.0,4.0,1,2,1451.00,1,36,1
49996,0,1,1.0,9.0,2.0,2,0,450.00,0,21,0
49997,1,2,2.0,9.0,2.0,1,3,1555.00,0,41,0
49998,0,1,1.0,9.0,2.0,1,1,1443.27,0,28,1


## Readying data frame for machine learning

In [9]:
from sklearn.model_selection import train_test_split

# Label vector: the TARGET_LABEL column as a NumPy array.
y = df.loc[:, 'TARGET_LABEL'].to_numpy()

# Every remaining column becomes a feature.
# NOTE(review): rebinding `df` here means this cell cannot be re-run on its
# own (the second run would not find TARGET_LABEL); re-run the loading cell
# first.
df = df.drop('TARGET_LABEL', axis='columns')
X = df.to_numpy()

# Hold out 33% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

## Building a Naive Bayes classifier

In [11]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report

# Gaussian Naive Bayes classifier created with its default settings.
gnb = GaussianNB()

print("Training Gaussian NaiveBaye's classifier...")
gnb.fit(X_train, y_train)

# Score the held-out split so the metrics reflect data the model has not seen.
print("Predicting with test data...")
y_pred = gnb.predict(X_test)

# Per-class precision / recall / F1 plus overall accuracy on the test split.
print("Evaluating the prediction...")
print(classification_report(y_test, y_pred))

Training Gaussian NaiveBaye's classifier...
Predicting with test data...
Evaluating the prediction...
              precision    recall  f1-score   support

           0       0.74      0.97      0.84     12215
           1       0.33      0.04      0.07      4285

    accuracy                           0.73     16500
   macro avg       0.54      0.51      0.46     16500
weighted avg       0.63      0.73      0.64     16500

