In [1]:
# imports
from sklearn.naive_bayes import BernoulliNB
import functions
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import pandas as pd

In [2]:
# get data
# functions.get_data() is a project-local helper (see `import functions`).
# NOTE(review): later cells call X_test.reset_index(...) and index
# y_test["Loan Status"], so X and y appear to be pandas DataFrames with the
# label stored in a "Loan Status" column — confirm against functions.get_data.
X, y = functions.get_data()

In [3]:
# Hold out 33% of the rows as the test set; random_state is pinned so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=10,
)

In [4]:
# Create Bernoulli Naive Bayes classifier (default hyper-parameters)
# NOTE(review): the fitted-estimator repr shows binarize=0.0, i.e. every
# feature is thresholded at 0 before fitting. The feature values displayed in
# the final cell's output look standardized (continuous, centred near 0) —
# confirm that reducing them to above/below zero is intended.
bnb = BernoulliNB()

In [5]:
# Train the classifier on the training split.
# The label frame's underlying array is flattened with ravel() so that the
# targets are handed to fit() as a 1-D array.
y_train_flat = y_train.values.ravel()
bnb.fit(X_train, y_train_flat)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)

In [6]:
# predict labels for the held-out test features using the fitted classifier
y_pred = bnb.predict(X_test)

In [7]:
# Score the predictions against the true test labels.
# average="weighted": per-class F1 scores are averaged, weighted by the
# number of true instances of each class.
f1 = f1_score(
    y_true=y_test,
    y_pred=y_pred,
    average="weighted",
)

In [8]:
# Render the score as this cell's displayed value.
f1_message = "F1 score is = {0}"
f1_message.format(f1)

'F1 score is = 0.7290512586583276'

In [9]:
# Persist the final F1 score to a text file.
# The context manager guarantees the handle is closed even if write() raises;
# plain "w" is sufficient because the file is only written, never read back.
# Note: open() does not create directories, so results/files/ must already exist.
with open("results/files/bnb_results.txt", "w") as results_file:
    results_file.write("Final F1 score = {0} \n".format(f1))

In [10]:
# Write predictions, true labels and the test features to one CSV for output.
#
# The test features are re-indexed 0..n-1 so that concat(axis=1) lines rows up
# by position with the prediction frame (which is built from plain arrays and
# therefore already has a 0..n-1 index). reset_index() without inplace returns
# a new frame, so X_test itself is left untouched and re-running this cell
# (or any earlier one) sees the same X_test.
X_test_positional = X_test.reset_index(drop=True)

# .values strips y_test's original index so the labels are assigned by
# position, matching the order of y_pred.
result = pd.DataFrame(y_pred, columns=["y_pred"]).assign(
    y_test=y_test["Loan Status"].values
)

result = pd.concat([result, X_test_positional], axis=1)

# The default index column is kept in the file, as before.
result.to_csv("results/datasets/bnb_y_pred.csv")

result.head(20)

Unnamed: 0,y_pred,y_test,Current Loan Amount,Term,Credit Score,Annual Income,Years in current job,Monthly Debt,Years of Credit History,Months since last delinquent,...,Purpose_Medical Bills,Purpose_Other,Purpose_Take a Trip,Purpose_major_purchase,Purpose_moving,Purpose_other,Purpose_renewable_energy,Purpose_small_business,Purpose_vacation,Purpose_wedding
0,1,1,-0.40047,0.639329,0.091538,-0.747735,0.230938,-0.14915,2.033864,0.599948,...,-0.111088,-0.180563,-0.077776,-0.056524,-0.038889,3.840193,-0.01048,-0.055289,-0.03357,-0.03274
1,1,1,-0.402311,0.639329,0.161793,-1.206717,-1.430694,-1.091931,-0.490622,-0.814524,...,-0.111088,-0.180563,-0.077776,-0.056524,-0.038889,-0.260404,-0.01048,-0.055289,-0.03357,-0.03274
2,1,1,2.530987,0.639329,0.05641,-0.06313,-0.322939,0.033678,-1.04661,1.28437,...,-0.111088,-0.180563,-0.077776,-0.056524,-0.038889,-0.260404,-0.01048,-0.055289,-0.03357,-0.03274
3,1,1,-0.398169,0.639329,-0.681266,-0.552426,-0.599878,-0.854979,-0.160034,1.375627,...,-0.111088,-0.180563,-0.077776,-0.056524,-0.038889,-0.260404,-0.01048,-0.055289,-0.03357,-0.03274
4,1,0,-0.385431,-1.564141,-2.859167,0.221173,1.061754,0.593562,0.516167,-0.632012,...,-0.111088,-0.180563,-0.077776,-0.056524,-0.038889,-0.260404,-0.01048,-0.055289,-0.03357,-0.03274
5,1,1,-0.391377,0.639329,1.039979,0.395609,-1.707632,0.151266,-0.355382,-1.270806,...,-0.111088,-0.180563,-0.077776,-0.056524,-0.038889,-0.260404,-0.01048,-0.055289,-0.03357,-0.03274
6,1,1,-0.40111,0.639329,0.899469,-1.052728,-1.153755,-0.798297,-0.129981,-0.266987,...,-0.111088,5.538246,-0.077776,-0.056524,-0.038889,-0.260404,-0.01048,-0.055289,-0.03357,-0.03274
7,1,1,2.530987,0.639329,0.969724,-0.395453,1.061754,0.487938,0.18558,-0.860152,...,-0.111088,-0.180563,-0.077776,-0.056524,-0.038889,-0.260404,-0.01048,-0.055289,-0.03357,-0.03274
8,1,1,2.530987,0.639329,0.934597,-0.453828,-0.322939,-1.219301,-1.13677,-0.540755,...,-0.111088,-0.180563,-0.077776,-0.056524,-0.038889,-0.260404,-0.01048,-0.055289,-0.03357,-0.03274
9,1,1,-0.391722,0.639329,0.372557,0.264036,-0.322939,0.859388,0.200606,1.010601,...,-0.111088,-0.180563,-0.077776,-0.056524,-0.038889,-0.260404,-0.01048,-0.055289,-0.03357,-0.03274
