In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [3]:
df=pd.read_csv('data/bank-full.csv', sep=';',header=0)
# Basic data cleanup to get rid of bad values and turn Y into 0-1 val
df=df.dropna(axis=1, how='all')
df=df.dropna(axis=0, how='any')
cat_cols=[]
i = 0 
for eachcol in df.dtypes:
    if eachcol.name=="object":
        cat_cols.append(df.columns[i])
    i=i+1
# Convert the string values into integers, and give each value its own column, hot encode
df=pd.get_dummies(df,columns=cat_cols)
df.head()
X=df.iloc[:,0:-2]
y=df['y_yes']
# Use sklearn to split up the dataset
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=12) # Could Add a RandomState number to set the rand kernel
scaler = MinMaxScaler()
scaler.fit(x_train)
x_train_n=scaler.transform(x_train)
x_test_n=scaler.transform(x_test)

# Decision Tree 

In [4]:
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier(max_depth=5)
model=clf.fit(x_train_n,y_train)
accuracy = model.score(x_test_n, y_test)
print("Decision Tree accuracy: {0:.3f}%".format(accuracy))

Decision Tree accuracy: 0.903%


# Neural Network

In [5]:
from sklearn.neural_network import MLPClassifier
X = [[0., 0.], [1., 1.]]
y = [0, 1]
clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                    hidden_layer_sizes=(5, 2), random_state=1)

model = clf.fit(x_train_n, y_train)
accuracy = model.score(x_test_n, y_test)
print("Neural Net Accuracy: {0:.3f}%".format(accuracy))


Neural Net Accuracy: 0.884%


# Boosting

In [6]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_classification
clf = AdaBoostClassifier(n_estimators=100, random_state=0)
model = clf.fit(x_train_n, y_train)

accuracy = model.score(x_test_n, y_test)
print("Boosting Accuracy: {0:.3f}%".format(accuracy))

Boosting Accuracy: 0.903%


# Support Vector Machines

In [7]:
from sklearn import svm
clf = svm.SVC()
model = clf.fit(x_train_n, y_train)
accuracy = model.score(x_test_n, y_test)
print("SVM Accuracy: {0:.3f}%".format(accuracy))

SVM Accuracy: 0.895%


# k-Nearest Neighbors

In [8]:
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(3)
model = clf.fit(x_train_n, y_train)
accuracy = model.score(x_test_n, y_test)
print("k-NN Accuracy: {0:.3f}%".format(accuracy))

k-NN Accuracy: 0.885%
