# Classification of the Marketing Dataset
### Who Bought the Product?
- Stephen W. Thomas
- Used for MMAI 869

In [1]:
import datetime

# Record when this run started so the outputs below can be dated.
run_timestamp = datetime.datetime.now()
print(run_timestamp)

2019-10-05 09:43:41.040090


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

import sklearn.metrics
from sklearn.preprocessing import StandardScaler

import itertools
import scipy

from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last
# one. Several cells below list multiple bare expressions and rely on this.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import sklearn

# Report the installed scikit-learn release for reproducibility.
version_message = 'The scikit-learn version is {}.'.format(sklearn.__version__)
print(version_message)

# Load Data

In [None]:
# Read the marketing data, then show its column summary and the first 15 rows.
df = pd.read_csv(filepath_or_buffer='data/marketing.csv')
df.info()
df.head(n=15)

In [None]:
# Predictors are the Age and Income columns; the target is the Bought column.
feature_columns = ['Age', 'Income']
X = df[feature_columns]
y = df['Bought']

# Splitting the Data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Decision Trees

In [None]:
from sklearn.tree import DecisionTreeClassifier

# A deliberately small entropy-based tree: depth capped at 3, at most 5 leaves.
clf = DecisionTreeClassifier(
    criterion="entropy",
    max_depth=3,
    max_leaf_nodes=5,
    random_state=42,
)
clf.fit(X_train, y_train)

# Predicted labels for the held-out test rows.
y_pred_dt = clf.predict(X_test)

## Use the Model to Predict Someone New

In [None]:
# Score one new customer. The values are wrapped in a one-row DataFrame that
# reuses the training column names, so it is explicit which value is which
# feature and the input matches what the tree was fit on.
new_customer = pd.DataFrame([[2, 2]], columns=X_train.columns)

# Class probabilities first, then the predicted label.
clf.predict_proba(new_customer)
clf.predict(new_customer)

## Model Parameters

Surprisingly, older versions of scikit-learn do not have a function to print the decision tree in text format (newer releases provide `sklearn.tree.export_text`). (It does have a way to graphically render the tree, which we'll do later.) For now, we'll just print a few stats about the tree.

In [None]:
# Print a few raw arrays/values from the fitted tree structure, one per line:
# node count, per-node impurity, left-child indices and split thresholds.
for tree_attribute in ("node_count", "impurity", "children_left", "threshold"):
    print(getattr(clf.tree_, tree_attribute))

## Model Performance

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix for the decision tree's test-set predictions.
confusion_matrix(y_true=y_test, y_pred=y_pred_dt)

In [None]:
from sklearn.metrics import classification_report

# Display names for the report rows, taken from the labels the tree was fit on.
# NOTE(review): assumes the test split contains the same set of labels as the
# training split — confirm.
class_names = [str(label) for label in clf.classes_]

print(classification_report(y_test, y_pred_dt, target_names=class_names))

## Model Selection

### Grid Search

In [None]:
from sklearn.model_selection import GridSearchCV

parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}

treeclf = DecisionTreeClassifier(splitter='best', presort=True, class_weight=None, random_state=42)
parameters = {'criterion':('gini', 'entropy'), 'max_depth':[2, 4, 6, 8, 10], 'min_samples_split':[2, 10, 50], 'min_samples_leaf':[1, 5, 10],
             'max_features':[None, 'auto'], 'max_leaf_nodes':[None, 5, 10, 50], 'min_impurity_decrease':[0, 0.1, 0.2]}
cv_clf = GridSearchCV(treeclf, parameters, scoring='roc_auc', cv=5, return_train_score=True)
%time cv_clf.fit(X, y)

In [None]:
# Show the winning hyperparameter combination, its cross-validated score,
# and the corresponding fitted estimator.
cv_clf.best_params_
cv_clf.best_score_
cv_clf.best_estimator_

In [None]:
# Draw the decision regions of the best grid-search tree on a single 10x10
# figure, label the axes, and save the result as a PNG.
# NOTE(review): `plot_boundaries` is not defined or imported in the visible
# cells — confirm where it comes from before running.
figure = plt.figure(figsize=(10, 10));
ax = plt.subplot(1, 1, 1);
plot_boundaries(X_train, X_test, y_train, y_test, cv_clf.best_estimator_, "Decision Tree", ax, hide_ticks=False, show_train=False)
ax.set_xlabel("Age", fontsize=22)
ax.set_ylabel("Income", fontsize=22)
plt.tight_layout();
# NOTE(review): assumes the 'out/' directory already exists — confirm.
plt.savefig('out/marketing-dt-decision-grid-test.png', transparent=False);

# Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

# Fit a Gaussian Naive Bayes model on the training split and display it.
gnb = GaussianNB().fit(X_train, y_train)
gnb

# Predicted labels for the held-out test rows.
y_pred_gnb = gnb.predict(X_test)

## Model Parameters

In [None]:
gnb.theta_ # Mean of each feature per class
# Variance of each feature per class. Current scikit-learn exposes this as
# `var_`; fall back to the older `sigma_` name when `var_` is not available.
gnb.var_ if hasattr(gnb, "var_") else gnb.sigma_

## Model Performance

In [None]:
# Confusion matrix for the Naive Bayes test-set predictions.
confusion_matrix(y_true=y_test, y_pred=y_pred_gnb)

In [None]:
# Per-class precision/recall/F1 for Naive Bayes. Row names come from the labels
# the model was fit on, so the cell does not depend on a separately defined list.
# NOTE(review): assumes the test split contains the same set of labels — confirm.
print(classification_report(y_test, y_pred_gnb,
                            target_names=[str(label) for label in gnb.classes_]))

In [None]:
# Headline test-set metrics for Naive Bayes. The metric functions are reached
# through the `sklearn.metrics` module imported at the top of the notebook.
print("Accuracy = {:.2f}".format(sklearn.metrics.accuracy_score(y_test, y_pred_gnb)))
print("Kappa = {:.2f}".format(sklearn.metrics.cohen_kappa_score(y_test, y_pred_gnb)))
print("F1 Score = {:.2f}".format(sklearn.metrics.f1_score(y_test, y_pred_gnb)))
# Log loss scores predicted probabilities, so pass predict_proba output rather
# than the hard class labels.
print("Log Loss = {:.2f}".format(sklearn.metrics.log_loss(y_test, gnb.predict_proba(X_test))))

# KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 3-nearest-neighbours classifier trained on the training split.
knn_clf = KNeighborsClassifier(
    n_neighbors=3,
)
knn_clf.fit(X_train, y_train)

# Predicted labels for the held-out test rows.
y_pred_knn = knn_clf.predict(X_test)

## Model Parameters

In [None]:
# Distance metric the fitted KNN model actually uses, followed by any extra
# keyword arguments passed to that metric.
knn_clf.effective_metric_
knn_clf.effective_metric_params_

## Model Performance

In [None]:
# Confusion matrix for the KNN test-set predictions.
print(confusion_matrix(y_true=y_test, y_pred=y_pred_knn))

In [None]:
# Per-class precision/recall/F1 for KNN. Row names come from the labels the
# model was fit on, so the cell does not depend on a separately defined list.
# NOTE(review): assumes the test split contains the same set of labels — confirm.
print(classification_report(y_test, y_pred_knn,
                            target_names=[str(label) for label in knn_clf.classes_]))

In [None]:
# Headline test-set metrics for KNN. The metric functions are reached through
# the `sklearn.metrics` module imported at the top of the notebook.
print("Accuracy = {:.2f}".format(sklearn.metrics.accuracy_score(y_test, y_pred_knn)))
print("Kappa = {:.2f}".format(sklearn.metrics.cohen_kappa_score(y_test, y_pred_knn)))
print("F1 Score = {:.2f}".format(sklearn.metrics.f1_score(y_test, y_pred_knn)))
# Log loss scores predicted probabilities, so pass predict_proba output rather
# than the hard class labels.
print("Log Loss = {:.2f}".format(sklearn.metrics.log_loss(y_test, knn_clf.predict_proba(X_test))))

# SVM - Linear

In [None]:
from sklearn.svm import SVC

# Linear-kernel SVM with a small C, trained on the training split.
svm_clf = SVC(
    C=0.025,
    kernel="linear",
)
svm_clf.fit(X_train, y_train)

# Predicted labels for the held-out test rows.
y_pred_svm = svm_clf.predict(X_test)

## Model Parameters

In [None]:
# Number of support vectors found for each class.
svm_clf.n_support_

In [None]:
# The training points that ended up as support vectors.
svm_clf.support_vectors_

In [None]:
# Dual coefficients attached to the support vectors in the fitted model.
svm_clf.dual_coef_ 

In [None]:
# Intercept (constant term) of the fitted decision function.
svm_clf.intercept_ 

In [None]:
# Confusion matrix for the linear SVM test-set predictions.
print(confusion_matrix(y_true=y_test, y_pred=y_pred_svm))

In [None]:
# Per-class precision/recall/F1 for the linear SVM. Row names come from the
# labels the model was fit on, so the cell does not depend on a separately
# defined list.
# NOTE(review): assumes the test split contains the same set of labels — confirm.
print(classification_report(y_test, y_pred_svm,
                            target_names=[str(label) for label in svm_clf.classes_]))

In [None]:
# Headline test-set metrics for the linear SVM. The metric functions are reached
# through the `sklearn.metrics` module imported at the top of the notebook.
print("Accuracy = {:.2f}".format(sklearn.metrics.accuracy_score(y_test, y_pred_svm)))
print("Kappa = {:.2f}".format(sklearn.metrics.cohen_kappa_score(y_test, y_pred_svm)))
print("F1 Score = {:.2f}".format(sklearn.metrics.f1_score(y_test, y_pred_svm)))
# NOTE(review): log loss is meant for predicted probabilities, but this value is
# computed from hard labels, so it is not comparable with a probability-based
# log loss. Presumably the SVC would need probability estimates enabled to
# report a meaningful figure — confirm.
print("Log Loss = {:.2f}".format(sklearn.metrics.log_loss(y_test, y_pred_svm)))

## Model Visualization

In [None]:
# Check what container types the features and target are held in before the
# plotting code below indexes into them.
type(X)
type(y)

In [None]:

# Plot the linear SVM's separating line, its margins, the support vectors and
# the data points.
# NOTE(review): the fixed xx range of [-5, 5] presumably expects features on a
# standardized scale — confirm against the data.

# get the separating hyperplane
w = svm_clf.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(-5, 5)
yy = a * xx - (svm_clf.intercept_[0]) / w[1]

# plot the parallels to the separating hyperplane that pass through the
# support vectors (margin away from hyperplane in direction
# perpendicular to hyperplane). This is sqrt(1+a^2) away vertically in
# 2-d.
margin = 1 / np.sqrt(np.sum(svm_clf.coef_ ** 2))
yy_down = yy - np.sqrt(1 + a ** 2) * margin
yy_up = yy + np.sqrt(1 + a ** 2) * margin

# plot the line, the points, and the nearest vectors to the plane
plt.figure(figsize=(4, 3))
plt.clf()
plt.plot(xx, yy, 'k-')
plt.plot(xx, yy_down, 'k--')
plt.plot(xx, yy_up, 'k--')

plt.scatter(svm_clf.support_vectors_[:, 0], svm_clf.support_vectors_[:, 1], s=80,
            facecolors='none', zorder=10, edgecolors='k')
# X is a pandas DataFrame, so its columns are selected by position with .iloc
# (X[:, 0] is NumPy-style indexing and does not work on a DataFrame). Points are
# coloured by the target `y`.
# NOTE(review): assumes `y` holds numeric labels usable as colour values — confirm.
plt.scatter(X.iloc[:, 0], X.iloc[:, 1], c=y, zorder=10, cmap=plt.cm.Paired,
            edgecolors='k')

plt.axis('tight')
# Candidate axis limits; they are assigned here but not applied to the plot.
x_min = -4.8
x_max = 4.2
y_min = -6
y_max = 6

In [None]:
# Draw the decision regions of the linear SVM on a single 10x10 figure, label
# the axes, and save the result as a PNG.
# NOTE(review): `plot_boundaries` is not defined or imported in the visible
# cells — confirm where it comes from before running.
figure = plt.figure(figsize=(10, 10));
ax = plt.subplot(1, 1, 1);
plot_boundaries(X_train, X_test, y_train, y_test, svm_clf, "SVM (Linear)", ax, hide_ticks=False, show_train=False)
ax.set_xlabel("Age", fontsize=22)
ax.set_ylabel("Income", fontsize=22)
plt.tight_layout();
# NOTE(review): assumes the 'out/' directory already exists — confirm.
plt.savefig('out/marketing-svm-decision-test.png', transparent=False);

## Experimenting with Different Kernels

In [None]:
# Compare linear SVMs at three values of C, one panel per model.
# NOTE(review): `plot_boundaries` is not defined in the visible cells — confirm.
names = ["Linear C=0.0025", "Linear C=0.25", "Linear C=25"]
classifiers = [SVC(kernel="linear", C=c_value) for c_value in (0.0025, 0.25, 25)]

# Not referenced in this cell; kept in case other code reads it.
rng = np.random.RandomState(2)

figure = plt.figure(figsize=(27, 10))

# Fit each classifier and draw it in its own subplot (panels are 1-based).
for panel, (name, clf_tmp) in enumerate(zip(names, classifiers), start=1):
    ax = plt.subplot(1, 3, panel)
    clf_tmp.fit(X_train, y_train)
    plot_boundaries(X_train, X_test, y_train, y_test, clf_tmp, name, ax, hide_ticks=True, show_train=False)

plt.tight_layout();
plt.savefig('out/marketing-svm-decision-test-all-c.png', transparent=False);

In [None]:
# Compare polynomial-kernel SVMs of degree 2, 3 and 4, one panel per model.
# NOTE(review): `plot_boundaries` is not defined in the visible cells — confirm.
names = ["Poly 2", "Poly 3", "Poly 4"]
classifiers = [
    SVC(kernel="poly", degree=poly_degree, C=c_value)
    for poly_degree, c_value in ((2, 0.25), (3, 1), (4, 1))
]

# Not referenced in this cell; kept in case other code reads it.
rng = np.random.RandomState(2)

figure = plt.figure(figsize=(27, 10))

# Fit each classifier and draw it in its own subplot (panels are 1-based).
for panel, (name, clf_tmp) in enumerate(zip(names, classifiers), start=1):
    ax = plt.subplot(1, 3, panel)
    clf_tmp.fit(X_train, y_train)
    plot_boundaries(X_train, X_test, y_train, y_test, clf_tmp, name, ax, hide_ticks=True, show_train=False)

plt.tight_layout();
plt.savefig('out/marketing-svm-decision-test-all-poly.png', transparent=False);

In [None]:
# Compare RBF-kernel SVMs at three gamma values, one panel per model.
# NOTE(review): `plot_boundaries` is not defined in the visible cells — confirm.
names = ["RBF G=0.05", "RBF G=0.5", "RBF G=5.0"]
classifiers = [SVC(kernel="rbf", gamma=gamma_value, C=1) for gamma_value in (0.05, 0.5, 5.0)]

# Not referenced in this cell; kept in case other code reads it.
rng = np.random.RandomState(2)

figure = plt.figure(figsize=(27, 10))

# Fit each classifier and draw it in its own subplot (panels are 1-based).
for panel, (name, clf_tmp) in enumerate(zip(names, classifiers), start=1):
    ax = plt.subplot(1, 3, panel)
    clf_tmp.fit(X_train, y_train)
    plot_boundaries(X_train, X_test, y_train, y_test, clf_tmp, name, ax, hide_ticks=True, show_train=False)

plt.tight_layout();
plt.savefig('out/marketing-svm-decision-test-all-rbf.png', transparent=False);

# NN

In [None]:
from sklearn.neural_network import MLPClassifier

# Small multilayer perceptron: one hidden layer of 3 ReLU units, trained with
# L-BFGS. `hidden_layer_sizes` is written as a real one-element tuple `(3,)`;
# `(3)` is just the integer 3 in parentheses.
nn_clf = MLPClassifier(solver='lbfgs', activation='relu', alpha=1e-3,
                       hidden_layer_sizes=(3,), random_state=1, verbose=True)
nn_clf.fit(X_train, y_train)

# Predicted labels for the held-out test rows.
y_pred_nn = nn_clf.predict(X_test)

## Model Parameters

In [None]:
# Loss value recorded on the fitted network.
nn_clf.loss_

In [None]:
# Layer count reported by the fitted network.
nn_clf.n_layers_

In [None]:
# Keep the learned weight matrices in `w` and display them.
# Note: this reuses the name `w` that the SVM plotting cell assigns earlier.
w = nn_clf.coefs_ # The ith element in the list represents the weight matrix corresponding to layer i.
w

In [None]:
# Keep the learned bias vectors in `b` and display them.
b = nn_clf.intercepts_ # The ith element in the list represents the bias vector corresponding to layer i + 1.
b

In [None]:
# Name of the activation function applied at the output layer.
nn_clf.out_activation_

In [None]:
# Class probabilities for one new customer. The values are wrapped in a one-row
# DataFrame that reuses the training column names, so it is explicit which
# value is which feature and the input matches what the network was fit on.
nn_clf.predict_proba(pd.DataFrame([[0.5, 1]], columns=X_train.columns))

## Model Performance

In [None]:
# Confusion matrix for the neural network test-set predictions.
print(confusion_matrix(y_true=y_test, y_pred=y_pred_nn))

In [None]:
# Per-class precision/recall/F1 for the neural network. Row names come from the
# labels the model was fit on, so the cell does not depend on a separately
# defined list.
# NOTE(review): assumes the test split contains the same set of labels — confirm.
print(classification_report(y_test, y_pred_nn,
                            target_names=[str(label) for label in nn_clf.classes_]))

In [None]:
# Headline test-set metrics for the neural network. The metric functions are
# reached through the `sklearn.metrics` module imported at the top of the notebook.
print("Accuracy = {:.2f}".format(sklearn.metrics.accuracy_score(y_test, y_pred_nn)))
print("Kappa = {:.2f}".format(sklearn.metrics.cohen_kappa_score(y_test, y_pred_nn)))
print("F1 Score = {:.2f}".format(sklearn.metrics.f1_score(y_test, y_pred_nn)))
# Log loss scores predicted probabilities, so pass predict_proba output rather
# than the hard class labels.
print("Log Loss = {:.2f}".format(sklearn.metrics.log_loss(y_test, nn_clf.predict_proba(X_test))))