# This notebook uses the UCI banknote dataset
- https://archive.ics.uci.edu/ml/datasets/banknote+authentication

In [None]:
import pandas as pd
import matplotlib.pyplot as pyplot
import itertools
from mpl_toolkits import mplot3d
from scipy.stats import zscore
import keras
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
from sklearn import metrics

%matplotlib inline

## Data set

In [None]:
# Load the banknote CSV; every column except the last one is treated as a feature.
df = pd.read_csv('../dataset/data_banknote_authentication.csv')
feats = df.columns[:-1].tolist()

# Keep only the first row of each group that repeats the same feature values,
# and renumber the index from zero.
df = df.drop_duplicates(subset=feats, keep='first', ignore_index=True)

# Partition the rows by label and report the class balance.
neg = df.loc[df['class'] == 0]
pos = df.loc[df['class'] == 1]
total = len(df)
print("negative: {:.0%}, positive: {:.0%}".format(len(neg) / total, len(pos) / total))
df.describe()

## Feature Plots

In [None]:
# One-dimensional plots: for every feature, plot its values against their position
# within the class subset (positive class in red, negative class in blue).
for feat in feats:
    pos_vals = list(pos[feat])
    neg_vals = list(neg[feat])
    fig, ax = pyplot.subplots()
    ax.plot(range(len(pos_vals)), pos_vals, 'ro',
            range(len(neg_vals)), neg_vals, 'bo')
    fig.suptitle(feat)

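The blue (negative) points extend further along the x-axis than the red (positive) points because the classes are imbalanced and every sample of each class is plotted against its index.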

- Variance appears to be the most informative feature. 
- Skewness and curtosis show that some positive examples can be identified on the outskirts of their respective distributions.
- Entropy appears to be less informative than the other features.

In [None]:
# Two-dimensional plots: one scatter per unordered pair of features,
# first feature on the x-axis, second on the y-axis (positives red, negatives blue).
for x_feat, y_feat in itertools.combinations(feats, 2):
    fig, ax = pyplot.subplots()
    ax.plot(list(pos[x_feat]), list(pos[y_feat]), 'ro',
            list(neg[x_feat]), list(neg[y_feat]), 'bo')
    fig.suptitle("{}-{}".format(x_feat, y_feat))
    

 - Plots with variance show formation of point clouds.
 - When coupled with variance, skewness and curtosis show isolated clouds of positive examples.
 - variance-entropy shows the potential for separability.
 - Plots without variance are not well separated.
 - skewness-entropy and skewness-curtosis appear to show the shifts in the feature distributions.

In [None]:
# Three-dimensional plots: one 3D scatter per feature triple,
# positive class in red and negative class in blue.
for f1, f2, f3 in itertools.combinations(feats, 3):
    fig = pyplot.figure()
    ax = pyplot.axes(projection='3d')
    # The loop name is `subset` so it cannot be mistaken for the notebook-wide `df`.
    for subset, colour in ((pos, 'r'), (neg, 'b')):
        ax.scatter3D(list(subset[f1]), list(subset[f2]), list(subset[f3]),
                     c=colour, marker='o')
    # Label every axis: in a 3D view the title alone does not say which axis is which.
    ax.set_xlabel(f1)
    ax.set_ylabel(f2)
    ax.set_zlabel(f3)
    # The title depends only on the triple, so it is set once per figure.
    fig.suptitle("{}-{}-{}".format(f1, f2, f3))

- variance-skewness-curtosis appears to show two nearly distinct clouds. Rotating that graph may show a few mixed points, but not many.
- The other plots show cloud formation, but they appear to be less distinct.

## Neural Net

In [None]:
# Standardize: overwrite every feature column of `df` with its z-score.
# NOTE(review): the scores are computed over all rows, i.e. before the train/test
# split performed in a later cell, so held-out rows contribute to the scaling.
df.update({name: zscore(df[name]) for name in feats})

# Plot each standardized feature against the row position, one colour per feature.
xs = range(len(df))
for name, fmt in zip(feats, ('ro', 'bo', 'go', 'yo')):
    fig, ax = pyplot.subplots()
    ax.plot(xs, df[name], fmt)
    fig.suptitle(name)

Standardization (z-scores) was chosen because the outliers in curtosis and entropy produced distinct point clouds.

- The distributions of the standardized features overlap within the range [-2, 2].

In [None]:
# Split the data 80/20, cutting each class separately so that both parts keep
# (approximately) the class proportions of the full data set.
sp = 0.8          # fraction of all rows that goes to the training set
SPLIT_SEED = 0    # fixed seed so the shuffles below yield the same split on every run

n = len(df.loc[df['class'] == 0]) / len(df)   # share of negative rows
p = 1 - n                                      # share of positive rows
train = int(len(df) * sp)                      # total number of training rows
trp = int(train * p)                           # training rows taken from the positives
trn = train - trp                              # training rows taken from the negatives

# Shuffle each class, then cut it into a training head and a test tail.
pos = df.loc[df['class'] == 1].sample(frac=1, random_state=SPLIT_SEED)
train_pos, test_pos = pos.iloc[:trp], pos.iloc[trp:]

neg = df.loc[df['class'] == 0].sample(frac=1, random_state=SPLIT_SEED)
train_neg, test_neg = neg.iloc[:trn], neg.iloc[trn:]

# Report the class balance inside each part and the overall train/test ratio.
df_train_tot = len(train_pos) + len(train_neg)
df_test_tot = len(test_pos) + len(test_neg)
print("train pos: {:.0%}, train neg: {:.0%}".format(
    len(train_pos) / df_train_tot, len(train_neg) / df_train_tot))
# This line reports the test part, so it is labelled "test".
print("test pos: {:.0%}, test neg: {:.0%}".format(
    len(test_pos) / df_test_tot, len(test_neg) / df_test_tot))
print("train: {:.0%}, test: {:.0%}".format(df_train_tot / len(df), df_test_tot / len(df)))

In [None]:
def xs_and_ys(df):
    """Separate a frame into a feature matrix and a label vector.

    Every column except the last one is treated as a feature; the last
    column is treated as the label.

    Args:
        df: frame whose final column holds the labels.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays.
    """
    column_names = list(df)
    features = np.asarray(df[column_names[:-1]])
    labels = np.asarray(df[column_names[-1]])
    return features, labels

# Recombine the per-class splits and shuffle the rows (this is a shuffle, not a sort),
# then separate features from labels.
all_train = pd.concat([train_pos, train_neg]).sample(frac=1)
all_test = pd.concat([test_pos, test_neg]).sample(frac=1)
trn_x, trn_y = xs_and_ys(all_train)
tst_x, tst_y = xs_and_ys(all_test)

# Create model and train: one hidden ReLU layer of 3 units feeding a single
# sigmoid output, optimized with SGD on binary cross-entropy.
model = Sequential()
# The input width is read from the training matrix instead of being hard-coded,
# so the cell keeps working if the number of feature columns changes.
model.add(Dense(3, input_dim=trn_x.shape[1], activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='SGD', metrics=['accuracy'])
model.fit(trn_x, trn_y, epochs=15, batch_size=5)

In [None]:
# Evaluate model on the test rows
loss, acc = model.evaluate(tst_x, tst_y)
print("eval: loss: {:.04}, acc: {:.0%}".format(loss, acc))

# Create ROC graph: the first (only) output column of the predictions is the score
# that is compared against the true test labels.
preds = model.predict(tst_x)
fp_rate, tp_rate, _ = metrics.roc_curve(np.asarray(tst_y), preds[:, 0])
auc = metrics.auc(fp_rate, tp_rate)

# ROC curve in blue, chance diagonal as a dashed red line.
sk_fig, sk_ax = pyplot.subplots()
sk_ax.plot(fp_rate, tp_rate, 'b', [0, 1], [0, 1], '--r')
sk_ax.set_xlabel('FPR')
sk_ax.set_ylabel('TPR')
sk_ax.legend(["AUC = {:.05}".format(auc)])
sk_fig.suptitle('ROC Curve')