In [None]:
import time

import matplotlib.pyplot as plt
import numpy as np
import sklearn.metrics
import sklearn.datasets
import tensorflow as tf

# Compact array printing: two decimals, no scientific notation for small
# values, and rows up to 100 characters wide.
np.set_printoptions(linewidth=100, suppress=True, precision=2)

In [None]:
# Seed NumPy's global RNG so the shuffle below (and any later call into
# np.random) produces the same result on every Restart & Run All.
random_seed = 0
np.random.seed(random_seed)

wine = sklearn.datasets.load_wine()
# Uncomment the following line for a description of the dataset.
# print(wine['DESCR'])
xs = wine.data
ys = wine.target

# The samples have to be shuffled before splitting, otherwise the split would
# follow the dataset's original ordering. A single index permutation keeps
# features and labels aligned.
shuffled_indices = np.random.permutation(len(xs))

# Perform 60% / 40% training/test split
split_index = int(len(xs) * 0.6)
train_indices = shuffled_indices[:split_index]

standardize_xs = True
if standardize_xs:
    # Fit the scaling statistics on the training rows only and apply them to
    # every row; using the full dataset here would leak information about the
    # test set into the training features.
    train_mean = np.mean(xs[train_indices], axis=0)
    train_std = np.std(xs[train_indices], axis=0)
    # A feature that is constant within the training rows has std 0; divide by
    # 1 instead so it becomes 0 rather than NaN/inf.
    train_std = np.where(train_std > 0, train_std, 1.0)
    xs = (xs - train_mean) / train_std

# `data` is a list of (feature_vector, label) pairs in shuffled order.
data = [(xs[index], ys[index]) for index in shuffled_indices]
train_data = data[:split_index]
test_data = data[split_index:]
print('Num training examples:', len(train_data))
print('Num testing examples:', len(test_data))

In [None]:
# Start from an empty default graph so that re-running this cell does not keep
# stacking duplicate placeholders, variables and optimizer ops onto it.
tf.reset_default_graph()
# Graph-level seed (must be set after the reset): makes the random
# initialisation of W repeatable across runs.
tf.set_random_seed(0)

# Hyperparameters
# Step size and epoch count depend on whether the features were standardized.
learning_rate = 50 if standardize_xs else 0.00005
num_epochs = 100 if standardize_xs else 10000
num_features = len(train_data[0][0])  # length of one feature vector
num_classes = 3
batch_size = 10

# Model Definition
# Inputs: a batch of feature vectors and the matching integer class labels.
# The leading dimension is None so any batch size can be fed.
batch_x = tf.placeholder(tf.float32, shape=[None, num_features])
batch_y = tf.placeholder(tf.int32, shape=[None])

# Parameters of a single linear layer: one weight column and one bias per class.
W = tf.Variable(tf.random_normal(shape=[num_features, num_classes],
                                 mean=0, stddev=1))
b = tf.Variable(tf.zeros(shape=[num_classes]))

# Unnormalised class scores; the predicted class is the highest-scoring one.
logits = tf.matmul(batch_x, W) + b
y_prediction = tf.argmax(logits, axis=-1, output_type=tf.int32)

# Softmax cross-entropy on the integer labels, minimised with plain gradient
# descent.
loss = tf.losses.sparse_softmax_cross_entropy(batch_y, logits)
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

In [None]:
# Convert the training split to arrays once, so mini-batches can be selected
# by index. This avoids shuffling `train_data` in place (it belongs to an
# earlier cell) and avoids re-unzipping Python tuples for every batch.
train_xs, train_ys = (np.asarray(column) for column in zip(*train_data))
num_train_examples = len(train_xs)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Training
    time_before = time.time()
    losses = []  # Storing losses so we can plot them later
    for epoch in range(num_epochs):
        # Fresh random visiting order for this epoch.
        epoch_order = np.random.permutation(num_train_examples)
        cumulative_loss = 0
        for i in range(0, num_train_examples, batch_size):
            batch_indices = epoch_order[i:i + batch_size]
            # Running train_op yields no value; only the loss is kept.
            _loss, _ = sess.run(
                (loss, train_op),
                feed_dict={batch_x: train_xs[batch_indices],
                           batch_y: train_ys[batch_indices]})
            # Weight by batch size: the last batch of an epoch may be smaller.
            cumulative_loss += _loss * len(batch_indices)
        average_loss = cumulative_loss / num_train_examples
        if epoch % 20 == 19:  # report every 20th epoch
            print('Epoch: {}, Loss: {}'.format(epoch + 1, average_loss))
        losses.append(average_loss)
    time_after = time.time()
    print('Training took {:.2f}s.'.format(time_after - time_before))

    # Prediction (must happen while the session holding the trained
    # variables is still open)
    train_ys_prediction = sess.run(y_prediction, feed_dict={batch_x: train_xs})

    test_xs, test_ys = (np.asarray(column) for column in zip(*test_data))
    test_ys_prediction = sess.run(y_prediction, feed_dict={batch_x: test_xs})

In [None]:
# Training-loss curve: one point per epoch, taken from `losses`.
loss_fig, loss_ax = plt.subplots(dpi=150)
loss_ax.set_title('Loss over Time')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
epoch_axis = range(len(losses))
loss_ax.plot(epoch_axis, losses, color='#458588')
plt.show()

In [None]:
# Table-driven report: each row is (label, scoring function, extra keyword
# arguments, (true labels, predicted labels)). A None row prints the blank
# line separating the training block from the testing block.
macro_average = {'average': 'macro'}
no_options = {}
training_pair = (train_ys, train_ys_prediction)
testing_pair = (test_ys, test_ys_prediction)

report_rows = [
    ('Precision on Training data:', sklearn.metrics.precision_score,
     macro_average, training_pair),
    ('Recall on Training data:', sklearn.metrics.recall_score,
     macro_average, training_pair),
    ('F1-Score on Training data:', sklearn.metrics.f1_score,
     macro_average, training_pair),
    ('Accuracy on Training data:', sklearn.metrics.accuracy_score,
     no_options, training_pair),
    None,
    ('Precision on Testing data:', sklearn.metrics.precision_score,
     macro_average, testing_pair),
    ('Recall on Testing data:', sklearn.metrics.recall_score,
     macro_average, testing_pair),
    ('F1-Score on Testing data:', sklearn.metrics.f1_score,
     macro_average, testing_pair),
    ('Accuracy on Testing data:', sklearn.metrics.accuracy_score,
     no_options, testing_pair),
]

for report_row in report_rows:
    if report_row is None:
        print()
        continue
    caption, scorer, scorer_options, (true_ys, predicted_ys) = report_row
    print(caption, scorer(true_ys, predicted_ys, **scorer_options))

def _label_histogram(labels):
    """Return, for each class id 0, 1 and 2, how many entries of `labels` equal it."""
    return [np.sum(np.equal(labels, class_id)) for class_id in range(3)]


# Per-class counts of true and predicted labels for both splits.
train_num_labels = _label_histogram(train_ys)
train_num_labels_prediction = _label_histogram(train_ys_prediction)
test_num_labels = _label_histogram(test_ys)
test_num_labels_prediction = _label_histogram(test_ys_prediction)

def _show_class_distribution(title, actual_counts, predicted_counts):
    """Draw a bar chart of actual vs. predicted counts for the three classes.

    Bars are laid out in pairs (actual, predicted) per class, with a gap
    between classes; the two colors alternate across the six bars.
    """
    bar_heights = []
    for class_id in range(3):
        bar_heights.append(actual_counts[class_id])
        bar_heights.append(predicted_counts[class_id])

    _, axes = plt.subplots(dpi=150)
    axes.set_title(title)
    axes.set_ylabel('Frequency')
    axes.bar([1, 2, 3.5, 4.5, 6, 7],
             bar_heights,
             tick_label=['Actual A', 'Predicted A',
                         'Actual B', 'Predicted B',
                         'Actual C', 'Predicted C'],
             color=['#458588', '#CC241D'])
    plt.show()


_show_class_distribution(
    'Class Distribution Actual vs Predicted: Training Data',
    train_num_labels, train_num_labels_prediction)

_show_class_distribution(
    'Class Distribution Actual vs Predicted: Testing Data',
    test_num_labels, test_num_labels_prediction)