# White wines classification using neural network
gery, July 3, 2018.

This notebook shows how to import a CSV file and convert it into a NumPy array that TensorFlow can work with.

The dataset comes from [winequality-white.csv](http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/).

### 0 - Necessary libraries

In [1]:
import numpy as np
import csv
import tensorflow as tf
import random

## 1 - Data processing

### 1 - 1 - Load data

In [2]:
# Read every row of the semicolon-delimited CSV into `temp`, a list of
# string lists (the first entry is the header row).
# The `with` block closes the file as soon as reading is done; the file
# handle was previously left open for the lifetime of the kernel.
with open('winequality-white.csv') as csvfile:
    reader = csv.reader(csvfile, delimiter=';')
    temp = list(reader)

In [3]:
# Skip the header row (temp[0]) and parse the remaining rows as floats.
data = np.asarray(temp[1:], dtype=float)

# Sanity check: dimensions, container type and the first record.
print(data.shape)
print(type(data))
print(data[0])

(4898, 12)
<type 'numpy.ndarray'>
[7.000e+00 2.700e-01 3.600e-01 2.070e+01 4.500e-02 4.500e+01 1.700e+02
 1.001e+00 3.000e+00 4.500e-01 8.800e+00 6.000e+00]


### 1 - 2 - How many examples of each quality score do we have?

In [4]:
# Build `table`: one row [score, count] for every quality score 0..10.
# Column 11 of `data` (the last of its 12 columns) holds the score that is
# counted here.
#
# The counts come from a vectorised comparison instead of nested Python
# loops, and the counter no longer reuses the name `temp`, so the CSV rows
# loaded earlier are not overwritten and the parsing cell stays re-runnable.
quality = data[:, 11]
table = np.array(
    [[score, np.count_nonzero(quality == score)] for score in range(11)],
    dtype=int
)

In [5]:
# Show the [score, count] rows under a plain-text column header.
print("quality number")
print(table)

quality number
[[   0    0]
 [   1    0]
 [   2    0]
 [   3   20]
 [   4  163]
 [   5 1457]
 [   6 2198]
 [   7  880]
 [   8  175]
 [   9    5]
 [  10    0]]


In [6]:
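# Disabled: optional export of the train/test split to a .mat file.
# NOTE(review): `sio` is not imported in this notebook (presumably scipy.io -- confirm),
# and trainingData/testData are only created in a later cell, so this line cannot run here as-is.
# sio.savemat('whitewineQualityPrediction', {'trainingData': trainingData, 'testData': testData})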

### 1 - 3 - Select the wines whose score lies between 5 and 7, grouped by score

In [7]:
# Group the rows of `data` by quality score (column 11) for scores 5, 6 and 7.
# dic['table5'], dic['table6'] and dic['table7'] each hold the matching rows,
# all 12 columns, in their original order.
#
# A boolean mask selects the rows in one step. This replaces stacking rows
# one at a time onto a dummy first row (quadratic in the number of rows) and
# also yields an empty (0, 12) array, rather than an IndexError, when a score
# has no examples.
dic = {}
for score in range(5, 8):
    dic['table' + str(score)] = data[data[:, 11] == score, :]

In [8]:
# Report the (rows, columns) shape of each per-score table.
for score in range(5, 8):
    subset = dic['table' + str(score)]
    print(subset.shape)

(1457, 12)
(2198, 12)
(880, 12)


In [9]:
# Total number of wines kept (scores 5, 6 and 7), computed from the tables
# themselves so the figure cannot drift from the data the way hand-copied
# row counts would.
sum(dic['table' + str(score)].shape[0] for score in range(5, 8))

4535

### 1 - 4 - Randomly pick training examples

In [10]:
rate = .7  # The proportion of training examples over whole examples
SPLIT_SEED = 0  # fixed seed so the train/test split is identical on every run

# A private generator makes the split reproducible without altering the
# state of the global `random` module.
split_rng = random.Random(SPLIT_SEED)

# Stratified split: each score's table is shuffled and cut separately so that
# both sets keep the same proportion of scores 5, 6 and 7.
train_parts = []
test_parts = []
for score in range(5, 8):
    rows = dic['table' + str(score)]
    m = np.shape(rows)
    # shuffle() needs a mutable sequence; a bare range() is not one on Python 3.
    index = list(range(m[0]))
    split_rng.shuffle(index)
    # Built-in int() replaces the np.int alias, which newer NumPy releases removed.
    n_train = int(np.round(rate * m[0]))
    train_parts.append(rows[index[:n_train], :])
    test_parts.append(rows[index[n_train:], :])

# Stack the per-score pieces directly; no dummy first row to strip afterwards.
trainingData = np.vstack(train_parts)
testData = np.vstack(test_parts)

# Mix the three score groups together inside the training set. The test set is
# left grouped by score, as before.
index = list(range(np.shape(trainingData)[0]))
split_rng.shuffle(index)
trainingData = trainingData[index, :]

## 2 - Classification using neural network

### 2 - 1 - Build network

In [11]:
# Training hyper-parameters.
learning_rate = 0.1  # learning rate passed to the optimizer
num_steps = 500      # number of optimisation steps in the training loop

# Print progress roughly ten times per run. Floor division keeps this an
# integer on Python 3 as well as Python 2, and max(1, ...) prevents a zero
# interval (and a modulo-by-zero in the training loop) when num_steps < 10.
display_step = max(1, num_steps // 10)

In [12]:
# Network dimensions
n_hidden_1 = 100  # units in the first hidden layer
n_hidden_2 = 100  # units in the second hidden layer
# Every column of the training array except the last one is an input feature.
num_input = trainingData.shape[1] - 1
# A single output unit: the network emits one value per example.
num_classes = 1

# Graph inputs; the leading None leaves the number of rows per feed open.
X = tf.placeholder(tf.float32, shape=[None, num_input])
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

In [13]:
def _random_normal_variable(shape):
    """Return a tf.Variable initialised from tf.random_normal(shape)."""
    return tf.Variable(tf.random_normal(shape))


# Weight matrices, keyed by layer: input->hidden1, hidden1->hidden2, hidden2->output.
weights = {}
weights['h1'] = _random_normal_variable([num_input, n_hidden_1])
weights['h2'] = _random_normal_variable([n_hidden_1, n_hidden_2])
weights['out'] = _random_normal_variable([n_hidden_2, num_classes])

# Bias vectors, one per layer, sized to that layer's number of units.
biases = {}
biases['b1'] = _random_normal_variable([n_hidden_1])
biases['b2'] = _random_normal_variable([n_hidden_2])
biases['out'] = _random_normal_variable([num_classes])

In [14]:
# Disabled alternative: the same network without activation functions (purely linear).
# # Create linear model
# def neural_net(x):
#     # Hidden fully connected layer with n_hidden_1 neurons
#     layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
#     # Hidden fully connected layer with n_hidden_2 neurons
#     layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
#     # Output fully connected layer with num_classes neurons
#     out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
#     return out_layer

In [15]:
# Sigmoid model
def neural_net(x):
    """Two sigmoid hidden layers followed by a linear output layer.

    Uses the module-level `weights` and `biases` dictionaries.

    Args:
        x: input tensor that is matrix-multiplied with weights['h1'].

    Returns:
        The output-layer tensor; no activation is applied to it.
    """
    # First hidden layer: affine transform, then sigmoid.
    pre_act_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    hidden_1 = tf.sigmoid(pre_act_1)

    # Second hidden layer: affine transform, then sigmoid.
    pre_act_2 = tf.add(tf.matmul(hidden_1, weights['h2']), biases['b2'])
    hidden_2 = tf.sigmoid(pre_act_2)

    # Linear output layer.
    return tf.matmul(hidden_2, weights['out']) + biases['out']

In [16]:
# Forward pass: the network output for whatever rows are fed into X.
logits = neural_net(X)

# Loss: Euclidean (L2) norm of the residual between the outputs and the
# targets over everything that is fed, minimised with Adam.
residual = logits - Y
loss_op = tf.norm(residual, ord=2)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluation: round each output to the nearest integer and count it as
# correct when it equals the target exactly.
prediction = tf.round(logits)
correct_pred = tf.equal(prediction, Y)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

### 2 - 2 - Start training

In [17]:
# Split features (the first num_input columns) from the label column once,
# instead of re-slicing the arrays on every training step.
train_feed = {X: trainingData[:, 0:num_input],
              Y: trainingData[:, num_input:num_input + 1]}
test_feed = {X: testData[:, 0:num_input],
             Y: testData[:, num_input:num_input + 1]}

# Start training
init = tf.global_variables_initializer()
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    for step in range(1, num_steps + 1):
        # One optimisation step (backprop) on the whole training set.
        sess.run(train_op, feed_dict=train_feed)
        if step % display_step == 0 or step == 1:
            # Loss and accuracy on the training set after this step.
            loss, acc = sess.run([loss_op, accuracy], feed_dict=train_feed)
            print("Step " + str(step) + ", Training Loss= " +
                  "{:.4f}".format(loss) + ", Training Accuracy= " +
                  "{:.3f}".format(acc))

    print("Optimization Finished!")

    # Accuracy on the held-out test rows. The message is built as a single
    # string so that it prints as text, not as a tuple, on Python 2 as well.
    test_acc = sess.run(accuracy, feed_dict=test_feed)
    print("Testing Accuracy: " + str(test_acc))

    # Comparison between the predicted labels and the true testing labels.
    print("Predicted labels:")
    print(sess.run(prediction, feed_dict={X: test_feed[X]}).T)
    print("True testing labels:")
    print(testData[:, num_input])

Step 1, Training Loss= 988.8809, Training Accuracy= 0.000
Step 50, Training Loss= 40.0521, Training Accuracy= 0.485
Step 100, Training Loss= 39.7664, Training Accuracy= 0.486
Step 150, Training Loss= 37.1872, Training Accuracy= 0.485
Step 200, Training Loss= 36.4443, Training Accuracy= 0.485
Step 250, Training Loss= 40.8473, Training Accuracy= 0.468
Step 300, Training Loss= 36.3336, Training Accuracy= 0.485
Step 350, Training Loss= 35.2286, Training Accuracy= 0.485
Step 400, Training Loss= 38.6021, Training Accuracy= 0.445
Step 450, Training Loss= 39.3795, Training Accuracy= 0.449
Step 500, Training Loss= 34.8642, Training Accuracy= 0.485
Optimization Finished!
('Testing Accuracy:', 0.48455882)
Predicted labels:
[[6. 6. 6. ... 6. 6. 6.]]
True testing labels:
[5. 5. 5. ... 7. 7. 7.]
