In [1]:
# imports
import tensorflow as tf 
import numpy as np 
from sklearn.datasets import load_wine
from sklearn.utils import shuffle

print("TF version {}".format(tf.__version__))
print("numpy version {}".format(np.__version__))
print("Eager mode: ", tf.executing_eagerly())
print("GPU is", "available" if tf.config.experimental.list_physical_devices("GPU") else "NOT AVAILABLE")

TF version 2.1.0
numpy version 1.18.1
Eager mode:  True
GPU is NOT AVAILABLE


In [2]:
def get_train_test_validate(data_array, test_percent, validate_percent):
    index = len(data_array)
    print("the train max index is {}".format(index))

    # get the test/train split
    index = int(index * test_percent)
    train_data = data_array[:index]
    test_data = data_array[index:]
    print("the test split index is {}".format(index))

    # get the train/validate split
    index = int(index * validate_percent)
    validate_data = train_data[index:]
    train_data = train_data[:index]
    print("the validate split index is {}".format(index))

    # return
    return train_data, test_data, validate_data


In [3]:
# load the wine data
wine_df = load_wine()

print("the keys are {}".format(wine_df.keys()))

the keys are dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])


In [4]:
# get the features and labels
X = wine_df.data
y = wine_df.target

print("the full features are of type {} and shape {}".format(type(X), X.shape))
print("the full labels are of type {} and shape {}".format(type(y), y.shape))



the full features are of type <class 'numpy.ndarray'> and shape (178, 13)
the full labels are of type <class 'numpy.ndarray'> and shape (178,)


In [5]:
# shuffle the data
X, y = shuffle(X, y, random_state = 2)


In [6]:
# get the train/test data
X_train, X_test, X_validate = get_train_test_validate(X, 0.8, 0.8)

print("the train dataset if of type {} and shape {}".format(type(X_train), X_train.shape))
print("the test dataset if of type {} and shape {}".format(type(X_test), X_test.shape))
print("the validate dataset if of type {} and shape {}".format(type(X_validate), X_validate.shape))

the train max index is 178
the test split index is 142
the validate split index is 113
the train dataset if of type <class 'numpy.ndarray'> and shape (113, 13)
the test dataset if of type <class 'numpy.ndarray'> and shape (36, 13)
the validate dataset if of type <class 'numpy.ndarray'> and shape (29, 13)


In [7]:
# split the labels into train/test/validate labels
y_train, y_test, y_validate = get_train_test_validate(y, 0.8, 0.8)

print("the train labels if of type {} and shape {}".format(type(y_train), y_train.shape))
print("the test labels if of type {} and shape {}".format(type(y_test), y_test.shape))
print("the validate labels if of type {} and shape {}".format(type(y_validate), y_validate.shape))

the train max index is 178
the test split index is 142
the validate split index is 113
the train labels if of type <class 'numpy.ndarray'> and shape (113,)
the test labels if of type <class 'numpy.ndarray'> and shape (36,)
the validate labels if of type <class 'numpy.ndarray'> and shape (29,)


In [8]:
# build the model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(100, activation= 'relu', input_shape=(13,)),
    # tf.keras.layers.AveragePooling1D(),
    tf.keras.layers.Dense(30, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')
])

model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               1400      
_________________________________________________________________
dense_1 (Dense)              (None, 30)                3030      
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 93        
Total params: 4,523
Trainable params: 4,523
Non-trainable params: 0
_________________________________________________________________


In [9]:
# compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# will only work if lables are one hot matrix?
# from keras.utils import to_categorical
# y_binary = to_categorical(y_int)
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# option from the TF classification example
# model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer='adam', metrics=['accuracy'])



In [10]:
# fit the model
number_epochs=20

model.fit(X_train, y_train, epochs=number_epochs, batch_size=10, validation_data=(X_validate, y_validate), verbose=1)



Train on 113 samples, validate on 29 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7ffa6112efd0>

In [11]:
# predict
prediction = model.predict(X_test[1:20])

print("the prediction was {}".format(prediction))

the prediction was [[2.1177220e-06 9.8649526e-01 1.3502649e-02]
 [1.3138255e-03 4.3912458e-01 5.5956167e-01]
 [1.3727257e-08 9.8412782e-01 1.5872153e-02]
 [4.9210019e-05 4.7491652e-01 5.2503431e-01]
 [9.9976033e-01 2.3833656e-04 1.3218638e-06]
 [4.0676489e-02 9.2136925e-01 3.7954308e-02]
 [2.8641678e-06 9.9151272e-01 8.4844567e-03]
 [1.8600703e-03 9.6884400e-01 2.9295921e-02]
 [2.1820088e-05 5.0151157e-01 4.9846655e-01]
 [9.9999762e-01 2.3423183e-06 1.1261897e-09]
 [9.9881750e-01 1.1644558e-03 1.7937857e-05]
 [9.9949872e-01 4.9924245e-04 2.0597465e-06]
 [2.6454227e-05 9.8768044e-01 1.2293065e-02]
 [9.9999917e-01 8.1645021e-07 4.5232245e-11]
 [9.8040080e-05 9.7703701e-01 2.2865001e-02]
 [6.3037727e-04 9.6536070e-01 3.4008868e-02]
 [4.4984868e-06 9.8710060e-01 1.2894922e-02]
 [9.9988890e-01 1.1096697e-04 1.6975825e-07]
 [1.6138860e-04 9.9022502e-01 9.6135745e-03]]


In [12]:
# validate the prediction
print("the test labels were {}".format(y_test[1:20]))

the test labels were [1 2 1 2 0 1 1 1 2 0 0 0 1 0 1 0 1 0 1]
