In [4]:
# imports
import tensorflow as tf 
import numpy as np 
from sklearn.datasets import load_wine

print("TF version {}".format(tf.__version__))
print("numpy version {}".format(np.__version__))
print("Eager mode: ", tf.executing_eagerly())
print("GPU is", "available" if tf.config.experimental.list_physical_devices("GPU") else "NOT AVAILABLE")

TF version 2.1.0
numpy version 1.18.1
Eager mode:  True
GPU is NOT AVAILABLE


In [25]:
def get_train_test_validate(data_array, test_percent, validate_percent):
    index = len(data_array)
    print("the train max index is {}".format(index))

    # get the test/train split
    index = int(index * test_percent)
    train_data = data_array[:index]
    test_data = data_array[index:]
    print("the test split index is {}".format(index))

    # get the train/validate split
    index = int(index * validate_percent)
    validate_data = train_data[index:]
    train_data = train_data[:index]
    print("the validate split index is {}".format(index))

    # return
    return train_data, test_data, validate_data


In [6]:
# load the wine data
wine_df = load_wine()

print("the keys are {}".format(wine_df.keys()))

the keys are dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])


In [7]:
# get the features and labels
X = wine_df.data
y = wine_df.target

print("the full features are of type {} and shape {}".format(type(X), X.shape))
print("the full labels are of type {} and shape {}".format(type(y), y.shape))



the full features are of type <class 'numpy.ndarray'> and shape (178, 13)
the full labels are of type <class 'numpy.ndarray'> and shape (178,)


In [26]:
# get the train/test data
X_train, X_test, X_validate = get_train_test_validate(X, 0.8, 0.8)

print("the train dataset if of type {} and shape {}".format(type(X_train), X_train.shape))
print("the test dataset if of type {} and shape {}".format(type(X_test), X_test.shape))
print("the validate dataset if of type {} and shape {}".format(type(X_validate), X_validate.shape))

the train max index is 178
the test split index is 142
the validate split index is 113
the train dataset if of type <class 'numpy.ndarray'> and shape (113, 13)
the test dataset if of type <class 'numpy.ndarray'> and shape (36, 13)
the validate dataset if of type <class 'numpy.ndarray'> and shape (29, 13)


In [27]:
# split the labels into train/test/validate labels
y_train, y_test, y_validate = get_train_test_validate(y, 0.8, 0.8)

print("the train labels if of type {} and shape {}".format(type(y_train), y_train.shape))
print("the test labels if of type {} and shape {}".format(type(y_test), y_test.shape))
print("the validate labels if of type {} and shape {}".format(type(y_validate), y_validate.shape))

the train max index is 178
the test split index is 142
the validate split index is 113
the train labels if of type <class 'numpy.ndarray'> and shape (113,)
the test labels if of type <class 'numpy.ndarray'> and shape (36,)
the validate labels if of type <class 'numpy.ndarray'> and shape (29,)


In [61]:
# build the model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(100, activation= 'relu', input_shape=(13,)),
    # tf.keras.layers.AveragePooling1D(),
    tf.keras.layers.Dense(30, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')
])

model.summary()


Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_27 (Dense)             (None, 100)               1400      
_________________________________________________________________
dense_28 (Dense)             (None, 30)                3030      
_________________________________________________________________
dense_29 (Dense)             (None, 3)                 93        
Total params: 4,523
Trainable params: 4,523
Non-trainable params: 0
_________________________________________________________________


In [73]:
# compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# will only work if lables are one hot matrix?
# from keras.utils import to_categorical
# y_binary = to_categorical(y_int)
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# option from the TF classification example
# model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer='adam', metrics=['accuracy'])



In [74]:
# fit the model
number_epochs=20

model.fit(X_train, y_train, epochs=number_epochs, batch_size=10, validation_data=(X_validate, y_validate), verbose=1)



Train on 113 samples, validate on 29 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fea2202c450>

In [75]:
# predict
prediction = model.predict(X_test[1:20])

print("the prediction was {}".format(prediction))

the prediction was [[2.8327382e-01 7.1672618e-01 3.1691159e-17]
 [9.9984121e-01 1.5876286e-04 3.2272273e-25]
 [9.9995399e-01 4.6051224e-05 1.1991408e-24]
 [2.2044435e-02 9.7795564e-01 7.7303403e-15]
 [9.9103957e-01 8.9603942e-03 4.0465055e-19]
 [9.8538983e-01 1.4610122e-02 1.1291022e-19]
 [8.0005958e-04 9.9919993e-01 1.9730099e-19]
 [4.4965363e-06 9.9999547e-01 8.5297088e-20]
 [4.1539934e-05 9.9995852e-01 1.7868441e-18]
 [1.1366981e-06 9.9999881e-01 1.2092545e-17]
 [9.8667800e-01 1.3322039e-02 2.3423007e-20]
 [5.5022508e-01 4.4977495e-01 3.9075425e-19]
 [9.9956471e-01 4.3531132e-04 6.4964606e-22]
 [4.2609394e-02 9.5739067e-01 2.9066076e-16]
 [9.9999750e-01 2.5123177e-06 2.4193495e-26]
 [9.2829382e-01 7.1706139e-02 6.7411194e-20]
 [9.6744823e-01 3.2551762e-02 8.7655626e-19]
 [1.9196355e-01 8.0803645e-01 1.8185909e-16]
 [7.3435402e-01 2.6564601e-01 2.8892908e-20]]


In [65]:
# validate the prediction
print("the test labels were {}".format(y_test[1:20]))

the test labels were [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
