In [18]:
# imports
import tensorflow as tf 
import numpy as np 
from sklearn.datasets import load_wine
from sklearn.utils import shuffle
from sklearn.preprocessing import OneHotEncoder

# report the library versions and runtime capabilities of this kernel
print("TF version {}".format(tf.__version__))
print("numpy version {}".format(np.__version__))
print("Eager mode: ", tf.executing_eagerly())

# an empty device list is falsy, which selects the "NOT AVAILABLE" branch
gpu_devices = tf.config.experimental.list_physical_devices("GPU")
gpu_status = "available" if gpu_devices else "NOT AVAILABLE"
print("GPU is", gpu_status)

TF version 2.1.0
numpy version 1.18.1
Eager mode:  True
GPU is NOT AVAILABLE


In [4]:
def get_train_test_validate(data_array, test_percent, validate_percent):
    """Split a sequence positionally into train, test and validate parts.

    The split is purely positional (nothing is shuffled here), so callers
    must shuffle ordered data beforehand. Calling the function with the same
    fractions on equally long features and labels keeps their rows aligned.

    NOTE: despite its name, ``test_percent`` is the fraction of rows that is
    kept *out of* the test set; the trailing ``1 - test_percent`` rows become
    the test set. Likewise ``validate_percent`` is the fraction of those
    non-test rows that is kept for training; the rest is the validation set.
    The parameter names are kept for backward compatibility.

    Args:
        data_array: sliceable sequence supporting ``len()`` (list, ndarray, ...).
        test_percent: fraction in [0, 1] of all rows that precede the test split.
        validate_percent: fraction in [0, 1] of the non-test rows used for training.

    Returns:
        Tuple ``(train_data, test_data, validate_data)`` -- note the order.

    Raises:
        ValueError: if either fraction lies outside [0, 1]; such values would
            otherwise silently produce wrong splits (e.g. negative indices
            slicing from the end of the data).
    """
    for name, fraction in (("test_percent", test_percent),
                           ("validate_percent", validate_percent)):
        if not 0 <= fraction <= 1:
            raise ValueError("{} must be between 0 and 1, got {}".format(name, fraction))

    total_rows = len(data_array)
    print("the train max index is {}".format(total_rows))

    # get the test/train split: everything from test_start onwards is test data
    test_start = int(total_rows * test_percent)
    test_data = data_array[test_start:]
    print("the test split index is {}".format(test_start))

    # get the train/validate split inside the non-test rows [0, test_start)
    validate_start = int(test_start * validate_percent)
    train_data = data_array[:validate_start]
    validate_data = data_array[validate_start:test_start]
    print("the validate split index is {}".format(validate_start))

    # return
    return train_data, test_data, validate_data


In [8]:
# load the wine data
# NOTE: load_wine() returns a dict-like sklearn Bunch (see the printed type), not a
# DataFrame, despite the `wine_df` name; the printed keys include 'data' and 'target'
wine_df = load_wine()

print("the keys are {} and the df of type {}".format(wine_df.keys(), type(wine_df)))

the keys are dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names']) and the df of type <class 'sklearn.utils.Bunch'>


In [None]:
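# shuffle the data (empty placeholder cell: the shuffle itself is done in a later cell, once the features and labels have been extracted)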


In [13]:
# get the features and labels
# (stored under separate *start names; the shuffled copies used for training are created later)
Xstart = wine_df.data
ystart = wine_df.target

print("the full features are of type {} and shape {}".format(type(Xstart), Xstart.shape))
print("the full labels are of type {} and shape {}".format(type(ystart), ystart.shape))



the full features are of type <class 'numpy.ndarray'> and shape (178, 13)
the full labels are of type <class 'numpy.ndarray'> and shape (178,)


In [14]:
# get new shuffled data
# shuffle() is given X and y together so both get the same permutation and rows stay
# aligned with their labels; the fixed random_state makes the ordering reproducible.
# get_train_test_validate() splits purely by position, so the data must be shuffled first.
X, y = shuffle(Xstart, ystart, random_state = 3)

In [19]:
# print the first `index` (10) rows of the shuffled features and targets
index = 10
print("the first {} of the data is {}".format(index, X[:index]))
print("the first {} of the targets is {}".format(index, y[:index]))

the first 10 of the data is [[1.356e+01 1.710e+00 2.310e+00 1.620e+01 1.170e+02 3.150e+00 3.290e+00
  3.400e-01 2.340e+00 6.130e+00 9.500e-01 3.380e+00 7.950e+02]
 [1.377e+01 1.900e+00 2.680e+00 1.710e+01 1.150e+02 3.000e+00 2.790e+00
  3.900e-01 1.680e+00 6.300e+00 1.130e+00 2.930e+00 1.375e+03]
 [1.351e+01 1.800e+00 2.650e+00 1.900e+01 1.100e+02 2.350e+00 2.530e+00
  2.900e-01 1.540e+00 4.200e+00 1.100e+00 2.870e+00 1.095e+03]
 [1.161e+01 1.350e+00 2.700e+00 2.000e+01 9.400e+01 2.740e+00 2.920e+00
  2.900e-01 2.490e+00 2.650e+00 9.600e-01 3.260e+00 6.800e+02]
 [1.348e+01 1.670e+00 2.640e+00 2.250e+01 8.900e+01 2.600e+00 1.100e+00
  5.200e-01 2.290e+00 1.175e+01 5.700e-01 1.780e+00 6.200e+02]
 [1.285e+01 1.600e+00 2.520e+00 1.780e+01 9.500e+01 2.480e+00 2.370e+00
  2.600e-01 1.460e+00 3.930e+00 1.090e+00 3.630e+00 1.015e+03]
 [1.438e+01 3.590e+00 2.280e+00 1.600e+01 1.020e+02 3.250e+00 3.170e+00
  2.700e-01 2.190e+00 4.900e+00 1.040e+00 3.440e+00 1.065e+03]
 [1.349e+01 1.660e+00 2.240

In [17]:
# print the first 20 shuffled targets (integer class labels, before one-hot encoding)
index = 20
print("first {} targets are {}".format(index, y[:index]))

first 20 targets are [0 0 0 1 2 0 0 1 1 0 1 0 1 1 0 0 1 0 2 2]


In [24]:
# one hot the target
# binary encode: OneHotEncoder works on 2-D (n_samples, n_features) input, so the
# 1-D label vector is first turned into a single-column matrix
y_column = y.reshape(-1, 1)

index = 20
print("encoded of shape {} is now {}".format(y_column.shape, y_column[:index]))

# Use the encoder's default (sparse) output and densify it explicitly. The original
# `sparse=False` keyword was renamed to `sparse_output` in newer scikit-learn releases
# and is no longer accepted there, while `.toarray()` works on old and new versions
# alike and yields the same dense float matrix.
onehot_encoder = OneHotEncoder()
y_onehot = onehot_encoder.fit_transform(y_column).toarray()
print("first {} targets are {}".format(index, y_onehot[:index]))

encoded of shape (178, 1) is now [[0]
 [0]
 [0]
 [1]
 [2]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [0]
 [2]
 [2]]
first 20 targets are [[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]]


In [25]:
# get the train/test/validate data
# with 0.8 / 0.8 the helper keeps the first 80% of the rows as non-test data and then
# 80% of those for training; it returns (train, test, validate) in that order
X_train, X_test, X_validate = get_train_test_validate(X, 0.8, 0.8)

print("the train dataset if of type {} and shape {}".format(type(X_train), X_train.shape))
print("the test dataset if of type {} and shape {}".format(type(X_test), X_test.shape))
print("the validate dataset if of type {} and shape {}".format(type(X_validate), X_validate.shape))

the train max index is 178
the test split index is 142
the validate split index is 113
the train dataset if of type <class 'numpy.ndarray'> and shape (113, 13)
the test dataset if of type <class 'numpy.ndarray'> and shape (36, 13)
the validate dataset if of type <class 'numpy.ndarray'> and shape (29, 13)


In [27]:
# split the labels into train/test/validate labels
# the same fractions as for the features are used so that the positional split
# keeps every label in the same partition as its feature row
y_train, y_test, y_validate = get_train_test_validate(y_onehot, 0.8, 0.8)

print("the train labels if of type {} and shape {}".format(type(y_train), y_train.shape))
print("the test labels if of type {} and shape {}".format(type(y_test), y_test.shape))
print("the validate labels if of type {} and shape {}".format(type(y_validate), y_validate.shape))

the train max index is 178
the test split index is 142
the validate split index is 113
the train labels if of type <class 'numpy.ndarray'> and shape (113, 3)
the test labels if of type <class 'numpy.ndarray'> and shape (36, 3)
the validate labels if of type <class 'numpy.ndarray'> and shape (29, 3)


In [28]:
# build the model: a small fully-connected classifier
model = tf.keras.Sequential([
    # first hidden layer; input_shape=(13,) matches the 13 feature columns of X
    tf.keras.layers.Dense(100, activation= 'relu', input_shape=(13,)),
    # tf.keras.layers.AveragePooling1D(),
    # second hidden layer
    tf.keras.layers.Dense(30, activation='relu'),
    # output layer: softmax over 3 units, one per column of the one-hot labels
    tf.keras.layers.Dense(3, activation='softmax')
])

# print the layer-by-layer overview (output shapes and parameter counts)
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               1400      
_________________________________________________________________
dense_1 (Dense)              (None, 30)                3030      
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 93        
Total params: 4,523
Trainable params: 4,523
Non-trainable params: 0
_________________________________________________________________


In [33]:
# compile the model
# categorical_crossentropy expects one-hot encoded labels, which is what y_onehot provides
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# categorical_crossentropy only works if the labels are a one-hot matrix; an alternative
# way to build one (not needed here, the labels are already encoded with OneHotEncoder):
# from keras.utils import to_categorical
# y_binary = to_categorical(y_int)
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# option from the TF classification example (takes integer class labels instead of one-hot).
# NOTE(review): the output layer of this model already applies softmax, so this loss
# would need from_logits=False to be used with the model as built above.
# model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer='adam', metrics=['accuracy'])



In [34]:
# fit the model
# NOTE(review): the features are fed in unscaled (the printed rows range from roughly
# 0.3 to over 1000 across columns); consider standardizing them before training
number_epochs=20

# validation_data is only evaluated at the end of each epoch, it is not trained on;
# the test split is deliberately left out here
model.fit(X_train, y_train, epochs=number_epochs, batch_size=10, validation_data=(X_validate, y_validate), verbose=1)



Train on 113 samples, validate on 29 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7ff2c9659090>

In [36]:
# predict class probabilities for a few held-out test samples
# NOTE: the slice [1:10] selects rows 1..9 (nine samples) and skips row 0 of the test set
prediction = model.predict(X_test[1:10])

# each prediction row holds one probability per class; compare against the one-hot targets
print("the prediction of type {} was {}".format(type(prediction), prediction))
print("the targets are {}".format(y_test[1:10]))

the prediction of type <class 'numpy.ndarray'> was [[2.12237135e-01 5.36971763e-02 7.34065711e-01]
 [9.98978138e-01 3.12618283e-07 1.02155155e-03]
 [2.21382664e-03 5.92592597e-01 4.05193567e-01]
 [2.06244318e-03 5.65310299e-01 4.32627350e-01]
 [9.95716631e-01 6.75372632e-07 4.28261748e-03]
 [9.99677181e-01 1.33400064e-08 3.22782813e-04]
 [1.63656533e-01 1.15219034e-01 7.21124411e-01]
 [1.58569927e-03 6.68090954e-03 9.91733432e-01]
 [1.39944544e-02 7.07490683e-01 2.78514892e-01]]
the targets are [[1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]]


In [38]:
# format the results
# show floats with 3 decimals; np.set_printoptions is a global setting, so it also
# changes how every array printed afterwards (including the one-hot targets) is shown
float_formatter = "{:.3f}".format
np.set_printoptions(formatter={'float_kind':float_formatter})

print("the prediction of type {} was {}".format(type(prediction), prediction))
print("the targets are {}".format(y_test[1:10]))

the prediction of type <class 'numpy.ndarray'> was [[0.212 0.054 0.734]
 [0.999 0.000 0.001]
 [0.002 0.593 0.405]
 [0.002 0.565 0.433]
 [0.996 0.000 0.004]
 [1.000 0.000 0.000]
 [0.164 0.115 0.721]
 [0.002 0.007 0.992]
 [0.014 0.707 0.279]]
the targets are [[1.000 0.000 0.000]
 [1.000 0.000 0.000]
 [0.000 1.000 0.000]
 [0.000 1.000 0.000]
 [1.000 0.000 0.000]
 [1.000 0.000 0.000]
 [0.000 1.000 0.000]
 [0.000 0.000 1.000]
 [0.000 1.000 0.000]]


In [65]:
# validate the prediction
# NOTE(review): the recorded output of this cell shows 1-D integer labels that are all 2,
# whereas y_test as built earlier in this notebook is a one-hot matrix. The output looks
# stale (presumably from an earlier kernel state); restart the kernel and run all cells
# to refresh it.
print("the test labels were {}".format(y_test[1:20]))

the test labels were [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
