In [57]:
import tensorflow as tf
print("Num CPUs Available: ", len(tf.config.list_physical_devices('CPU')))
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num CPUs Available:  1
Num GPUs Available:  1


In [None]:
load the data

In [58]:
import pandas as pd

digits_training_data = pd.read_csv('./train.csv')

# X is all the features I will be training the dataset on 
X = digits_training_data.copy()
# y is the prediction target
# pop label from the X and at the same time set the target
y = X.pop('label')

# X.head()
# y.head()

In [59]:
from sklearn.model_selection import train_test_split

# use part of the data from training and validation
X_train, X_valid, y_train, y_valid = train_test_split(X, y, stratify=y, train_size=0.75)

In [86]:
from tensorflow import keras
from tensorflow.keras import layers

# input_shape is the shape of the input data
model = keras.Sequential([
    layers.BatchNormalization(input_shape=(784,)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),

    layers.Dense(10),
])

In [87]:
# min_delta is the minimum change in the monitored quantity to qualify as an improvement, which is used for early stopping.
# patience is the number of epochs with no improvement after which training will be stopped.

early_stopping = keras.callbacks.EarlyStopping(
    patience=20,
    min_delta=0.001,
    restore_best_weights=True,
)

optimizer = keras.optimizers.Adam(lr=0.001)
# optimizer is the algorithm used to update the weights of the model.
# loss is the loss function to be minimized by the model.
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# batch size is the number of samples (rows) that will be processed at a time
# epochs is the number of times the entire dataset will be processed
history = model.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    batch_size=128,
    epochs=1000,
    callbacks=[early_stopping],
)

import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')
# Set Matplotlib defaults
plt.rc('figure', autolayout=True)
plt.rc('axes', labelweight='bold', labelsize='large',
       titleweight='bold', titlesize=18, titlepad=10)
plt.rc('animation', html='html5')

history_df = pd.DataFrame(history.history)
history_df.loc[:, ['loss', 'val_loss']].plot()
print("Minimum Validation Loss: {:0.4f}".format(history_df['val_loss'].min()))



Epoch 1/1000


ValueError: in user code:

    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\training.py:862 train_function  *
        return step_function(self, iterator)
    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\training.py:852 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\training.py:845 run_step  **
        outputs = model.train_step(data)
    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\training.py:803 train_step
        loss = self.compiled_loss(
    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:204 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\losses.py:155 __call__
        losses = call_fn(y_true, y_pred)
    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\losses.py:259 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\losses.py:1679 categorical_crossentropy
        return backend.categorical_crossentropy(
    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\backend.py:4875 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    C:\Users\laimo\anaconda3\envs\tf\lib\site-packages\tensorflow\python\framework\tensor_shape.py:1161 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 1) and (None, 10) are incompatible


In [84]:
digits_training_data = pd.read_csv('./test.csv')
test_X = digits_training_data.copy()

prediction = model.predict(test_X)
print(prediction)
# output = pd.DataFrame({'imageId': digits_training_data.Id,
#                        'Label': prediction})
# output.to_csv('submission.csv', index=False)
# X.head()
# y.head()


[[1.6659906  1.600827   1.6638818  ... 1.5669943  1.6861341  1.4967502 ]
 [0.99969965 1.1424934  1.0728575  ... 1.0411243  1.1603224  0.8352481 ]
 [8.342519   8.413309   8.255924   ... 8.316984   8.273986   8.158829  ]
 ...
 [2.8418589  2.7785451  2.7138045  ... 2.7604265  2.7793896  2.8269858 ]
 [9.283175   9.3799095  9.280408   ... 9.237655   9.280862   9.342032  ]
 [1.4814007  1.4663504  1.4826678  ... 1.530016   1.5210544  1.5423166 ]]
