In [32]:
import logging

import pandas as pd
import numpy as np

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, MaxPool2D
from keras import backend as K
import matplotlib.pyplot as plt

# the data, split between train and test sets
from keras.utils import np_utils
from matplotlib import pyplot
from sklearn.model_selection import KFold
import tensorflow as tf
from tensorflow.python.keras import Sequential
from tensorflow.python.client import device_lib

# from tensorflow.python.keras.optimizers import SGD

In [33]:
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(funcName)20s() - %(message)s', datefmt='%d-%b-%y %H:%M:%S',
    handlers=[logging.StreamHandler()]
)
log = logging.getLogger(__name__)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print(device_lib.list_local_devices())

Num GPUs Available:  1
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 263772746669355802
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 5717884928
locality {
  bus_id: 1
  links {
  }
}
incarnation: 17217770424582476533
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 3070, pci bus id: 0000:08:00.0, compute capability: 8.6"
xla_global_id: 416903419
]


In [34]:
def initialize_data():
     # This method should eventually be changed to become an SQL DB instead
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32')
    X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32')

    # # One hot Code
    y_train = np_utils.to_categorical(y_train)
    y_test = np_utils.to_categorical(y_test)
    num_classes = y_test.shape[1]

    # convert from integers to floats
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    # normalize to range [0, 1]
    X_train = (X_train / 255.0)
    X_test = (X_test / 255.0)

    return (X_train, y_train), (X_test, y_test)

In [35]:
def generate_dataframe(X_train, y_train, X_test, y_test):
    log.debug("Creating train Dataframe")

    train_df = pd.DataFrame(columns=["PixelArray", "Result"])
    for image in range(0, len(X_train)):
        dict_row = {"PixelArray": [X_train[image]], "Result": [y_train[image]]}
        df_row = pd.DataFrame(dict_row)
        train_df = pd.concat([train_df, df_row], ignore_index=True)

    log.debug("Successfully created train Dataframe")
    log.debug("Creating test Dataframe")

    test_df = pd.DataFrame(columns=["PixelArray", "Result"])
    for image in range(0, len(X_test)):
        dict_row = {"PixelArray": [X_test[image]], "Result": [y_test[image]]}
        df_row = pd.DataFrame(dict_row)
        test_df = pd.concat([test_df, df_row], ignore_index=True)

    log.debug("Successfully created test Dataframe")
    #
    # log.debug("Number of training rows = {}".format(len(train_df)))
    # log.debug("Number of nested arrays = {}".format(len(train_df.loc[0]["PixelArray"])))
    # log.debug("Length of inner array = {}".format(len(train_df.loc[0]["PixelArray"][0])))
    #
    # log.debug("Number of testing rows = {}".format(len(test_df)))
    # log.debug("Number of nested arrays = {}".format(len(test_df.loc[0]["PixelArray"])))
    # log.debug("Length of inner array = {}".format(len(test_df.loc[0]["PixelArray"][0])))

    return (train_df, test_df)

In [36]:
def generate_mnist_tuples(train_df: pd.DataFrame, test_df: pd.DataFrame):
    reconstructed_x_train = train_df["PixelArray"].to_numpy().tolist()
    reconstructed_x_test = test_df["PixelArray"].to_numpy().tolist()

    reconstructed_y_train = train_df["Result"].to_numpy().tolist()
    reconstructed_y_test = test_df["Result"].to_numpy().tolist()
    # log.info(reconstructed_x_train)
    # (X_train, y_train), (X_test, y_test) = mnist.load_data()

    log.debug("Length of reconstructed_x_train = {}".format(len(reconstructed_x_train)))
    log.debug("Length of reconstructed_x_train[0] = {}".format(len(reconstructed_x_train[0])))
    log.debug("Length of reconstructed_x_train[0][0] = {}".format(len(reconstructed_x_train[0][0])))
    # tuple_comparator(reconstructed_x_train, X_train)

    return (reconstructed_x_train, reconstructed_y_train), (reconstructed_x_test, reconstructed_y_test)

In [37]:
(X_train, y_train), (X_test, y_test) = initialize_data()

In [38]:
print(y_train)

[[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]]


In [39]:
train_df, test_df = generate_dataframe(X_train, y_train, X_test, y_test)
train_df.head()

24-Jan-23 19:19:36 -   generate_dataframe() - Creating train Dataframe
24-Jan-23 19:19:57 -   generate_dataframe() - Successfully created train Dataframe
24-Jan-23 19:19:57 -   generate_dataframe() - Creating test Dataframe
24-Jan-23 19:19:59 -   generate_dataframe() - Successfully created test Dataframe


Unnamed: 0,PixelArray,Result
0,"[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ..."
1,"[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,"[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ..."
3,"[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,"[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [40]:
def generate_mnist_tuples(train_df: pd.DataFrame, test_df: pd.DataFrame):
    reconstructed_x_train = train_df["PixelArray"].to_numpy().tolist()
    reconstructed_x_test = test_df["PixelArray"].to_numpy().tolist()
    reconstructed_y_train = train_df["Result"].to_numpy().tolist()
    reconstructed_y_test = test_df["Result"].to_numpy().tolist()
    # log.info(reconstructed_x_train)
    # (X_train, y_train), (X_test, y_test) = mnist.load_data()

    log.debug("Length of reconstructed_x_train = {}".format(len(reconstructed_x_train)))
    log.debug("Length of reconstructed_x_train[0] = {}".format(len(reconstructed_x_train[0])))
    log.debug("Length of reconstructed_x_train[0][0] = {}".format(len(reconstructed_x_train[0][0])))
    # tuple_comparator(reconstructed_x_train, X_train)

    return (reconstructed_x_train, reconstructed_y_train), (reconstructed_x_test, reconstructed_y_test)

In [41]:
(reconstructed_x_train, reconstructed_y_train), (reconstructed_x_test, reconstructed_y_test) = generate_mnist_tuples(train_df, test_df)

24-Jan-23 19:19:59 - generate_mnist_tuples() - Length of reconstructed_x_train = 60000
24-Jan-23 19:19:59 - generate_mnist_tuples() - Length of reconstructed_x_train[0] = 28
24-Jan-23 19:19:59 - generate_mnist_tuples() - Length of reconstructed_x_train[0][0] = 28


In [42]:
reconstructed_x_train[0][0][0]

array([0.], dtype=float32)

In [43]:
X_train[0][0][0]

array([0.], dtype=float32)

In [44]:
type(reconstructed_x_train[0][0][0])

numpy.ndarray

In [45]:
type(X_train[0][0][0])

numpy.ndarray

In [46]:
type(X_train[0][0][0][0])

numpy.float32

In [47]:
type(reconstructed_x_train[0][0][0][0])

numpy.float32

In [48]:
def create_model():
    # Create model
    # Building CNN
    model = Sequential()
    # relu: rectified linear unit activation function
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'))
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(10, activation='softmax'))
    # model.summary()

    # compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [49]:
len(X_train)

60000

In [50]:
len(y_train)

60000

In [51]:
len(reconstructed_x_train)

60000

In [52]:
len(reconstructed_y_train)

60000

In [53]:
# reconstructed_y_train = np_utils.to_categorical(reconstructed_y_train)
# reconstructed_y_test = np_utils.to_categorical(reconstructed_y_test)
# num_classes = reconstructed_y_test.shape[1]

In [54]:
len(reconstructed_y_train)

60000

In [55]:
log.debug("Length of reconstructed_x_train = {}".format(len(reconstructed_x_train)))
log.debug("Length of reconstructed_x_train[0] = {}".format(len(reconstructed_x_train[0])))
log.debug("Length of reconstructed_x_train[0][0] = {}".format(len(reconstructed_x_train[0][0])))

log.debug("Length of reconstructed_y_train = {}".format(len(reconstructed_y_train)))
log.debug("Length of reconstructed_y_train[0] = {}".format(len(reconstructed_y_train[0])))

log.debug("Length of reconstructed_x_test = {}".format(len(reconstructed_x_test)))
log.debug("Length of reconstructed_x_test[0] = {}".format(len(reconstructed_x_test[0])))
log.debug("Length of reconstructed_x_test[0][0] = {}".format(len(reconstructed_x_test[0][0])))

log.debug("Length of reconstructed_y_test = {}".format(len(reconstructed_y_test)))
log.debug("Length of reconstructed_y_test[0] = {}".format(len(reconstructed_y_test[0])))

24-Jan-23 19:20:00 -             <module>() - Length of reconstructed_x_train = 60000
24-Jan-23 19:20:00 -             <module>() - Length of reconstructed_x_train[0] = 28
24-Jan-23 19:20:00 -             <module>() - Length of reconstructed_x_train[0][0] = 28
24-Jan-23 19:20:00 -             <module>() - Length of reconstructed_y_train = 60000
24-Jan-23 19:20:00 -             <module>() - Length of reconstructed_y_train[0] = 10
24-Jan-23 19:20:00 -             <module>() - Length of reconstructed_x_test = 10000
24-Jan-23 19:20:00 -             <module>() - Length of reconstructed_x_test[0] = 28
24-Jan-23 19:20:00 -             <module>() - Length of reconstructed_x_test[0][0] = 28
24-Jan-23 19:20:00 -             <module>() - Length of reconstructed_y_test = 10000
24-Jan-23 19:20:00 -             <module>() - Length of reconstructed_y_test[0] = 10


In [56]:
model = create_model()
# https://stackoverflow.com/questions/65474081/valueerror-data-cardinality-is-ambiguous-make-sure-all-arrays-contain-the-same
model.fit(np.array(reconstructed_x_train), np.array(reconstructed_y_train), validation_data=(np.array(reconstructed_x_test), np.array(reconstructed_y_test)), epochs=10, batch_size=200)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1fd22abdd00>

In [57]:
def test(X_train, model):
    test_images = X_train[1:5]
    test_images = test_images.reshape(test_images.shape[0], 28, 28)

    for i, test_image in enumerate(test_images, start=1):
        org_image = test_image
        test_image = test_image.reshape(1, 28, 28, 1)
        prediction = model.predict_classes(test_image, verbose=0)

        print("Predicted digit: {}".format(prediction[0]))
        plt.subplot(220 + i)
        plt.axis('off')
        plt.title("Predicted digit: {}".format(prediction[0]))
        plt.imshow(org_image, cmap=plt.get_cmap('gray'))

    plt.show()

In [58]:
# # TEST
# # for images alreday
# test(X_train, model)
#