In [None]:
# Importing TensorFlow libraries
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_datasets as tfds
import os

In [None]:
# Importing library for plots
import matplotlib.pyplot as plt

In [None]:
# Importing pandas and numpy
import pandas as pd
import numpy as np

In [None]:
# Walk the Kaggle input directory and print the full path of every file found
for root, _subdirs, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, name) for name in names):
        print(full_path)

In [None]:
# Read the three competition CSV files into pandas DataFrames, in this order:
# training set, test set, sample-submission template.
df_train, df_test, df_sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/dmnist-2021/train.csv',
        '/kaggle/input/dmnist-2021/test.csv',
        '/kaggle/input/dmnist-2021/sample_submission.csv',
    )
)

In [None]:
# Side length, in pixels, of one square image: every sample is reshaped to
# (pix, pix, 1), i.e. 28 x 28 = 784 pixel values per image
pix = 28

In [None]:
# Build the training image tensor in one vectorized step instead of slicing
# the DataFrame row by row (each per-row iloc call constructs a new Series).
# Columns 0 and 1 are skipped (column 1 is read separately as the labels);
# every remaining column is one pixel of a pix x pix single-channel image.
# Resulting shape: (n_samples, pix, pix, 1).
values = df_train.iloc[:, 2:].to_numpy().reshape((len(df_train), pix, pix, 1))

In [None]:
# Allocate the model inputs and extract the targets from the training data:
#   y_all - labels, taken from the second column of the training frame
#   x_all - zero-initialised array with one (pix, pix, 1) slot per image in
#           `values`; it is populated afterwards by gen_x_train
y_all = np.array(df_train.iloc[:, 1])
x_all = np.zeros((len(values), pix, pix, 1))

In [None]:
# Copy the processed images into the pre-allocated numpy array
def gen_x_train():
  """Fill the global ``x_all`` array with the images stored in ``values``.

  Both ``values`` and ``x_all`` are read from the notebook's global namespace.
  The copy is a single slice assignment rather than a Python-level loop over
  the images; values are cast to ``x_all``'s dtype on assignment.

  Returns:
    The same ``x_all`` array object, modified in place.
  """
  n_images = len(values)
  # Nothing to copy when there are no images; skipping the assignment also
  # avoids a broadcasting error if an empty `values` happens to be 1-D.
  if n_images:
    x_all[:n_images] = values
  return x_all

In [None]:
# gen_x_train fills the global x_all in place and returns that same array
x_all = gen_x_train()

In [None]:
# Sanity check: show one training image together with its label.
# The trailing channel axis is dropped before plotting because imshow is only
# documented for (M, N), (M, N, 3) and (M, N, 4) inputs; a (28, 28, 1) array
# is rejected by some Matplotlib versions.
plt.imshow(x_all[55, :, :, 0])
print(y_all[55])

In [None]:
# Data preparation for training
def prep_training(x_all, y_all, n_validation=1000):
  """Return the training part of the dataset.

  The first ``n_validation`` samples are left out and everything after them
  is returned as training data.

  Args:
    x_all: Sliceable sequence/array of input images.
    y_all: Sliceable sequence/array of labels, aligned with ``x_all``.
    n_validation: Number of leading samples to skip. Defaults to 1000, the
      value that used to be hard-coded.

  Returns:
    Tuple ``(x_train, y_train)`` with the samples from index
    ``n_validation`` onwards.

  Raises:
    ValueError: If ``n_validation`` is negative (a negative slice start
      would silently select samples from the end instead).
  """
  if n_validation < 0:
    raise ValueError("n_validation must be non-negative")
  return x_all[n_validation:], y_all[n_validation:]

In [None]:
# Training split: every sample from index 1000 onwards (see prep_training)
x_train, y_train = prep_training(x_all, y_all)

In [None]:
# Model architecture: flatten each (pix, pix, 1) image, pass it through one
# hidden ReLU layer with 128 units, and finish with a 10-way softmax output.
mdl = tf.keras.Sequential()
mdl.add(tf.keras.layers.Flatten(input_shape=(pix, pix, 1)))
mdl.add(tf.keras.layers.Dense(128, activation='relu'))
mdl.add(tf.keras.layers.Dense(10, activation="softmax"))

# Training setup: RMSprop optimizer, sparse categorical cross-entropy loss
# and accuracy as the reported metric.
mdl.compile(
    optimizer=tf.keras.optimizers.RMSprop(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Train for 100 epochs with mini-batches of 64 samples.
mdl.fit(x_train, y_train, batch_size=64, epochs=100)

In [None]:
# Data preparation for validation
def prep_validation(x_train, y_train, n_validation=1000):
  """Return the validation part of the dataset: its first samples.

  Note: despite the parameter names, the returned samples are only unseen by
  training when the full dataset is passed in, because prep_training skips
  the same leading samples.

  Args:
    x_train: Sliceable sequence/array of input images.
    y_train: Sliceable sequence/array of labels, aligned with ``x_train``.
    n_validation: Number of leading samples to return. Defaults to 1000, the
      value that used to be hard-coded.

  Returns:
    Tuple ``(x_test, y_test)`` with the first ``n_validation`` samples.

  Raises:
    ValueError: If ``n_validation`` is negative (a negative slice end would
      silently select almost the whole dataset instead).
  """
  if n_validation < 0:
    raise ValueError("n_validation must be non-negative")
  return x_train[:n_validation], y_train[:n_validation]

In [None]:
# Hold-out split: the first 1000 samples, which prep_training leaves out of
# the training data. (Calling prep_training here would evaluate the model on
# the very samples it was trained on.)
x_test, y_test = prep_validation(x_all, y_all)

In [None]:
# Report the loss and accuracy of the trained model on the x_test / y_test split
mdl.evaluate(x_test, y_test)

In [None]:
# Predict labels for the images in test.csv
# Column 0 is skipped; all remaining columns are the pixel values of one
# image, reshaped in a single vectorized step to (n_images, pix, pix, 1).
values_test = df_test.iloc[:, 1:].to_numpy().reshape((len(df_test), pix, pix, 1))

# Predictions are matched to submission rows by position, so the template
# must contain exactly one row per test image.
assert len(df_sub) == len(values_test), "sample submission and test set differ in length"

# One batched predict call for the whole test set; argmax over the class axis
# turns the softmax scores into one label per image. Every row of the
# submission is filled, not only the first ten.
predicted_labels = np.argmax(mdl.predict(values_test), axis=1)
df_sub.iloc[:, 1] = predicted_labels

In [None]:
# Sanity check: show one test image next to its row in the submission frame.
# The trailing channel axis is dropped before plotting because imshow is only
# documented for (M, N), (M, N, 3) and (M, N, 4) inputs; a (28, 28, 1) array
# is rejected by some Matplotlib versions.
plt.imshow(values_test[55, :, :, 0])
print(df_sub.iloc[55])

In [None]:
# Write the submission frame to a CSV file for Kaggle evaluation;
# index=False keeps the DataFrame index out of the file
df_sub.to_csv("rostyslav_koval_submission_21112021.csv",index=False)

In [None]:
# Display the submission frame as the cell's output for a final visual check
df_sub