In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
import math
%matplotlib inline

In [None]:
# Fixed seed so the train/test/validation splits (and therefore the reported
# metrics) are the same on every "Restart & Run All".
SEED = 42

data = pd.read_csv("/kaggle/input/digit-recognizer/train.csv")

# Hold out 30% of the labelled rows for testing, then 20% of the remaining
# rows for validation. `stratify` keeps the label proportions the same in
# each subset.
train_data, test_data = train_test_split(
    data, test_size=0.3, random_state=SEED, stratify=data["label"]
)
train_data, valid_data = train_test_split(
    train_data, test_size=0.2, random_state=SEED, stratify=train_data["label"]
)

# Data to generate predictions for
pred_data = pd.read_csv("/kaggle/input/digit-recognizer/test.csv")

In [None]:
# Report the number of rows in each subset (labels spelled and punctuated consistently).
print(f"Number of train examples: {train_data.shape[0]}")
print(f"Number of test examples: {test_data.shape[0]}")
print(f"Number of valid examples: {valid_data.shape[0]}")
print(f"Number of prediction examples: {pred_data.shape[0]}")

In [None]:
# Distinct label values present in the training split, in ascending order.
distinct_labels = train_data['label'].unique()
sorted_labels = np.sort(distinct_labels)
class_list = list(sorted_labels)
print(f"Class list of numbers: {class_list}")

In [None]:
# Bar chart of how many training rows carry each label.
sns.set_theme(style="darkgrid")
train_labels = train_data["label"]
sns.countplot(x=train_labels)

In [None]:
IMAGE_SHAPE = (28, 28, 1)  # per-sample shape fed to the network
PIXEL_MAX = 255            # pixel values are divided by this


def _split_images_and_labels(frame):
    """Return ``(images, labels)`` arrays built from a labelled DataFrame.

    The ``label`` column is returned as an int32 vector. Every other column
    is treated as a pixel value: cast to float32, reshaped to
    ``(n_rows, 28, 28, 1)`` and divided by 255.

    Selecting the label by name (rather than by position) keeps features and
    targets correct even if the column order of the frame changes.
    """
    labels = frame["label"].to_numpy(dtype="int32")
    pixels = frame.drop(columns="label").to_numpy(dtype="float32")
    images = pixels.reshape(-1, *IMAGE_SHAPE) / PIXEL_MAX
    return images, labels


x_train, y_train = _split_images_and_labels(train_data)
x_test, y_test = _split_images_and_labels(test_data)
x_valid, y_valid = _split_images_and_labels(valid_data)

In [None]:
# Same preprocessing as the labelled images: float32, (n, 28, 28, 1), divided by 255.
pred_pixels = pred_data.values.astype('float32')
num_pred_rows = pred_pixels.shape[0]
x_pred = pred_pixels.reshape(num_pred_rows, 28, 28, 1).astype('float32') / 255

In [None]:
# Number of samples (first array dimension) in the train, test and prediction sets.
num_train_examples, num_test_examples, num_pred_examples = (
    samples.shape[0] for samples in (x_train, x_test, x_pred)
)

In [None]:
# Small CNN for 28x28 single-channel inputs: two conv + max-pool stages,
# then a 128-unit dense layer and a 10-unit softmax output.
model = tf.keras.Sequential()

# Stage 1: 32 filters of size 3x3, then 2x2 max-pooling with stride 2
model.add(tf.keras.layers.Conv2D(32, (3,3), padding='same', activation=tf.nn.relu,
                                 input_shape=(28, 28, 1)))
model.add(tf.keras.layers.MaxPooling2D((2, 2), strides=2))

# Stage 2: 64 filters of size 3x3, then 2x2 max-pooling with stride 2
model.add(tf.keras.layers.Conv2D(64, (3,3), padding='same', activation=tf.nn.relu))
model.add(tf.keras.layers.MaxPooling2D((2, 2), strides=2))

# Classifier head
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

In [None]:
BATCH_SIZE = 32
EPOCHS = 10

# Labels are integer class ids, hence the sparse categorical cross-entropy loss.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Pass BATCH_SIZE to fit() directly. Previously it was only used to compute
# steps_per_epoch while fit() silently used its own default batch size, so
# changing BATCH_SIZE would have desynchronised the two. With array inputs
# and an explicit batch_size, Keras works out the steps per epoch itself.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(x_valid, y_valid),
)

In [None]:
# Evaluate on the held-out test split. batch_size is passed explicitly so the
# number of evaluation steps always matches BATCH_SIZE, instead of pairing a
# hand-computed `steps` with Keras' default batch size.
test_loss, test_accuracy = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE)
print('Accuracy on test dataset:', test_accuracy)

In [None]:
# Aliases used by the plotting cells, plus the model's scores for the test images.
test_images, test_labels = x_test, y_test
predictions_test = model.predict(test_images, verbose=0)

In [None]:
def plot_image(i, predictions_array, true_labels, images):
    """Draw sample ``i`` and caption it with the predicted and true labels.

    Parameters
    ----------
    i : index of the sample to show.
    predictions_array : indexable of per-sample score vectors; entry ``i`` is used.
    true_labels : indexable of ground-truth labels; entry ``i`` is used.
    images : indexable of images; channel 0 of ``images[i]`` is rendered.

    The x-axis caption reads ``"<predicted> <100 * max score>% (<true>)"``.
    It is blue when the predicted label equals the true label, red otherwise.
    Draws on the current matplotlib axes and returns nothing.
    """
    scores = predictions_array[i]
    expected = true_labels[i]
    picture = images[i]

    # Bare image: no grid, no tick marks.
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])

    plt.imshow(picture[..., 0], cmap=plt.cm.binary)

    predicted_label = np.argmax(scores)
    color = 'blue' if predicted_label == expected else 'red'

    caption = "{} {:2.0f}% ({})".format(predicted_label, 100*np.max(scores), expected)
    plt.xlabel(caption, color=color)

In [None]:
# 5x5 grid showing the first 25 test images with predicted vs. true labels.
num_rows = num_cols = 5
num_images = num_rows*num_cols
plt.figure(figsize=(2*num_cols, 2*num_rows))

for panel in range(1, num_images + 1):
    # subplot positions are 1-based, sample indices 0-based
    plt.subplot(num_rows, num_cols, panel)
    plot_image(panel - 1, predictions_test, test_labels, test_images)

In [None]:
def plot_image_pred(i, predictions_array, images):
    """Draw sample ``i`` and caption it with the predicted label.

    Parameters
    ----------
    i : index of the sample to show.
    predictions_array : indexable of per-sample score vectors; entry ``i`` is used.
    images : indexable of images; channel 0 of ``images[i]`` is rendered.

    The x-axis caption reads ``"<predicted> <100 * max score>%"`` and is
    always blue. Draws on the current matplotlib axes and returns nothing.
    """
    scores = predictions_array[i]
    picture = images[i]

    # Bare image: no grid, no tick marks.
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])

    plt.imshow(picture[..., 0], cmap=plt.cm.binary)

    predicted_label = np.argmax(scores)
    caption = "{} {:2.0f}%".format(predicted_label, 100*np.max(scores))
    plt.xlabel(caption, color='blue')

In [None]:
# Alias used by the plotting cell, plus the model's scores for the prediction images.
pred_images = x_pred
predictions = model.predict(pred_images, verbose=0)

In [None]:
# 5x5 grid showing the first 25 prediction images with their predicted labels.
num_rows = num_cols = 5
num_images = num_rows*num_cols
plt.figure(figsize=(2*num_cols, 2*num_rows))

for panel in range(1, num_images + 1):
    # subplot positions are 1-based, sample indices 0-based
    plt.subplot(num_rows, num_cols, panel)
    plot_image_pred(panel - 1, predictions, pred_images)

In [None]:
# Fill the sample submission's "Label" column with the highest-scoring class
# for each prediction row, then write it to the working directory.
predicted_digits = pd.Series(np.argmax(predictions, axis=1))

solution = pd.read_csv("/kaggle/input/digit-recognizer/sample_submission.csv")
solution["Label"] = predicted_digits
solution.to_csv("sample_submission.csv", index=False)