In [80]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input, Dense, Dropout

train = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')

In [81]:
y_train = train["label"]
x_train = train.drop(labels=["label"], axis=1)

In [82]:
x_train = x_train.values / 255.0
x_test = test.values / 255.0

print("Shape of x_train:", x_train.shape)  
print("Shape of x_val:", x_test.shape)     

Shape of x_train: (42000, 784)
Shape of x_val: (28000, 784)


In [83]:
y_train = to_categorical(y_train, num_classes=10)

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=0)

In [84]:
nn_model = tf.keras.Sequential([
    Input(shape=(784,)),
    Dense(784, activation="relu"),
    Dropout(0.25),
    Dense(800, activation="relu"),
    Dense(10, activation="softmax")
])
nn_model.summary()

In [85]:
nn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [86]:
history = nn_model.fit(x_train, y_train, batch_size=256, epochs=10, validation_data=(x_val, y_val))

Epoch 1/10
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 28ms/step - accuracy: 0.8082 - loss: 0.6273 - val_accuracy: 0.9548 - val_loss: 0.1614
Epoch 2/10
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - accuracy: 0.9594 - loss: 0.1281 - val_accuracy: 0.9665 - val_loss: 0.1167
Epoch 3/10
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 25ms/step - accuracy: 0.9731 - loss: 0.0836 - val_accuracy: 0.9695 - val_loss: 0.1006
Epoch 4/10
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - accuracy: 0.9796 - loss: 0.0613 - val_accuracy: 0.9756 - val_loss: 0.0837
Epoch 5/10
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - accuracy: 0.9839 - loss: 0.0472 - val_accuracy: 0.9735 - val_loss: 0.0935
Epoch 6/10
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - accuracy: 0.9876 - loss: 0.0361 - val_accuracy: 0.9768 - val_loss: 0.0883
Epoch 7/10
[1m132/132

In [87]:
# predict results
results = nn_model.predict(test)

# select the indix with the maximum probability
results = np.argmax(results,axis = 1)

results = pd.Series(results,name="Label")

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step


In [88]:
submission = pd.concat([pd.Series(range(1,28001),name = "ImageId"),results],axis = 1)

submission.to_csv('/kaggle/working/submission.csv', index=False)
submission.info

<bound method DataFrame.info of        ImageId  Label
0            1      2
1            2      0
2            3      9
3            4      9
4            5      3
...        ...    ...
27995    27996      9
27996    27997      7
27997    27998      3
27998    27999      9
27999    28000      2

[28000 rows x 2 columns]>