# Intro
This notebook is a sandbox for trying out different ideas on the Kaggle Digit Recognizer data, from preprocessing through model training to generating a submission file.

In [1]:
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
import os
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow import keras
from tensorflow.keras import layers

# Single source of truth for the seed used by `set_seed`.
SEED = 35643419


def set_seed(seed=SEED):
    """Seed the random number generators this notebook relies on.

    Seeds Python's built-in ``random`` module, NumPy's legacy global
    generator and TensorFlow's global generator, then exports the
    ``PYTHONHASHSEED`` and ``TF_DETERMINISTIC_OPS`` environment variables.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Local import: `random` is not part of the notebook's top-level imports.
    import random

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    # NOTE: Python reads PYTHONHASHSEED only at interpreter start-up, so this
    # assignment affects child processes, not the already-running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'


set_seed(SEED)

In [2]:
# Load the labelled training set and the unlabelled test set from the
# Kaggle input directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/digit-recognizer/train.csv',
        '/kaggle/input/digit-recognizer/test.csv',
    )
)

In [3]:
#train.head()

In [4]:
# The digit labels are roughly evenly distributed across the classes
#sns.histplot(train.label)

In [5]:
# Separate the pixel columns from the target column.
pixels = train.drop(columns='label')
labels = train['label']

# View each flat row of pixels as a 28x28 image (used for visual inspection).
images = pixels.values.reshape((-1, 28, 28))

#for i in [0,1,2,3,4]:
 #   plt.imshow(images[i], cmap='gray')
  #  plt.title(f"Label: {labels[i]}")
   # plt.show()

In [6]:
# Hold out 30% of the labelled rows for validation; the fixed random_state
# makes the split repeatable.
pixels_train, pixels_valid, labels_train, labels_valid = train_test_split(
    pixels, labels, random_state=35643419, test_size=0.3
)

In [7]:
# Reshape both splits from flat pixel rows into stacks of 28x28 images.
images_train, images_valid = (
    split_pixels.values.reshape(-1, 28, 28)
    for split_pixels in (pixels_train, pixels_valid)
)

In [8]:
# One-hot encode the labels of both splits into 10 classes, as required by
# the categorical cross-entropy loss.
labels_train_one_hot, labels_valid_one_hot = (
    tf.keras.utils.to_categorical(split_labels, num_classes=10)
    for split_labels in (labels_train, labels_valid)
)

In [9]:
# Append a trailing channel axis of size 1 to each image stack.
images_train_with_channel, images_valid_with_channel = (
    image_stack[:, :, :, np.newaxis]
    for image_stack in (images_train, images_valid)
)

In [10]:
# Small CNN classifier: rescale -> one convolution -> pooling -> softmax.
model = keras.Sequential([
    # Declare the input shape on the first element of the stack so the model
    # is built immediately. The original `input_shape` argument was attached
    # to the Conv2D layer, which is not the first layer.
    keras.Input(shape=(28, 28, 1)),
    # Normalisation (not augmentation): multiply every pixel value by 1/255.
    preprocessing.Rescaling(scale=1./255),
    # Filter + detect: 124 filters of size 3x3, moving 1 pixel in both
    # dimensions, with 'same' padding so borders are treated equally.
    layers.Conv2D(filters=124, kernel_size=3, strides=1, padding='same', activation='relu'),
    # Max pooling over 2x2 windows.
    # NOTE(review): with strides=1 and padding='same' the spatial size is not
    # reduced, so this layer does not condense the feature maps. Left
    # unchanged to keep the architecture as trained; consider strides=2.
    layers.MaxPool2D(pool_size=2, strides=1, padding='same'),
    layers.Flatten(),
    # One softmax output per digit class.
    layers.Dense(10, activation='softmax'),
])

In [11]:
# Configure training: RMSprop optimiser, categorical cross-entropy loss
# (the labels are one-hot encoded) and accuracy as the reported metric.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [12]:
# Train for 20 epochs, passing the held-out split as validation data, and
# keep the returned history object.
history = model.fit(
    x=images_train_with_channel,
    y=labels_train_one_hot,
    epochs=20,
    validation_data=(images_valid_with_channel, labels_valid_one_hot),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [13]:
# Tabulate the recorded training history, one column per entry of
# `history.history`.
history_frame = pd.DataFrame(data=history.history)

In [14]:
#history_frame.loc[5:, ['loss', 'val_loss']].plot()
#history_frame.loc[5:, ['accuracy', 'val_accuracy']].plot()

In [15]:
# Score the trained model on the held-out validation split and report its
# accuracy.
valid_loss, valid_acc = model.evaluate(
    x=images_valid_with_channel,
    y=labels_valid_one_hot,
)
print('valid_acc:', valid_acc)

valid_acc: 0.9843651056289673


In [16]:
# Give the test set the same layout as the training images: 28x28 images
# first, then a trailing channel axis of size 1.
images_test = test.values.reshape((-1, 28, 28))
images_test_with_channel = images_test[:, :, :, np.newaxis]

In [17]:
# Predict on the channel-expanded test images so the input has the same
# rank as the arrays the model was trained and validated on.
pred = model.predict(images_test_with_channel)



In [18]:
# Pick, for every test image, the index of the highest score along the last
# (class) axis of the 2-D prediction array.
predicted_classes = np.argmax(pred, axis=-1)

In [19]:
# Image ids are 1-based and run up to the number of predictions.
n_predictions = len(predicted_classes)
image_ids = np.arange(1, n_predictions + 1)

In [20]:
# Assemble the submission table: one row per test image with its id and
# predicted digit.
submission_columns = {'ImageId': image_ids, 'Label': predicted_classes}
df_submission = pd.DataFrame(submission_columns)

In [21]:
# Write the submission table to 'submission2.csv' in the working directory,
# omitting the DataFrame index so only the two named columns are written.
df_submission.to_csv('submission2.csv', index=False)