# Intro
This notebook is a sandbox for trying out different ideas, from preprocessing through to modeling and everything else.

In [None]:
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
import os
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def set_seed(seed=35643419):
    """Seed the random number generators used by this notebook.

    Seeds Python's ``random`` module, NumPy and TensorFlow with the same
    value, and exports ``PYTHONHASHSEED`` and ``TF_DETERMINISTIC_OPS`` to the
    process environment.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Local import: the notebook's import cell does not bring in ``random``.
    import random

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    # NOTE: PYTHONHASHSEED is read when the interpreter starts, so setting it
    # here only reaches child processes, not hashing in the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Presumably requests deterministic TensorFlow op implementations --
    # confirm the flag is honoured by the installed TF version.
    os.environ['TF_DETERMINISTIC_OPS'] = '1'


set_seed(35643419)

In [None]:
# Read the competition's training and test tables into DataFrames.
train = pd.read_csv(filepath_or_buffer='/kaggle/input/digit-recognizer/train.csv')
test = pd.read_csv(filepath_or_buffer='/kaggle/input/digit-recognizer/test.csv')

In [None]:
#train.head()

In [None]:
# The digit labels are distributed more or less evenly across the classes.
#sns.histplot(train.label)

In [None]:
# Split the training table into the pixel columns and the target column.
pixels = train.drop(columns='label')
labels = train['label']

# View every row of pixels as a 28x28 array.
images = pixels.to_numpy().reshape(-1, 28, 28)

#for i in [0,1,2,3,4]:
 #   plt.imshow(images[i], cmap='gray')
  #  plt.title(f"Label: {labels[i]}")
   # plt.show()

In [None]:
# Disabled augmentation config: the triple-quoted text below is a bare string
# expression, so no ImageDataGenerator is actually created.
# NOTE(review): vertical_flip=True is probably not label-preserving for
# handwritten digits -- reconsider before re-enabling.
'''
datagen = ImageDataGenerator(
    shear_range=0.2,
    zoom_range=0.2,
    vertical_flip=True,
)
'''

In [None]:
# Hold out 30% of the rows for validation; the fixed random_state keeps the
# split identical between runs.
(pixels_train, pixels_valid,
 labels_train, labels_valid) = train_test_split(
    pixels, labels, test_size=0.3, random_state=35643419
)

In [None]:
# Reshape each split from flat pixel rows into stacks of 28x28 arrays.
images_train, images_valid = (
    frame.values.reshape(-1, 28, 28)
    for frame in (pixels_train, pixels_valid)
)

In [None]:
# One-hot encode the labels of both splits over 10 classes.
labels_train_one_hot, labels_valid_one_hot = (
    tf.keras.utils.to_categorical(split_labels, num_classes=10)
    for split_labels in (labels_train, labels_valid)
)

In [None]:
# Append a trailing length-1 axis so each image is (28, 28, 1).
images_train_with_channel = np.expand_dims(images_train, axis=-1)
images_valid_with_channel = np.expand_dims(images_valid, axis=-1)

In [None]:
# Disabled augmentation pipeline: kept as a bare string, so nothing below runs.
# NOTE(review): if this is re-enabled, the second flow() call receives
# images_train_with_channel as x (by then already rebound to the first
# generator) together with labels_valid_one_hot. It presumably should use the
# validation images instead -- confirm before turning this back on.
'''
images_train_with_channel = datagen.flow(
    x=images_train_with_channel,
    y=labels_train_one_hot,
    batch_size=32,
    seed=35643419
)

images_valid_with_channel = datagen.flow(
    x=images_train_with_channel,
    y=labels_valid_one_hot,
    batch_size=32,
    seed=35643419
)
'''

In [None]:
#pretrained_base = tf.keras.models.load_model(
 #   '../input/cv-course-models/cv-course-models/inceptionv3',
#)
#pretrained_base.trainable = False

In [None]:
# Small convolutional classifier: three Conv2D + MaxPool2D stages followed by
# a 10-way softmax over the flattened feature maps.
#
# ReLU is applied through each Conv2D's `activation` argument, so the separate
# `layers.ReLU()` layers (a no-op on already non-negative values) are dropped.
# The input shape is declared once, on the first layer.
#
# NOTE(review): the pooling layers use strides=1, so each one shrinks the
# feature map by only a single pixel per dimension -- confirm this is intended
# rather than the default stride (equal to pool_size).
model = keras.Sequential([
    # Multiply incoming pixel values by 1/255 (presumably raw 0-255 intensities).
    preprocessing.Rescaling(scale=1./255, input_shape=(28, 28, 1)),

    layers.Conv2D(filters=32, kernel_size=3, strides=1, padding='valid', activation='relu'),
    layers.MaxPool2D(pool_size=2, strides=1, padding='valid'),

    layers.Conv2D(filters=64, kernel_size=3, strides=1, padding='valid', activation='relu'),
    layers.MaxPool2D(pool_size=2, strides=1, padding='valid'),

    layers.Conv2D(filters=128, kernel_size=3, strides=1, padding='valid', activation='relu'),
    layers.MaxPool2D(pool_size=2, strides=1, padding='valid'),

    layers.Flatten(),
    layers.Dense(10, activation='softmax'),
])

In [None]:
# Configure training: categorical cross-entropy (the labels are one-hot),
# the RMSprop optimizer, and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
# Stop training once the monitored validation loss has failed to improve by at
# least 0.001 for 5 epochs, then roll back to the best weights seen.
# `monitor='val_loss'` is the callback's default, spelled out for clarity.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=5,
    restore_best_weights=True,
)

In [None]:
# Write the model to 'best_model.h5' each time the validation loss reaches a
# new minimum, logging a message on every save.
model_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath='best_model.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train for up to 20 epochs in batches of 64, validating on the held-out split
# after every epoch; the callbacks handle early stopping and checkpointing.
history = model.fit(
    x=images_train_with_channel,
    y=labels_train_one_hot,
    batch_size=64,
    epochs=20,
    validation_data=(images_valid_with_channel, labels_valid_one_hot),
    callbacks=[early_stopping, model_checkpoint],
)

In [None]:
# Collect the per-epoch metrics recorded during fit() into a DataFrame.
history_frame = pd.DataFrame(data=history.history)

In [None]:
# Plot the loss and accuracy curves, leaving out the first row (index 0).
history_frame[['loss', 'val_loss']].loc[1:].plot()
history_frame[['accuracy', 'val_accuracy']].loc[1:].plot()

In [None]:
# Reload the checkpoint file written by the ModelCheckpoint callback.
best_model = keras.models.load_model(filepath='best_model.h5')

In [None]:
# Score the reloaded model on the validation split and report its accuracy.
valid_loss, valid_acc = best_model.evaluate(
    x=images_valid_with_channel,
    y=labels_valid_one_hot,
)
print('valid_acc:', valid_acc)

In [None]:
# Shape the test table like the training data: 28x28 arrays, then a trailing
# length-1 channel axis.
images_test = test.to_numpy().reshape(-1, 28, 28)
images_test_with_channel = np.expand_dims(images_test, axis=-1)

In [None]:
# Predict on the channel-expanded test array so each sample has the same
# (28, 28, 1) shape the network was declared and trained with; the original
# passed the 3-D `images_test` and left `images_test_with_channel` unused.
pred = best_model.predict(images_test_with_channel)

In [None]:
# Take the index of the highest score in each row as the predicted label.
predicted_classes = pred.argmax(axis=1)

In [None]:
# Number the predictions 1..N in order.
image_ids = np.arange(len(predicted_classes)) + 1

In [None]:
# Assemble the two-column submission table: image id and predicted label.
df_submission = pd.DataFrame(
    data={'ImageId': image_ids, 'Label': predicted_classes},
)

In [None]:
# Write the submission table to disk without the DataFrame index column.
df_submission.to_csv(path_or_buf='deeper.csv', index=False)