In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.backend import clear_session
import tensorflow.keras.layers as L
import numpy as np
import pandas as pd
import sklearn
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

import os
# Print the full path of every file found under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    joined_paths = (os.path.join(dirname, filename) for filename in filenames)
    for joined_path in joined_paths:
        print(joined_path)

/kaggle/input/digit-recognizer/sample_submission.csv
/kaggle/input/digit-recognizer/train.csv
/kaggle/input/digit-recognizer/test.csv


## Data preprocessing

In [2]:
# Read the training set, the test set and the sample submission, in that order.
train, test, sample = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/digit-recognizer/train.csv",
        "/kaggle/input/digit-recognizer/test.csv",
        "/kaggle/input/digit-recognizer/sample_submission.csv",
    )
)

In [3]:
# Reset Keras' global state before any model is built in this notebook.
keras.backend.clear_session()

In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Split the training frame: the first column is the label, the remaining columns are features.
x_train = train[train.columns[1:]]
y_train = train.label

# Normalise the data (assumes raw pixel intensities in [0, 255], giving [-0.5, 0.5] — TODO confirm).
# `np.float` was deprecated in NumPy 1.20 and removed in 1.24; np.float64 is the dtype it aliased.
x_train_float = x_train.astype(np.float64) / 255 - 0.5
x_test_float = test.astype(np.float64) / 255 - 0.5

# Convert the integer labels into a one-hot (binary class) matrix with 10 columns.
y_train_oh = keras.utils.to_categorical(y_train, 10)

# Reshape each flat row into a 28x28 array.
x_train_float = x_train_float.values.reshape(-1, 28, 28)
x_test_float = x_test_float.values.reshape(-1, 28, 28)

# Hold out 30% of the training rows for validation; the fixed seed keeps the split reproducible.
x_train_float, x_train_float_val, y_train_oh, y_train_oh_val = train_test_split(
    x_train_float, y_train_oh, random_state=49, test_size=0.3
)

## Deep learning

In [5]:
# Network architecture: three Conv2D + MaxPool2D stages, then a small dense classifier head.
# NOTE(review): the Conv2D layers are created without an `activation` argument, so they use
# Keras' default (no activation) — confirm whether activation='relu' was intended.
model = Sequential()
model.add(L.Conv2D(16, kernel_size=3, strides=1, padding='same', input_shape=(28, 28, 1)))
model.add(L.MaxPool2D())
model.add(L.Conv2D(32, kernel_size=3, strides=1, padding='same'))
model.add(L.MaxPool2D())
model.add(L.Conv2D(64, kernel_size=3, strides=1, padding='same'))
model.add(L.MaxPool2D())
model.add(L.Flatten())
# No `input_shape` here: this layer is fed by Flatten, and only the first layer of a
# Sequential model defines the input shape.
model.add(Dense(64, activation='relu'))  # first hidden layer
model.add(Dense(32, activation='relu'))  # second hidden layer
model.add(Dense(10, activation='softmax'))  # output layer: one probability per class
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 16)        160       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 32)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 64)          18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 3, 3, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 576)               0

In [6]:
# Training configuration, gathered in one place and handed to compile().
compile_options = {
    'optimizer': 'adam',  # chosen because it will be faster
    'loss': 'categorical_crossentropy',  # minimise cross-entropy
    'metrics': ['accuracy'],  # report the share of correct answers
}
model.compile(**compile_options)

In [7]:
# Append a trailing axis of length 1 so each image matches the (28, 28, 1) model input.
train_images = np.expand_dims(x_train_float, axis=-1)
val_images = np.expand_dims(x_train_float_val, axis=-1)

history = model.fit(
    train_images,
    y_train_oh,
    validation_data=(val_images, y_train_oh_val),
    epochs=90,  # 90 passes over the dataset
    batch_size=32,  # 32 samples per gradient step
)

Epoch 1/90
Epoch 2/90
Epoch 3/90
Epoch 4/90
Epoch 5/90
Epoch 6/90
Epoch 7/90
Epoch 8/90
Epoch 9/90
Epoch 10/90
Epoch 11/90
Epoch 12/90
Epoch 13/90
Epoch 14/90
Epoch 15/90
Epoch 16/90
Epoch 17/90
Epoch 18/90
Epoch 19/90
Epoch 20/90
Epoch 21/90
Epoch 22/90
Epoch 23/90
Epoch 24/90
Epoch 25/90
Epoch 26/90
Epoch 27/90
Epoch 28/90
Epoch 29/90
Epoch 30/90
Epoch 31/90
Epoch 32/90
Epoch 33/90
Epoch 34/90
Epoch 35/90
Epoch 36/90
Epoch 37/90
Epoch 38/90
Epoch 39/90
Epoch 40/90
Epoch 41/90
Epoch 42/90
Epoch 43/90
Epoch 44/90
Epoch 45/90
Epoch 46/90
Epoch 47/90
Epoch 48/90
Epoch 49/90
Epoch 50/90
Epoch 51/90
Epoch 52/90
Epoch 53/90
Epoch 54/90
Epoch 55/90
Epoch 56/90
Epoch 57/90
Epoch 58/90
Epoch 59/90
Epoch 60/90
Epoch 61/90
Epoch 62/90
Epoch 63/90
Epoch 64/90
Epoch 65/90
Epoch 66/90
Epoch 67/90
Epoch 68/90
Epoch 69/90
Epoch 70/90
Epoch 71/90
Epoch 72/90
Epoch 73/90
Epoch 74/90
Epoch 75/90
Epoch 76/90
Epoch 77/90
Epoch 78/90
Epoch 79/90
Epoch 80/90
Epoch 81/90
Epoch 82/90
Epoch 83/90
Epoch 84/90
E

## Output

In [8]:
# `Sequential.predict_classes` was removed in TensorFlow 2.6. Taking the argmax over the
# softmax output yields the same class indices and works on both old and new versions.
class_probabilities = model.predict(x_test_float[:, :, :, np.newaxis])
predictions = np.argmax(class_probabilities, axis=-1)

# Build the submission frame, reusing the ImageId column from the sample submission.
output = pd.DataFrame({'ImageId': sample.ImageId, 'Label': predictions})
output.to_csv('my_submission.csv', index=False)