# 导入数据

In [1]:
import pandas as pd
# Read the competition's training and test CSV files into DataFrames.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/digit-recognizer/train.csv',
                     '../input/digit-recognizer/test.csv')
)

In [2]:
# Preview the first rows of the training frame (a label column followed by pixel columns).
train_data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Preview the first rows of the test frame (pixel columns only, no label).
test_data.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# 拆分训练集

In [4]:
# Separate the target column from the pixel features.
y_train = train_data[['label']]                # one-column DataFrame holding the labels
x_train = train_data.drop(columns=['label'])   # every remaining (pixel) column

In [5]:
# Check that the feature frame no longer contains the label column.
x_train.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Preview the extracted labels.
y_train.head()

Unnamed: 0,label
0,1
1,0
2,1
3,4
4,0


# 将 1 维图像转换为 2 维图像

In [7]:
# (rows, columns) of the training features and of the test set.
x_train.shape, test_data.shape

((42000, 784), (28000, 784))

In [8]:
# Turn every flat row of pixel values into a 28 x 28 image array.
x_train = x_train.to_numpy().reshape(len(x_train), 28, 28)
x_test = test_data.to_numpy().reshape(len(test_data), 28, 28)

# 添加色彩通道维度

In [9]:
# Append a trailing single-channel axis so each image is 28 x 28 x 1.
image_shape = (-1, 28, 28, 1)
x_train, x_test = x_train.reshape(image_shape), x_test.reshape(image_shape)

# Report the resulting array shapes.
for caption, images in (('Train Data shape      :', x_train),
                        ('Test Data shape       :', x_test)):
    print(caption, images.shape)

Train Data shape      : (42000, 28, 28, 1)
Test Data shape       : (28000, 28, 28, 1)


# 归一化

In [10]:
# Rescale the pixel values by dividing both image arrays by 255.
x_train = x_train / 255
x_test = x_test / 255

# 对标签进行独热编码

In [11]:
import tensorflow as tf
# One-hot encode the digit labels straight from the source frame.
# Reading from train_data (instead of re-encoding y_train in place) keeps this
# cell safe to re-run, and num_classes=10 pins the encoding width to the ten
# digit classes rather than inferring it from the largest label present.
y_train = tf.keras.utils.to_categorical(train_data['label'], num_classes=10)

In [12]:
# Shape of the one-hot encoded label array.
y_train.shape

(42000, 10)

# 模型定义

In [13]:
# Hyperparameters
batch = 42   # mini-batch size: number of images fed to the model per training step
epochs = 20  # upper bound on the number of training epochs
steps_per_epoch = x_train.shape[0] // batch   # number of mini-batches that make up one epoch
seed = 4       # random seed, reused wherever a seed is needed
tf.random.set_seed(seed)  # seed TensorFlow's global random generator from the same constant
lr_rate = 0.001  # initial learning rate

In [14]:
# Data augmentation: configure the generator's rotation and zoom ranges.
augmentation_options = {'rotation_range': 10, 'zoom_range': 0.15}
datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)

# Iterator that yields augmented (images, labels) mini-batches of size `batch`.
image_generator = datagen.flow(x=x_train, y=y_train, batch_size=batch, seed=seed)

In [15]:
# Define the model
from tensorflow.keras import layers, models


def _conv_stage(filters, **first_conv_kwargs):
    """Return the layers of one convolutional stage.

    A stage is Conv2D(2x2) -> Conv2D(4x4) -> BatchNormalization -> MaxPooling2D,
    with both convolutions using `filters` output channels and ReLU.
    Extra keyword arguments are forwarded to the first convolution only
    (used below to declare the network's input shape).
    """
    return [
        layers.Conv2D(filters=filters, kernel_size=(2, 2), activation='relu', **first_conv_kwargs),
        layers.Conv2D(filters, (4, 4), activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(2, 2),
    ]


# Two convolutional stages (32 then 64 filters) on 28 x 28 x 1 inputs.
network_layers = _conv_stage(32, input_shape=(28, 28, 1))
network_layers += _conv_stage(64)
network_layers += [
    # Final convolution before the dense classifier head.
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),

    layers.Flatten(),
    layers.Dense(150, activation=tf.nn.relu),   # hidden layer
    layers.Dense(150, activation=tf.nn.relu),   # hidden layer

    # Ten outputs, one per digit label 0-9; softmax turns them into probabilities.
    layers.Dense(10, activation=tf.nn.softmax),
]
model = models.Sequential(network_layers)

# Stop training when val_accuracy has not improved by at least 1e-4 for
# 3 epochs, and restore the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_accuracy',
                                                  patience = 3,
                                                  min_delta = 1e-4,
                                                  restore_best_weights = True)

# Save the model weights whenever val_accuracy reaches a new maximum.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath = 'saved_model/best_model_todate',
                                                 save_best_only = True,
                                                 save_weights_only = True,
                                                 monitor='val_accuracy',
                                                 mode='max')

# Terminate training if the loss becomes NaN.
tn = tf.keras.callbacks.TerminateOnNaN()

# Step-indexed decay schedule: multiply the learning rate by 0.80 every
# quarter of an epoch (steps_per_epoch // 4 optimizer steps).
scheduler = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate = lr_rate,
                                                           decay_steps = steps_per_epoch // 4,
                                                           decay_rate= 0.80,
                                                           staircase=True)


def lr_for_epoch(epoch, current_lr):
    """Return the learning rate to apply at the start of `epoch` (0-based).

    LearningRateScheduler passes an epoch index, whereas `scheduler` is indexed
    by optimizer step. Feeding it the raw epoch number leaves the exponent at
    floor(epoch / decay_steps) == 0, i.e. a constant learning rate, so the
    epoch is converted to the step count reached at the start of that epoch.

    The result is capped at `current_lr` so that a reduction made by
    ReduceLROnPlateau is kept instead of being reset on the next epoch.
    """
    scheduled_lr = float(scheduler(epoch * steps_per_epoch))
    return min(float(current_lr), scheduled_lr)


lr_scheduler = tf.keras.callbacks.LearningRateScheduler(lr_for_epoch)

# Cut the learning rate to a tenth when val_loss has stopped improving for 4 epochs.
lr_plateau = tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss',
                                                  factor = 0.1,
                                                  patience = 4,
                                                  verbose = 3)

# Loss: categorical cross-entropy computed on probabilities (not logits).
loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)

# Optimizer: Adam starting from the configured learning rate.
optimizer = tf.keras.optimizers.Adam(lr_rate)

2022-10-31 03:36:59.575362: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-31 03:36:59.702280: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-31 03:36:59.703059: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-31 03:36:59.705014: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

# 模型编译与训练

In [16]:
# Compile the model with the loss, optimizer and accuracy metric.
model.compile(
    loss=loss_object,
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [17]:
# Train the model.
# Hold out the last 10% of the training images for validation, so that the
# val_accuracy / val_loss values driving early stopping, checkpointing and
# ReduceLROnPlateau are measured on images the model is not fitted on.
# NOTE(review): assumes the rows of train.csv are not sorted by label (the
# preview above shows mixed labels) - shuffle before splitting if they are.
val_size = max(1, x_train.shape[0] // 10)
x_fit, y_fit = x_train[:-val_size], y_train[:-val_size]
x_val, y_val = x_train[-val_size:], y_train[-val_size:]

# Augment only the images that are actually used for fitting.
fit_generator = datagen.flow(x_fit, y_fit, batch_size=batch, seed=seed)

model.fit(fit_generator,
          epochs=epochs,
          validation_data=(x_val, y_val),
          steps_per_epoch=x_fit.shape[0] // batch,  # one pass over the fitting subset
          callbacks = [lr_scheduler, lr_plateau, checkpoint_callback, early_stopping, tn]
          )

2022-10-31 03:37:02.844064: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/20


2022-10-31 03:37:04.400088: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20


<keras.callbacks.History at 0x7fe3012f76d0>

In [18]:
# Predict class probabilities for the test images, then take the index of the
# largest probability in each row as the predicted digit (vectorised argmax
# over the class axis instead of a Python loop over every row).
import numpy as np
predict = model.predict(x_test)
labels = np.argmax(predict, axis=1)

# 提交结果

In [19]:
# Load the sample submission, overwrite its Label column with the predictions, and preview it.
submission = (
    pd.read_csv('../input/digit-recognizer/sample_submission.csv')
    .assign(Label=labels)
)
submission.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3


In [20]:
# Write the submission file without the DataFrame index column.
submission.to_csv('submission.csv', index = False)