In [1]:
import numpy as np 
import pandas as pd
import tensorflow as tf
import math
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the training CSV: the first column is taken as the labels, every
# remaining column as the flattened image values.
input_file = pd.read_csv('./input/train.csv')
images = input_file.iloc[:,1:].values
labels = input_file.iloc[:,:1].values  # sliced with :1 so the result stays 2-D, shape (n, 1)

# Hold out 2% of the rows for validation. A fixed random_state makes the
# split identical on every Restart & Run All, so runs are comparable.
train_images,test_images,train_labels,test_labels = train_test_split(
    images, labels, test_size=0.02, random_state=42)


In [3]:
def set_up_images(train_images,test_images,train_labels,test_labels):
    """Reshape and rescale the image arrays and flatten the label arrays.

    Each image array is reshaped to (n, 28, 28, 1), where n is the size of
    its first axis, and every value is divided by 255. Each label array is
    flattened to one dimension. A progress message is printed before the
    training arrays and before the test arrays are processed.

    Args:
        train_images: array whose rows each hold 28*28 values.
        test_images: array whose rows each hold 28*28 values.
        train_labels: label array for ``train_images`` (any shape).
        test_labels: label array for ``test_images`` (any shape).

    Returns:
        Tuple ``(training_images, testing_images, training_labels,
        testing_labels)``.
    """
    def _to_scaled_batch(flat_pixels):
        # Keep the leading (sample) axis, unfold the rest to 28x28x1,
        # then divide by 255.
        sample_count = flat_pixels.shape[0]
        return flat_pixels.reshape(sample_count, 28, 28, 1) / 255

    print("Begin the set up for training images...")
    scaled_train = _to_scaled_batch(train_images)
    flat_train_labels = train_labels.reshape(-1)

    print("Begin the set up for test images...")
    scaled_test = _to_scaled_batch(test_images)
    flat_test_labels = test_labels.reshape(-1)

    return scaled_train, scaled_test, flat_train_labels, flat_test_labels
        
        
# Build the model-ready arrays from the raw split, then report their shapes
# as a quick sanity check.
(training_images,
 testing_images,
 training_labels,
 testing_labels) = set_up_images(train_images, test_images, train_labels, test_labels)

print("Training_images.shape: {}".format(training_images.shape))
print("training_labels.shape: {}".format(training_labels.shape))
print("testing_images.shape: {}".format(testing_images.shape))
print("testing_labels.shape: {}".format(testing_labels.shape))

Begin the set up for training images...
Begin the set up for test images...
Training_images.shape: (41160, 28, 28, 1)
training_labels.shape: (41160,)
testing_images.shape: (840, 28, 28, 1)
testing_labels.shape: (840,)


In [4]:
device = '/gpu:0'
# NOTE: learning_rate is only referenced by the commented-out Momentum
# optimizer below; the Adam optimizer that is actually used has its own
# explicit lr=0.001.
learning_rate = 5e-3
input_shape = (28,28,1)
channel_1 = 16
channel_2 = 32
channel_3 = 64
channel_4 = 128
channel_5 = 256
num_classes = 10

tf.reset_default_graph()


def conv_bn(filters, initializer, **conv_kwargs):
    """Return a ``[Conv2D, BatchNormalization]`` pair as a list of layers.

    The convolution uses a 3x3 kernel, stride 1, "same" padding, ReLU
    activation, the given kernel initializer and a zero-initialized bias.

    Args:
        filters: number of output channels of the convolution.
        initializer: kernel initializer passed to the convolution.
        **conv_kwargs: extra keyword arguments forwarded to ``Conv2D``
            (e.g. ``input_shape`` for the first layer of the model).

    Returns:
        A two-element list: the ``Conv2D`` layer followed by a
        ``BatchNormalization`` layer.
    """
    return [
        tf.keras.layers.Conv2D(filters,(3,3),(1,1),"same",
                               activation=tf.nn.relu,use_bias=True,
                               kernel_initializer=initializer,
                               bias_initializer=tf.zeros_initializer(),
                               **conv_kwargs),
        tf.keras.layers.BatchNormalization(),
    ]


with tf.device(device):
    initializer = tf.variance_scaling_initializer(scale=2.0)

    # Five conv+BN stages (16 -> 32 -> 64 -> 128 -> 256 channels) with a
    # stride-2 max-pool after the 2nd and 4th stage, then a dense classifier.
    # input_shape is given to the first layer so the expected input shape is
    # declared up front.
    layers = (
        conv_bn(channel_1, initializer, input_shape=input_shape)
        + conv_bn(channel_2, initializer)
        + [tf.keras.layers.MaxPool2D(strides=2)]
        + conv_bn(channel_3, initializer)
        + conv_bn(channel_4, initializer)
        + [tf.keras.layers.MaxPool2D(strides=2)]
        + conv_bn(channel_5, initializer)
        + [
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(num_classes, kernel_initializer=initializer,
                                  kernel_regularizer=tf.keras.regularizers.l2(0.01)),
            # The dense outputs are batch-normalized before the softmax.
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('softmax'),
        ]
    )
    model = tf.keras.Sequential(layers)

    optimizer = tf.keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    # Alternative optimizer (uses learning_rate defined above):
    #optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True)

    # Labels are passed as integer class ids (not one-hot), hence the sparse loss.
    model.compile(optimizer,"sparse_categorical_crossentropy", metrics=['accuracy'])
    model.fit(training_images, training_labels, batch_size=100, epochs=10, validation_data=(testing_images,testing_labels))


Train on 41160 samples, validate on 840 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [5]:
# Read the test CSV; every column is used as image data.
test_file = pd.read_csv('./input/test.csv')
pre_len = test_file.shape[0]  # number of rows, i.e. images to predict
# Same preprocessing as the training images: reshape to (n, 28, 28, 1)
# and divide by 255.
pre_images = test_file.values.reshape(pre_len, 28, 28, 1) / 255
print(pre_images.shape)

(28000, 28, 28, 1)


In [6]:
# Predict class probabilities for every test image; result has one row per
# image and one column per class.
result = model.predict(pre_images, batch_size=100, verbose=0)

# Pick the most probable class per row in one vectorized call instead of a
# Python loop over every row.
predict = np.argmax(result, axis=1)

# ImageId is 1-based and sized from the predictions themselves, so the cell
# keeps working if the test set is not exactly 28000 rows.
submission = pd.DataFrame({
    "ImageId": range(1, len(predict) + 1),
    "Label": predict.astype(np.int32),
})
submission.to_csv("submission.csv",index = False)