# 1. Build CNN

## 1.1 Preparing

In [1]:
import numpy as np
import tensorflow as tf

## 1.2 Data preprocessing

In [5]:
# Import the handwritten-digits dataset
from sklearn.datasets import load_digits

# Data preprocessing
digits = load_digits()

# Feature matrix cast to float32; the shape printed below is (1797, 64).
X_data = digits.data.astype(np.float32)
# Labels cast to float32 and reshaped into a single column; printed shape is (1797, 1).
Y_data = digits.target.astype(np.float32).reshape(-1,1)
print(X_data.shape)
print(Y_data.shape)

(1797, 64)
(1797, 1)


In [11]:
'''
Data normalization scales the data proportionally so that it falls
into a small, specific interval. This removes the unit constraints of the data
and turns it into dimensionless pure numbers, so that indicators with different units or magnitudes can be compared and weighted.
'''
from sklearn.preprocessing import MinMaxScaler
# Fit the scaler on X_data and rebind X_data to the scaled result
# (the values printed below all lie between 0 and 1).
scaler = MinMaxScaler()
X_data = scaler.fit_transform(X_data)
print('标准化后的数据:\n', X_data)

from sklearn.preprocessing import OneHotEncoder
# Encode the label column as one-hot rows and densify the encoder output.
# NOTE(review): .todense() yields a numpy.matrix rather than a plain ndarray;
# .toarray() may be preferable — confirm against the cells that consume Y.
Y = OneHotEncoder().fit_transform(Y_data).todense()  # one-hot encoding
print('\nOne-Hot独热编码:\n', Y)

标准化后的数据:
 [[ 0.      0.      0.3125 ...,  0.      0.      0.    ]
 [ 0.      0.      0.     ...,  0.625   0.      0.    ]
 [ 0.      0.      0.     ...,  1.      0.5625  0.    ]
 ..., 
 [ 0.      0.      0.0625 ...,  0.375   0.      0.    ]
 [ 0.      0.      0.125  ...,  0.75    0.      0.    ]
 [ 0.      0.      0.625  ...,  0.75    0.0625  0.    ]]

One-Hot独热编码:
 [[ 1.  0.  0. ...,  0.  0.  0.]
 [ 0.  1.  0. ...,  0.  0.  0.]
 [ 0.  0.  1. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  1.  0.]
 [ 0.  0.  0. ...,  0.  0.  1.]
 [ 0.  0.  0. ...,  0.  1.  0.]]


In [12]:
# Reshape to the image layout (batch, height, width, channels):
# every 64-value row becomes an 8x8 image with a single channel.
X = X_data.reshape(-1,8,8,1)

batch_size = 8  # mini-batch gradient descent (MBGD) with batch_size set to 8

def generatebatch(X, Y, n_examples, batch_size):
    """Yield consecutive mini-batches sliced from ``X`` and ``Y``.

    Args:
        X: Sliceable container of inputs (indexed as ``X[start:end]``).
        Y: Sliceable container of targets, aligned with ``X``.
        n_examples: Number of leading examples to draw batches from.
        batch_size: Number of examples in every batch.

    Yields:
        ``(batch_xs, batch_ys)`` tuples, each slice spanning ``batch_size``
        positions. Only ``n_examples // batch_size`` batches are produced,
        so a trailing remainder smaller than ``batch_size`` is not yielded.
    """
    full_batches = n_examples // batch_size
    # Step through the start offsets of each full batch.
    for lo in range(0, full_batches * batch_size, batch_size):
        hi = lo + batch_size
        yield X[lo:hi], Y[lo:hi]  # one mini-batch

In [15]:
# Clear the default graph stack and reset the global default graph.
# NOTE(review): this is the TensorFlow 1.x spelling; under TensorFlow 2.x the
# call lives at tf.compat.v1.reset_default_graph — confirm the target version.
tf.reset_default_graph()

## 1.3 layer实现

### 输入层