# In [None]:
# -*- coding=utf-8 -*-
'''
AlexNet:
GPU0: in-[conv+ReLU-pool+norm]-[conv-ReLU-pool-norm]+[conv-ReLU]+[conv-ReLU]+[conv-ReLU]+pool-[fc+dropout]-[fc+dropout]-out0
GPU1: in-[conv+ReLU-pool]-[conv+ReLU-pool]+[conv-ReLU]+[conv-ReLU]+[conv-ReLU]+pool-[fc+dropout]-[fc+dropout]-out1
out: out0.join.out1
---------------------
input: 227×227×3
output: 1000
'''
import tensorflow as tf
from tensorflow.python.client import device_lib

# Neural-network parameters (not referenced by the functions visible in this part of the file)
INPUT_NODE = 784
OUTPUT_NODE = 10

# Image data parameters; NUM_CHANNELS is the input channel count of the first convolution
IMAGE_SIZE = 28
NUM_CHANNELS = 1
NUM_LABELS = 10

# Convolution layer 1: kernel side length, output channels, stride,
# followed by the max-pool window side length and stride
CONV1_SIZE = 11
CONV1_DEEP = 48
CONV1_STRIDES = 4
POOL1_KSIZE = 3
POOL1_STRIDES = 2
# Convolution layer 2 (followed by max pooling)
CONV2_SIZE = 5
CONV2_DEEP = 128
CONV2_STRIDES = 1
POOL2_KSIZE = 3
POOL2_STRIDES = 2
# Convolution layer 3 (no pooling)
CONV3_SIZE = 3
CONV3_DEEP = 192
CONV3_STRIDES = 1
# Convolution layer 4 (no pooling)
CONV4_SIZE = 3
CONV4_DEEP = 192
CONV4_STRIDES = 1
# Convolution layer 5 (followed by max pooling)
CONV5_SIZE = 3
CONV5_DEEP = 128
CONV5_STRIDES = 1
POOL5_KSIZE = 3
POOL5_STRIDES = 2
# Number of nodes in each tower's fully connected layers (FC_SIZE)
# and in the final output layer (OUT_SIZE)
FC_SIZE = 2048
OUT_SIZE = 1000

def get_local_gpu_num():
    """Return how many local devices report ``device_type == 'GPU'``."""
    local_devices = device_lib.list_local_devices()
    return sum(1 for device in local_devices if device.device_type == 'GPU')

def get_weight_variable(shape, stddev=0.1, regularizer=None):
    """Get the variable named 'weights' in the current variable scope.

    The variable is obtained through ``tf.get_variable`` and initialized from
    a truncated normal distribution.

    Args:
        shape: Shape of the weight tensor.
        stddev: Standard deviation of the truncated-normal initializer.
        regularizer: Optional callable. When given, ``regularizer(weights)``
            is added to the 'losses' graph collection.

    Returns:
        The weights variable.
    """
    weights = tf.get_variable(
        'weights', shape,
        initializer=tf.truncated_normal_initializer(stddev=stddev))
    # Identity comparison: a regularizer object must never be asked to
    # evaluate ``!= None`` through a custom ``__ne__``.
    if regularizer is not None:
        tf.add_to_collection('losses', regularizer(weights))
    return weights

def get_biase_variable(shape, initv=0.0):
    """Get the variable named 'biases' in the current variable scope.

    Args:
        shape: Shape of the bias tensor.
        initv: Constant value every element is initialized to.

    Returns:
        The biases variable obtained through ``tf.get_variable``.
    """
    constant_init = tf.constant_initializer(initv)
    return tf.get_variable('biases', shape, initializer=constant_init)

def conv_relu_op(input_tensor, size, channel, deep, strides, padding='SAME'):
    """Apply a 2-D convolution, add a bias and pass the result through ReLU.

    Args:
        input_tensor: Input to ``tf.nn.conv2d``.
        size: Side length of the square kernel.
        channel: Number of input channels declared for the kernel.
        deep: Number of output channels.
        strides: Stride used for both spatial dimensions.
        padding: Padding mode forwarded to ``tf.nn.conv2d``.

    Returns:
        The activated feature map.
    """
    # Weights are requested before biases, as in any other layer of this file.
    kernel = get_weight_variable([size, size, channel, deep])
    bias = get_biase_variable([deep])
    stride_spec = [1, strides, strides, 1]
    pre_activation = tf.nn.bias_add(
        tf.nn.conv2d(input_tensor, kernel, strides=stride_spec, padding=padding),
        bias)
    return tf.nn.relu(pre_activation)

def max_pool_op(input_tensor, ksize=3, strides=1, padding='SAME'):
    """Max-pool ``input_tensor`` with a square window.

    Args:
        input_tensor: Input to ``tf.nn.max_pool``.
        ksize: Side length of the pooling window.
        strides: Stride used for both spatial dimensions.
        padding: Padding mode forwarded to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    window = [1, ksize, ksize, 1]
    step = [1, strides, strides, 1]
    return tf.nn.max_pool(input_tensor, ksize=window, strides=step, padding=padding)

def norm_op(input_tensor, lsize=4):
    """Apply local response normalization (``tf.nn.lrn``) to ``input_tensor``.

    Args:
        input_tensor: Input to ``tf.nn.lrn``.
        lsize: Passed as the second positional argument of ``tf.nn.lrn``.

    Returns:
        The normalized tensor.
    """
    lrn_settings = {'bias': 1.0, 'alpha': 0.001 / 9.0, 'beta': 0.75}
    return tf.nn.lrn(input_tensor, lsize, **lrn_settings)

def inference(input_tensor, train, avg_class, regularizer, reuse=False):
    """Build the forward pass of the two-tower network and return the logits.

    Both towers (``inference_gpu0`` and ``inference_gpu1``) receive the same
    input. Their outputs are concatenated along the feature axis and fed to a
    single linear output layer. When ``get_local_gpu_num()`` reports at least
    two GPUs, each tower is pinned to its own device; otherwise both are built
    without an explicit device.

    Args:
        input_tensor: Input batch handed unchanged to both towers.
        train: Truthy during training; forwarded to the towers, which use it
            to enable dropout.
        avg_class: Forwarded to the towers. The visible code never reads it,
            so moving-average support is not implemented here.
        regularizer: Optional callable applied to the fully connected weights;
            its result is added to the 'losses' collection.
        reuse: ``reuse`` flag for every variable scope, including the towers'.

    Returns:
        Logits of shape [batch, OUT_SIZE]; no activation is applied.
    """
    if get_local_gpu_num() >= 2:
        # TensorFlow device strings use a colon: '/gpu:<index>'.
        with tf.device('/gpu:0'):
            fc_gpu0 = inference_gpu0(input_tensor, train, avg_class, regularizer, reuse=reuse)
        with tf.device('/gpu:1'):
            fc_gpu1 = inference_gpu1(input_tensor, train, avg_class, regularizer, reuse=reuse)
    else:
        fc_gpu0 = inference_gpu0(input_tensor, train, avg_class, regularizer, reuse=reuse)
        fc_gpu1 = inference_gpu1(input_tensor, train, avg_class, regularizer, reuse=reuse)

    # Each tower yields [batch, FC_SIZE]; join on the feature axis so the
    # result matches the [FC_SIZE * 2, OUT_SIZE] weight matrix below.
    # Keyword form of the TensorFlow >= 1.0 signature.
    fc = tf.concat(values=[fc_gpu0, fc_gpu1], axis=1)

    # Final output layer: purely linear, produces the logits.
    with tf.variable_scope('layer8-softmax', reuse=reuse):
        # Only fully connected weights are regularized.
        weights = get_weight_variable([FC_SIZE * 2, OUT_SIZE], regularizer=regularizer)
        biases = get_biase_variable([OUT_SIZE], initv=0.1)
        logit = tf.matmul(fc, weights) + biases

    return logit
    
    
def inference_gpu0(input_tensor, train, avg_class, regularizer, reuse=False):
    """Build the first tower: five conv layers, then two fully connected layers.

    This tower applies max pooling after conv layers 1, 2 and 5 and uses no
    local response normalization.
    NOTE(review): the header comment of this file lists normalization for the
    GPU0 path and none for GPU1, the opposite of the code - confirm which is
    intended.

    Args:
        input_tensor: Input batch; conv layer 1 declares NUM_CHANNELS input
            channels.
        train: Truthy to apply dropout (keep probability 0.5) after each
            fully connected layer.
        avg_class: Accepted for interface compatibility; not used here.
        regularizer: Optional callable applied to the fully connected weights.
        reuse: ``reuse`` flag for every variable scope of this tower.

    Returns:
        Output of the second fully connected layer, shape [batch, FC_SIZE].
    """
    # Layer 1: convolution + ReLU, then max pooling.
    with tf.variable_scope('layer1-conv-gpu0', reuse=reuse):
        conv1 = conv_relu_op(input_tensor, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP, CONV1_STRIDES)
        pool1 = max_pool_op(conv1, POOL1_KSIZE, POOL1_STRIDES)

    # Layer 2: convolution + ReLU, then max pooling.
    with tf.variable_scope('layer2-conv-gpu0', reuse=reuse):
        conv2 = conv_relu_op(pool1, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP, CONV2_STRIDES)
        pool2 = max_pool_op(conv2, POOL2_KSIZE, POOL2_STRIDES)

    # Layer 3: convolution + ReLU. Its input is pool2, which carries
    # CONV2_DEEP channels, so that is the kernel's input depth.
    with tf.variable_scope('layer3-conv-gpu0', reuse=reuse):
        conv3 = conv_relu_op(pool2, CONV3_SIZE, CONV2_DEEP, CONV3_DEEP, CONV3_STRIDES)

    # Layer 4: convolution + ReLU.
    with tf.variable_scope('layer4-conv-gpu0', reuse=reuse):
        conv4 = conv_relu_op(conv3, CONV4_SIZE, CONV3_DEEP, CONV4_DEEP, CONV4_STRIDES)

    # Layer 5: convolution + ReLU, then max pooling.
    with tf.variable_scope('layer5-conv-gpu0', reuse=reuse):
        conv5 = conv_relu_op(conv4, CONV5_SIZE, CONV4_DEEP, CONV5_DEEP, CONV5_STRIDES)
        pool5 = max_pool_op(conv5, POOL5_KSIZE, POOL5_STRIDES)

    # Flatten the feature map for the fully connected layers. -1 lets
    # TensorFlow infer the batch size, so an unknown (None) batch works.
    pool_shape = pool5.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    reshaped = tf.reshape(pool5, [-1, nodes])

    # Layer 6: fully connected + ReLU, dropout while training.
    with tf.variable_scope('layer6-fc-gpu0', reuse=reuse):
        # Only fully connected weights are regularized.
        weights = get_weight_variable([nodes, FC_SIZE], regularizer=regularizer)
        biases = get_biase_variable([FC_SIZE], initv=0.1)
        fc6 = tf.nn.relu(tf.matmul(reshaped, weights) + biases)
        # Rebind fc6 so the dropped-out activations are what layer 7 consumes.
        if train:
            fc6 = tf.nn.dropout(fc6, 0.5)

    # Layer 7: fully connected + ReLU, dropout while training.
    with tf.variable_scope('layer7-fc-gpu0', reuse=reuse):
        weights = get_weight_variable([FC_SIZE, FC_SIZE], regularizer=regularizer)
        biases = get_biase_variable([FC_SIZE], initv=0.1)
        fc7 = tf.nn.relu(tf.matmul(fc6, weights) + biases)
        if train:
            fc7 = tf.nn.dropout(fc7, 0.5)

    return fc7

def inference_gpu1(input_tensor, train, avg_class, regularizer, reuse=False):
    """Build the second tower: five conv layers, then two fully connected layers.

    This tower applies max pooling after conv layers 1, 2 and 5, and local
    response normalization after the pooling of layers 1 and 2.
    NOTE(review): the header comment of this file lists normalization for the
    GPU0 path and none for GPU1, the opposite of the code - confirm which is
    intended.

    Args:
        input_tensor: Input batch; conv layer 1 declares NUM_CHANNELS input
            channels.
        train: Truthy to apply dropout (keep probability 0.5) after each
            fully connected layer.
        avg_class: Accepted for interface compatibility; not used here.
        regularizer: Optional callable applied to the fully connected weights.
        reuse: ``reuse`` flag for every variable scope of this tower.

    Returns:
        Output of the second fully connected layer, shape [batch, FC_SIZE].
    """
    # Layer 1: convolution + ReLU, max pooling, then LRN.
    with tf.variable_scope('layer1-conv-gpu1', reuse=reuse):
        conv1 = conv_relu_op(input_tensor, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP, CONV1_STRIDES)
        pool1 = max_pool_op(conv1, POOL1_KSIZE, POOL1_STRIDES)
        norm1 = norm_op(pool1)

    # Layer 2: convolution + ReLU, max pooling, then LRN.
    with tf.variable_scope('layer2-conv-gpu1', reuse=reuse):
        conv2 = conv_relu_op(norm1, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP, CONV2_STRIDES)
        pool2 = max_pool_op(conv2, POOL2_KSIZE, POOL2_STRIDES)
        norm2 = norm_op(pool2)

    # Layer 3: convolution + ReLU. Its input is norm2, which carries
    # CONV2_DEEP channels, so that is the kernel's input depth.
    with tf.variable_scope('layer3-conv-gpu1', reuse=reuse):
        conv3 = conv_relu_op(norm2, CONV3_SIZE, CONV2_DEEP, CONV3_DEEP, CONV3_STRIDES)

    # Layer 4: convolution + ReLU.
    with tf.variable_scope('layer4-conv-gpu1', reuse=reuse):
        conv4 = conv_relu_op(conv3, CONV4_SIZE, CONV3_DEEP, CONV4_DEEP, CONV4_STRIDES)

    # Layer 5: convolution + ReLU, then max pooling.
    with tf.variable_scope('layer5-conv-gpu1', reuse=reuse):
        conv5 = conv_relu_op(conv4, CONV5_SIZE, CONV4_DEEP, CONV5_DEEP, CONV5_STRIDES)
        pool5 = max_pool_op(conv5, POOL5_KSIZE, POOL5_STRIDES)

    # Flatten the feature map for the fully connected layers. -1 lets
    # TensorFlow infer the batch size, so an unknown (None) batch works.
    pool_shape = pool5.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    reshaped = tf.reshape(pool5, [-1, nodes])

    # Layer 6: fully connected + ReLU, dropout while training.
    with tf.variable_scope('layer6-fc-gpu1', reuse=reuse):
        # Only fully connected weights are regularized.
        weights = get_weight_variable([nodes, FC_SIZE], regularizer=regularizer)
        biases = get_biase_variable([FC_SIZE], initv=0.1)
        fc6 = tf.nn.relu(tf.matmul(reshaped, weights) + biases)
        # Rebind fc6 so the dropped-out activations are what layer 7 consumes.
        if train:
            fc6 = tf.nn.dropout(fc6, 0.5)

    # Layer 7: fully connected + ReLU, dropout while training.
    with tf.variable_scope('layer7-fc-gpu1', reuse=reuse):
        weights = get_weight_variable([FC_SIZE, FC_SIZE], regularizer=regularizer)
        biases = get_biase_variable([FC_SIZE], initv=0.1)
        fc7 = tf.nn.relu(tf.matmul(fc6, weights) + biases)
        if train:
            fc7 = tf.nn.dropout(fc7, 0.5)

    return fc7