In [1]:
import numpy as np
import pandas as pd


In [2]:
#Initialise the parameter arrays of the model, i.e. build a starting model
def init_model(input_size, hidden_size, output_size, std=1e-4):
    """Create the initial parameter dictionary for the two-layer network.

    Parameters
    ----------
    input_size : int
        Number of rows of W1 (32*32*3 for the data used in this notebook).
    hidden_size : int
        Length of the hidden layer.
    output_size : int
        Output dimension, i.e. the number of classes.
    std : float
        Scale factor applied to the standard-normal draws for the weights.

    Returns
    -------
    dict
        'W1' (input_size, hidden_size) and 'W2' (hidden_size, output_size)
        hold scaled random weights; 'b1' (hidden_size,) and 'b2'
        (output_size,) are zero-filled bias vectors.
    """
    # W1 is drawn before W2 so the global NumPy random stream is consumed
    # in the same order as before.
    first_layer_weights = std * np.random.randn(input_size, hidden_size)
    second_layer_weights = std * np.random.randn(hidden_size, output_size)
    return {
        'W1': first_layer_weights,
        'b1': np.zeros(hidden_size),
        'W2': second_layer_weights,
        'b2': np.zeros(output_size),
    }
#Build the initial model: input size 32*32*3, 50 hidden units, 10 output classes
model=init_model(32*32*3,50,10)

In [3]:
#Define a two-layer neural network
def two_layer_net(X,y,model,reg):
    """Compute the loss and parameter gradients of a two-layer network.

    Architecture: input -> FC -> ReLU -> FC -> softmax.

    Parameters
    ----------
    X : ndarray, shape (N, D)
        One sample per row.
    y : integer ndarray, shape (N,)
        Index of the correct class for each row of X.
    model : dict
        Parameters 'W1' (D, H), 'b1' (H,), 'W2' (H, C), 'b2' (C,).
        The arrays are read but not modified.
    reg : float
        L2 regularisation strength.

    Returns
    -------
    loss : float
        Mean softmax cross-entropy over the N samples plus
        reg * (sum(W1**2) + sum(W2**2)).
    grads : dict
        Gradients of the loss with respect to 'W1', 'b1', 'W2' and 'b2'.
    """
    W1, b1 = model['W1'], model['b1']
    W2, b2 = model['W2'], model['b2']
    num_samples = X.shape[0]
    rows = np.arange(num_samples)

    # Forward pass.
    h1 = np.maximum(0, np.dot(X, W1) + b1)
    scores = np.dot(h1, W2) + b2  # (N, C)

    # Softmax with every row shifted by its maximum. The shift leaves the
    # probabilities unchanged but keeps np.exp from overflowing to inf,
    # which would turn the loss and every gradient into NaN.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exp_shifted = np.exp(shifted)
    row_sums = np.sum(exp_shifted, axis=1, keepdims=True)
    probs = exp_shifted / row_sums

    # Cross-entropy in log-sum-exp form, so no log of an underflowed
    # probability is ever taken.
    log_probs = shifted - np.log(row_sums)
    loss = -np.sum(log_probs[rows, y]) / num_samples
    loss += reg * (np.sum(W1**2) + np.sum(W2**2))  # L2 penalty

    # Backward pass.
    grads = {}

    # Gradient of the mean cross-entropy w.r.t. the scores:
    # (softmax probabilities - one-hot labels) / N.
    dscores = probs
    dscores[rows, y] -= 1
    dscores /= num_samples

    # Second fully connected layer; 2*reg*W is the derivative of reg*sum(W**2).
    dW2 = np.dot(h1.T, dscores) + 2 * reg * W2
    db2 = np.sum(dscores, axis=0)

    # Back through ReLU: no gradient flows where the activation was zero.
    dh1 = np.dot(dscores, W2.T)
    dh1[h1 <= 0] = 0

    # First fully connected layer.
    dW1 = np.dot(X.T, dh1) + 2 * reg * W1
    db1 = np.sum(dh1, axis=0)

    grads['W2'] = dW2
    grads['b2'] = db2
    grads['W1'] = dW1
    grads['b1'] = db1

    return loss,grads

In [4]:
#Load the data
#Start with 20 samples to check whether the model is correct
tr20=pd.read_csv("E:/Jupyter/data/tr20.csv")
na=np.array(tr20)
#The first 3072 columns are used as the features, column 3072 as the label
X_tr20=na[:,:3072]
y_tr20=na[:,3072]

In [6]:
#Loss on the 20-sample set without regularisation (reg=0);
#it should be close to ln(10) if the initial class scores are near zero
loss,grads=two_layer_net(X_tr20,y_tr20,model,0)
loss

2.3026534284840037

In [7]:
#Same check with reg=500: the L2 penalty on W1 and W2 is added to the loss
loss,grads=two_layer_net(X_tr20,y_tr20,model,500)
loss

3.0718539521244876

In [44]:
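##Start the actual training
#X,y: training set
#X_val,y_val: validation set
#model: initial model parameters
#two_layer_net: function returning the loss and gradients of the two-layer network
#learning_rate: learning rate
#verbose: whether to print progress information
#num_epochs: number of training iterations
#reg: regularisation strength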
def predict(X,model):
    """Return the index of the highest-scoring class for every row of X.

    Runs the forward pass FC -> ReLU -> FC with the parameters 'W1', 'b1',
    'W2' and 'b2' from `model`, then takes the arg-max over the class axis.
    """
    W1, b1, W2, b2 = (model[key] for key in ('W1', 'b1', 'W2', 'b2'))

    hidden = np.dot(X, W1) + b1
    hidden = np.maximum(0, hidden)  # ReLU
    class_scores = np.dot(hidden, W2) + b2
    return np.argmax(class_scores, axis=1)
def train(X, y, X_val, y_val,
    model,two_layer_net,
    num_epochs,reg,
    learning_rate=1e-3, learning_rate_decay=0.95,verbose=True):
    """Train `model` in place with full-batch gradient descent.

    Parameters
    ----------
    X, y : training samples and their labels.
    X_val, y_val : validation samples and their labels; only used for the
        accuracy shown in the progress line.
    model : dict of parameter arrays. Every array is updated in place.
    two_layer_net : callable ``(X, y, model, reg=...) -> (loss, grads)``
        where `grads` has an entry for every key of `model`.
    num_epochs : number of gradient steps; each step uses the whole
        training set.
    reg : regularisation strength forwarded to `two_layer_net`.
    learning_rate : step size of each update.
    learning_rate_decay : accepted for interface compatibility but not
        applied; the learning rate stays constant.
        NOTE(review): confirm whether decay was meant to be applied.
    verbose : if true, print a progress line every 10th iteration.

    Returns
    -------
    None
    """
    for it in range(num_epochs):
        loss, grads = two_layer_net(X,y,model,reg=reg)
        # Gradient step on every parameter array, in place.
        for param_name in model:
            model[param_name] -= learning_rate * grads[param_name]

        # The accuracies are only needed for the progress line, so the two
        # extra forward passes run only on iterations that print.
        if verbose and it % 10 == 0:
            train_acc = (predict(X,model) == y).mean()
            val_acc = (predict(X_val,model) == y_val).mean()
            print('Finished epoch %d / %d: loss %f, train_acc: %f, val_acc: %f' % (it, num_epochs, loss,train_acc,val_acc))

In [9]:
#Load 50 samples to use as the validation set
val50=pd.read_csv("E:/Jupyter/data/val50.csv")
na=np.array(val50)
X_val50=na[:,:3072]
y_val50=na[:,3072]
#Load the 1000 samples used for training
tr1000=pd.read_csv("E:/Jupyter/data/tr1000.csv")
na=np.array(tr1000)
X_tr1000=na[:,:3072]
y_tr1000=na[:,3072]

In [54]:
#First check whether the model is correct:
#300 iterations on the 20-sample set, validated on the 50-sample set
reg=0
lr=1e-4
model=init_model(32*32*3,50,10)
train(X_tr20,y_tr20,X_val50,y_val50,model,two_layer_net,300,reg,lr)

Finished epoch 0 / 300: loss 2.302507, train_acc: 0.250000, val_acc: 0.140000
Finished epoch 10 / 300: loss 2.294520, train_acc: 0.250000, val_acc: 0.140000
Finished epoch 20 / 300: loss 2.177781, train_acc: 0.250000, val_acc: 0.140000
Finished epoch 30 / 300: loss 1.998961, train_acc: 0.250000, val_acc: 0.140000
Finished epoch 40 / 300: loss 1.958202, train_acc: 0.250000, val_acc: 0.140000
Finished epoch 50 / 300: loss 1.935706, train_acc: 0.250000, val_acc: 0.140000
Finished epoch 60 / 300: loss 1.917948, train_acc: 0.250000, val_acc: 0.140000
Finished epoch 70 / 300: loss 1.899105, train_acc: 0.250000, val_acc: 0.140000
Finished epoch 80 / 300: loss 1.873242, train_acc: 0.250000, val_acc: 0.140000
Finished epoch 90 / 300: loss 1.833338, train_acc: 0.300000, val_acc: 0.140000
Finished epoch 100 / 300: loss 1.773588, train_acc: 0.300000, val_acc: 0.160000
Finished epoch 110 / 300: loss 1.695177, train_acc: 0.350000, val_acc: 0.180000
Finished epoch 120 / 300: loss 1.602029, train_acc:

In [None]:
#Good: the run overfits, with training accuracy reaching 100%, which indicates the model implementation works

In [46]:
#Actual training
#Step 1: tune for a suitable learning rate
reg=0
lr=2*1e-4
model=init_model(32*32*3,50,10)
train(X_tr1000,y_tr1000,X_val50,y_val50,model,two_layer_net,300,reg,lr)

Finished epoch 0 / 300: loss 2.302597, train_acc: 0.108000, val_acc: 0.060000
Finished epoch 10 / 300: loss 2.302430, train_acc: 0.105000, val_acc: 0.100000
Finished epoch 20 / 300: loss 2.302032, train_acc: 0.106000, val_acc: 0.100000
Finished epoch 30 / 300: loss 2.300821, train_acc: 0.106000, val_acc: 0.100000
Finished epoch 40 / 300: loss 2.297424, train_acc: 0.105000, val_acc: 0.100000
Finished epoch 50 / 300: loss 2.290647, train_acc: 0.105000, val_acc: 0.100000
Finished epoch 60 / 300: loss 2.281614, train_acc: 0.105000, val_acc: 0.100000
Finished epoch 70 / 300: loss 2.268297, train_acc: 0.112000, val_acc: 0.080000
Finished epoch 80 / 300: loss 2.248844, train_acc: 0.125000, val_acc: 0.080000
Finished epoch 90 / 300: loss 2.225142, train_acc: 0.142000, val_acc: 0.080000
Finished epoch 100 / 300: loss 2.195550, train_acc: 0.178000, val_acc: 0.100000
Finished epoch 110 / 300: loss 2.160730, train_acc: 0.198000, val_acc: 0.100000
Finished epoch 120 / 300: loss 2.127574, train_acc:

In [52]:
#Repeat the 1000-sample run with regularisation strength 500 and learning rate 3e-4
reg=500
lr=3*1e-4
model=init_model(32*32*3,50,10)
train(X_tr1000,y_tr1000,X_val50,y_val50,model,two_layer_net,300,reg,lr)

Finished epoch 0 / 300: loss 3.071337, train_acc: 0.110000, val_acc: 0.060000
Finished epoch 10 / 300: loss 2.303198, train_acc: 0.113000, val_acc: 0.060000
Finished epoch 20 / 300: loss 2.302583, train_acc: 0.113000, val_acc: 0.060000
Finished epoch 30 / 300: loss 2.302581, train_acc: 0.113000, val_acc: 0.060000
Finished epoch 40 / 300: loss 2.302579, train_acc: 0.113000, val_acc: 0.060000
Finished epoch 50 / 300: loss 2.302577, train_acc: 0.113000, val_acc: 0.060000
Finished epoch 60 / 300: loss 2.302576, train_acc: 0.113000, val_acc: 0.060000
Finished epoch 70 / 300: loss 2.302574, train_acc: 0.113000, val_acc: 0.060000
Finished epoch 80 / 300: loss 2.302573, train_acc: 0.113000, val_acc: 0.060000
Finished epoch 90 / 300: loss 2.302571, train_acc: 0.113000, val_acc: 0.060000
Finished epoch 100 / 300: loss 2.302570, train_acc: 0.113000, val_acc: 0.060000
Finished epoch 110 / 300: loss 2.302568, train_acc: 0.113000, val_acc: 0.060000
Finished epoch 120 / 300: loss 2.302567, train_acc:

In [48]:
#Predicted classes for the 20-sample set.
#NOTE(review): `model` is whatever the most recently executed training cell left behind;
#the execution counts are out of order, so confirm which run this output belongs to
print(predict(X_tr20,model))

[6 8 9 6 8 6 6 5 8 9 5 0 0 5 0 8 8 5 0 1]


In [49]:
#True labels of the 20-sample set
print(y_tr20)

[6 9 9 4 1 1 2 7 8 3 4 7 7 2 9 9 9 3 2 6]
