In [55]:
# coding:utf-8
from __future__ import print_function

import numpy as np
from random import shuffle
from past.builtins import xrange

import pickle
from past.builtins import xrange


# File reading and data loading helpers.
def unpickle(file):
  """Open ``file`` in binary mode and return the object pickled inside it.

  Inputs:
  - file: path of the pickle file to read.

  Returns whatever object ``pickle.load`` reconstructs from the file.
  """
  # NOTE: pickle.load can execute arbitrary code while loading, so this must
  # only be pointed at trusted files.
  with open(file, 'rb') as handle:
    return pickle.load(handle)



def load_cifar10(file):
    """Load the five training batches and the test batch.

    Inputs:
    - file: string prefix that is concatenated directly with each batch file
      name ("data_batch_1" ... "data_batch_5", "test_batch"), so a directory
      prefix must already end with a path separator.

    Returns a tuple of:
    - dataTrain: the 'data' entries of the five training batches stacked
      row-wise, in batch order
    - labelTrain: the 'labels' entries of the five training batches
      concatenated into one array, in batch order
    - dataTest: the 'data' entry of the test batch
    - labelTest: the 'labels' entry of the test batch as a numpy array
    """
    # Read every training batch first, then stack them in a single step.
    train_batches = [unpickle(file + "data_batch_" + str(batch_no))
                     for batch_no in range(1, 6)]
    dataTrain = np.vstack([batch['data'] for batch in train_batches])
    labelTrain = np.hstack([batch['labels'] for batch in train_batches])

    test_batch = unpickle(file + "test_batch")
    dataTest = test_batch['data']
    labelTest = np.array(test_batch['labels'])

    return dataTrain, labelTrain, dataTest, labelTest


def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with explicit loops).

    Inputs:
    - W: weight matrix; W.shape[1] is the number of classes
    - X: sample matrix with one sample per row (X.shape[0] samples)
    - y: labels; y[i] is the index of the correct class of X[i]
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss: mean cross-entropy over the samples plus 0.5 * reg * sum(W * W)
    - dW: gradient of the loss with respect to W, same shape as W
    """
    total_loss = 0.0
    grad = np.zeros_like(W)

    n_samples = X.shape[0]
    n_classes = W.shape[1]

    for idx, sample in enumerate(X):
        logits = sample.dot(W)        # raw class scores for this sample
        logits -= np.max(logits)      # shift so the exponentials cannot overflow

        target = y[idx]
        normalizer = np.sum(np.exp(logits))

        # Cross-entropy of one sample: log(sum_k exp(s_k)) - s_target
        total_loss += np.log(normalizer) - logits[target]

        for cls in range(n_classes):
            # Softmax probability of this class times the input vector.
            column_update = np.exp(logits[cls]) / normalizer * sample
            if cls == target:
                # The correct class carries an extra -x term.
                column_update = column_update - sample
            grad[:, cls] += column_update

    # Average the data terms over the samples, then add L2 regularization.
    total_loss /= n_samples
    total_loss += 0.5 * reg * np.sum(W * W)
    grad /= n_samples
    grad += reg * W

    return total_loss, grad

def softmax_loss_vectorized(W, X, y, reg):
    """
    Softmax loss function, vectorized version (no explicit Python loops).

    Inputs and outputs are the same as softmax_loss_naive.

    Inputs:
    - W: weight matrix; W.shape[1] is the number of classes
    - X: sample matrix with one sample per row (X.shape[0] samples)
    - y: labels; y[i] is the index of the correct class of X[i]
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss: mean cross-entropy over the samples plus 0.5 * reg * sum(W * W)
    - dW: gradient of the loss with respect to W, same shape as W
    """
    num_train = X.shape[0]
    rows = np.arange(num_train)

    scores = X.dot(W)
    # Subtract each row's maximum so the exponentials cannot overflow.
    scores = scores - np.max(scores, axis=1)[:, np.newaxis]

    exp_scores = np.exp(scores)
    sum_exp_scores = np.sum(exp_scores, axis=1)

    # Per-sample cross-entropy: log(sum_k exp(s_k)) - s_correct.
    loss = np.sum(np.log(sum_exp_scores) - scores[rows, y]) / num_train
    loss += 0.5 * reg * np.sum(W * W)

    # d(loss)/d(scores) is the softmax probability matrix with 1 subtracted at
    # each sample's correct class; one matrix product then accumulates the
    # gradient over all samples at once.
    probs = exp_scores / sum_exp_scores[:, np.newaxis]
    probs[rows, y] -= 1.0
    dW = X.T.dot(probs) / num_train
    dW += reg * W

    return loss, dW


class LinearClassifier(object):
    """Linear classifier trained with minibatch stochastic gradient descent.

    The weight matrix ``W`` starts as None and is created by the first call
    to ``train`` with shape (D, C).
    """

    def __init__(self):
        # Allocated lazily in train(), once the data dimensions are known.
        self.W = None

    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=200,batch_size=500, verbose=True):
        """
        Train this linear classifier using stochastic gradient descent.

        Inputs:
        - X: A numpy array of shape (N, D) containing training data; there are N
          training samples each of dimension D.
        - y: A numpy array of shape (N,) containing training labels; y[i] = c
          means that X[i] has label 0 <= c < C for C classes.
        - learning_rate: (float) learning rate for optimization.
        - reg: (float) regularization strength.
        - num_iters: (integer) number of steps to take when optimizing
        - batch_size: (integer) number of training examples to use at each step.
        - verbose: (boolean) If true, print progress during optimization.

        Outputs:
        A list containing the value of the loss function at each training iteration.
        """
        y = np.asarray(y)  # allow plain label lists to be fancy-indexed below
        num_train, dim = X.shape
        num_classes = np.max(y) + 1  # assume y takes values 0...K-1 where K is number of classes
        if self.W is None:
            # Lazily initialize W with small random values.
            self.W = 0.001 * np.random.randn(dim, num_classes)

        # Sampling without replacement is only possible while the batch fits
        # inside the training set; np.random.choice raises ValueError
        # otherwise, so fall back to sampling with replacement in that case.
        with_replacement = batch_size > num_train

        loss_history = []
        for it in range(num_iters):
            # Draw a minibatch: X_batch is (batch_size, D), y_batch is (batch_size,).
            sample_index = np.random.choice(num_train, batch_size,
                                            replace=with_replacement)
            X_batch = X[sample_index, :]
            y_batch = y[sample_index]

            # Evaluate loss and gradient on the minibatch.
            loss, grad = self.loss(X_batch, y_batch, reg)
            loss_history.append(loss)

            # Gradient descent step.
            self.W += -learning_rate * grad

            if verbose and it % 100 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))
        return loss_history

    def predict(self, X):
        """
        Use the trained weights of this linear classifier to predict labels for
        data points.

        Inputs:
        - X: A numpy array of shape (N, D) containing data; there are N
          samples each of dimension D.

        Returns:
        - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
          array of length N, and each element is an integer giving the predicted
          class.
        """
        # The predicted class is the column of W with the highest score.
        score = X.dot(self.W)
        y_pred = np.argmax(score, axis=1)
        return y_pred

    def loss(self, X_batch, y_batch, reg):
        """
        Compute the loss function and its derivative.
        Subclasses may override this; this base implementation delegates to
        softmax_loss_naive.

        Inputs:
        - X_batch: A numpy array of shape (N, D) containing a minibatch of N
          data points; each point has dimension D.
        - y_batch: A numpy array of shape (N,) containing labels for the minibatch.
        - reg: (float) regularization strength

        Returns: A tuple containing:
        - loss as a single float
        - gradient with respect to self.W; an array of the same shape as W
        """
        return softmax_loss_naive(self.W, X_batch, y_batch, reg)

class Softmax(LinearClassifier):
    """ A subclass that uses the Softmax + Cross-entropy loss function """

    def loss(self, X_batch, y_batch, reg):
        """Return (loss, gradient w.r.t. self.W) for one minibatch.

        Delegates to softmax_loss_vectorized, which takes the same inputs and
        returns the same outputs as softmax_loss_naive without iterating over
        samples and classes in Python.
        """
        return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)



In [56]:
# Training setup: the batch files are read from the current working directory.
file_path = "./"

# Fetch the data set: training data/labels and test data/labels.
(dataTrain1, labelTrain1,
 dataTest1, labelTest1) = load_cifar10(file_path)

In [57]:
# Two classifier instances are created; only LC is trained in this cell.
LC = LinearClassifier()
SM = Softmax()

print(dataTrain1.shape)

# 50 training rounds; round k trains on the first 500 * k training samples.
# NOTE(review): the recorded losses jump from ~13 into the thousands, so the
# optimisation appears to diverge -- presumably the raw, unscaled inputs
# combined with the default learning rate; confirm before trusting the model.
for round_no in range(1, 51):
    subset_end = 500 * round_no
    LC.train(dataTrain1[:subset_end, :], labelTrain1[:subset_end])

    print('trainning %d times \n' % round_no)



(50000, 3072)
iteration 0 / 200: loss 13.064733
iteration 100 / 200: loss 8557.244809
trainning 1 times 

iteration 0 / 200: loss 4095.092160
iteration 100 / 200: loss 3989.959973
trainning 2 times 

iteration 0 / 200: loss 5610.378672
iteration 100 / 200: loss 4660.253644
trainning 3 times 

iteration 0 / 200: loss 4703.475387
iteration 100 / 200: loss 4075.405712
trainning 4 times 

iteration 0 / 200: loss 3135.915568
iteration 100 / 200: loss 3137.449369
trainning 5 times 

iteration 0 / 200: loss 4226.652157
iteration 100 / 200: loss 4488.025291
trainning 6 times 

iteration 0 / 200: loss 4371.073670
iteration 100 / 200: loss 6482.591849
trainning 7 times 

iteration 0 / 200: loss 3170.833611
iteration 100 / 200: loss 4045.285160
trainning 8 times 

iteration 0 / 200: loss 5182.441596
iteration 100 / 200: loss 4447.470787
trainning 9 times 

iteration 0 / 200: loss 4228.659167
iteration 100 / 200: loss 4566.798899
trainning 10 times 

iteration 0 / 200: loss 3236.605760
iteration 1

In [58]:
# Evaluate the trained classifier on the first 100 test samples.
print(dataTest1.size)
pre = LC.predict(dataTest1[:100,:])
print(pre.size)

# Count the predictions that match the corresponding test labels.
acc = int(np.sum(pre == np.asarray(labelTest1[:pre.size])))

print(acc)
# 100.0 forces true division, so the percentage is not truncated to an
# integer when the notebook runs under Python 2.
print('accuracy is %f' % (100.0*acc/pre.size))


30720000
100
21
accuracy is 21.000000
