In [340]:
import numpy as np

def binary_train(X, y, loss="perceptron", w0=None, b0=None, step_size=0.5, max_iterations=1000):
    """
    Train a binary linear classifier with full-batch (sub)gradient descent.

    Inputs:
    - X: training features, a N-by-D numpy array, where N is the
    number of training points and D is the dimensionality of features
    - y: binary training labels, a N dimensional array-like holding exactly
    two distinct values; the update rules below treat them as {0, 1}
    - loss: loss type, either "perceptron" or "logistic"
    - w0: optional initial weight vector of shape (D,); zeros when None
    - b0: optional initial bias; 0 when None
    - step_size: step size (learning rate)
    - max_iterations: number of gradient descent iterations to perform

    Returns:
    - w: D-dimensional vector, a numpy array which is the weight
    vector of logistic or perceptron regression
    - b: scalar, which is the bias of logistic or perceptron regression

    Raises:
    - ValueError: if `loss` is neither "perceptron" nor "logistic"
    - AssertionError: if `y` does not contain exactly two distinct values,
    or the resulting weight vector does not have shape (D,)
    """
    N, D = X.shape
    y = np.asarray(y)
    assert len(np.unique(y)) == 2

    if loss not in ("perceptron", "logistic"):
        raise ValueError("Loss Function is undefined.")

    # Honour caller-supplied starting points instead of always restarting at zero.
    w = np.zeros(D) if w0 is None else np.asarray(w0, dtype=float)
    b = 0 if b0 is None else b0

    if loss == "perceptron":
        # Map {0, 1} labels to {-1, +1} so that margin = y * (w.x + b).
        y_signed = 2 * y - 1
        for _ in range(max_iterations):
            margins = (X.dot(w) + b) * y_signed
            # Only points with a non-positive margin contribute to the update.
            violated = (margins <= 0).astype(float)
            signed_violations = violated * y_signed

            w = w + step_size * signed_violations.dot(X) / N
            b = b + step_size * np.sum(signed_violations) / N
    else:
        for _ in range(max_iterations):
            # Gradient of the averaged logistic loss: (sigmoid(w.x + b) - y) * x.
            err = sigmoid(X.dot(w) + b) - y
            w = w - step_size * err.dot(X) / N
            b = b - step_size * np.sum(err) / N

    assert w.shape == (D,)
    return w, b


def sigmoid(z):
    """
    Numerically stable logistic function.

    Inputs:
    - z: a numpy array or a float number

    Returns:
    - value: a numpy array (same shape as z) or a numpy float scalar holding
    1/(1+exp(-z)).
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it can never overflow, unlike exp(-z)
    # for large negative z.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- the same value.
    value = np.where(z >= 0, 1.0, decay) / (1.0 + decay)
    # Indexing with () turns a 0-d result back into a scalar and leaves arrays as-is.
    return value[()]

def binary_predict(X, w, b, loss="perceptron"):
    """
    Predict binary labels with a trained linear model.

    Inputs:
    - X: testing features, a N-by-D numpy array, where N is the
    number of points and D is the dimensionality of features
    - w: D-dimensional vector, a numpy array which is the weight
    vector of your learned model
    - b: scalar, which is the bias of your model
    - loss: loss type, either "perceptron" or "logistic"

    Returns:
    - preds: N dimensional float vector of binary predictions: {0, 1}

    Raises:
    - ValueError: if `loss` is neither "perceptron" nor "logistic"
    """
    N, _ = X.shape

    if loss not in ("perceptron", "logistic"):
        raise ValueError("Loss Function is undefined.")

    scores = w.dot(X.T) + b
    if loss == "perceptron":
        # Positive side of the hyperplane -> class 1; the boundary itself -> class 0.
        preds = (scores > 0).astype(float)
    else:
        # Class 1 when the predicted probability is strictly above 0.5.
        preds = (sigmoid(scores) > 0.5).astype(float)

    assert preds.shape == (N,)
    return preds

In [18]:
from sklearn.datasets import make_classification, make_blobs, make_moons, load_iris
from sklearn.model_selection import train_test_split
# from linear_regression import mapping_data

import json
import numpy as np
import pandas as pd


# Binary classification data
def toy_data_binary():
  """
  Build a synthetic two-class, two-feature dataset and split it 70/30.

  Returns:
  - X_train, X_test, y_train, y_test as produced by train_test_split
  """
  generator_options = dict(n_samples=500,
                           n_features=2,
                           n_informative=1,
                           n_redundant=0,
                           n_repeated=0,
                           n_classes=2,
                           n_clusters_per_class=1,
                           class_sep=1.,
                           random_state=42)
  features, labels = make_classification(**generator_options)

  # Fixed random_state keeps the split reproducible between runs.
  train_X, test_X, train_y, test_y = train_test_split(
      features, labels, train_size=0.7, random_state=42)
  return train_X, test_X, train_y, test_y


def moon_dataset():
  """
  Build the noisy two-moons dataset (500 points) and split it 70/30.

  Returns:
  - X_train, X_test, y_train, y_test as produced by train_test_split
  """
  features, labels = make_moons(n_samples=500, shuffle=True, noise=0.2, random_state=42)
  # Fixed random_state keeps the split reproducible between runs.
  train_X, test_X, train_y, test_y = train_test_split(
      features, labels, train_size=0.7, random_state=42)
  return train_X, test_X, train_y, test_y


# Multiple classification data

def toy_data_multiclass():
  """
  Build a three-blob, two-feature dataset (2000 points) and split it 70/30.

  Returns:
  - X_train, X_test, y_train, y_test as produced by train_test_split
  """
  blob_centers = [[-0.5, 1], [0, 1.5], [0.5, 1]]
  features, labels = make_blobs(n_samples=2000,
                                n_features=2,
                                random_state=42,
                                centers=blob_centers,
                                cluster_std=0.2)
  # Fixed random_state keeps the split reproducible between runs.
  train_X, test_X, train_y, test_y = train_test_split(
      features, labels, train_size=0.7, random_state=42)
  return train_X, test_X, train_y, test_y


# Hand-written digits data
def data_loader_mnist(dataset='mnist_subset.json'):
  """
  Load the train and test splits from a JSON dataset file.

  The file must hold a mapping with "train" and "test" entries; element 0 of
  each entry is read as the features and element 1 as the labels. Any other
  entries (for example a "valid" split) are ignored and need not be present.

  Inputs:
  - dataset: path of the JSON file to read

  Returns:
  - X_train, X_test, y_train, y_test: numpy arrays, in that order

  Raises:
  - OSError: if the file cannot be opened
  - KeyError: if the "train" or "test" entry is missing
  """
  with open(dataset, 'r') as f:
    data_set = json.load(f)

  train_set, test_set = data_set['train'], data_set['test']

  return np.asarray(train_set[0]), \
         np.asarray(test_set[0]), \
         np.asarray(train_set[1]), \
         np.asarray(test_set[1])


In [285]:
# Build the synthetic binary split and show the held-out labels as the cell output.
X_train, X_test, y_train, y_test = toy_data_binary()
y_test

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1,
       1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1,
       1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1])

In [48]:
# NumPy 1-D vectors do not distinguish between row and column orientation,
# so the dot product of two 1-D arrays is simply their inner product.
A = np.array([1,2])
B = np.array([3,4])
A.dot(B)

11

In [329]:
# Scratch check: the index tuple returned by np.where can be used to assign in place.
w = np.array(range(9))
c = np.where(w < 5)
# Bare expression in the middle of a cell: evaluated but not displayed.
c
w[c] = 0
w

array([0, 0, 0, 0, 0, 5, 6, 7, 8])

In [295]:
from __future__ import division, print_function
import numpy as np


def accuracy_score(true, preds):
    """
    Fraction of positions at which `preds` equals `true`.

    Inputs:
    - true: array-like of reference labels
    - preds: array-like of predicted labels, same shape as `true`

    Returns:
    - accuracy as a numpy float in [0, 1]

    Raises:
    - ValueError: if the two inputs do not have the same shape
    """
    # Convert first: comparing two Python lists with == yields a single bool,
    # not an element-wise result.
    true = np.asarray(true)
    preds = np.asarray(preds)
    if true.shape != preds.shape:
        # Without this check NumPy would broadcast and report a meaningless score.
        raise ValueError("true and preds must have the same shape, got %s and %s"
                         % (true.shape, preds.shape))
    return np.mean(true == preds)

def run_binary():
    """
    Train and evaluate both binary classifiers on every binary dataset.

    For each dataset the name is printed, followed by one line per loss type
    with the train and test accuracy, followed by a blank line.
    """
    experiments = [('Synthetic data', toy_data_binary()),
                   ('Two Moon data', moon_dataset()),
                   ('Binarized MNIST data', data_loader_mnist())]

    for title, split in experiments:
        print(title)
        X_train, X_test, y_train, y_test = split

        if title == 'Binarized MNIST data':
            # Collapse the ten digit labels into two classes: 0-4 -> 0, 5-9 -> 1.
            y_train = np.asarray([0 if digit < 5 else 1 for digit in y_train])
            y_test = np.asarray([0 if digit < 5 else 1 for digit in y_test])

        for loss_type in ("perceptron", "logistic"):
            w, b = binary_train(X_train, y_train, loss=loss_type)
            fitted_labels = binary_predict(X_train, w, b, loss=loss_type)
            heldout_labels = binary_predict(X_test, w, b, loss=loss_type)
            train_acc = accuracy_score(y_train, fitted_labels)
            test_acc = accuracy_score(y_test, heldout_labels)
            print(loss_type + ' train acc: %f, test acc: %f' % (train_acc, test_acc))
        print()

In [341]:
# Run the binary experiments; the per-dataset accuracies are printed below.
run_binary()

Synthetic data
perceptron train acc: 0.994286, test acc: 1.000000
logistic train acc: 0.994286, test acc: 1.000000

Two Moon data
perceptron train acc: 0.820000, test acc: 0.840000
logistic train acc: 0.857143, test acc: 0.866667

Binarized MNIST data
perceptron train acc: 0.873400, test acc: 0.828000
logistic train acc: 0.871000, test acc: 0.834000



In [292]:
def multiclass_train(X, y, C,
                     w0=None,
                     b0=None,
                     gd_type="sgd",
                     step_size=0.5,
                     max_iterations=1000):
    """
    Train a multinomial logistic regression classifier.

    Inputs:
    - X: training features, a N-by-D numpy array, where N is the
    number of training points and D is the dimensionality of features
    - y: multiclass training labels, a N dimensional numpy array of integer
    class indices (used to index rows and one-hot vectors of length C)
    - C: number of classes in the data
    - w0: optional initial C-by-D weight matrix; zeros when None
    - b0: optional initial bias vector of length C; zeros when None
    - gd_type: gradient descent type, either "gd" (full batch) or "sgd"
    (one uniformly sampled point per iteration)
    - step_size: step size (learning rate)
    - max_iterations: number of iterations to perform gradient descent

    Returns:
    - w: C-by-D weight matrix of multinomial logistic regression, where
    C is the number of classes and D is the dimensionality of features.
    - b: bias vector of length C, where C is the number of classes

    Raises:
    - ValueError: if `gd_type` is neither "sgd" nor "gd"

    Side effects:
    - Reseeds NumPy's global random generator with 42 on every call so that
    the SGD sampling sequence is reproducible.
    """
    N, D = X.shape

    if gd_type not in ("sgd", "gd"):
        raise ValueError("Type of Gradient Descent is undefined.")

    # Honour caller-supplied starting points instead of always restarting at zero.
    w = np.zeros((C, D)) if w0 is None else np.asarray(w0, dtype=float)
    b = np.zeros(C) if b0 is None else np.asarray(b0, dtype=float)

    np.random.seed(42)

    if gd_type == "sgd":
        for _ in range(max_iterations):
            idx = np.random.choice(N)
            x = X[idx]

            # Softmax over the C logits; subtracting the max avoids overflow.
            logits = x.dot(w.T) + b
            grad = np.exp(logits - np.max(logits))
            grad = grad / np.sum(grad)
            # Gradient w.r.t. the logits is softmax minus the one-hot target.
            grad[y[idx]] -= 1

            w = w - step_size * np.outer(grad, x)
            b = b - step_size * grad
    else:
        onehot = np.eye(C)[y]  # N-by-C one-hot encoding of the labels
        for _ in range(max_iterations):
            # Row-wise softmax; subtracting each row's max avoids overflow.
            logits = X.dot(w.T) + b
            probs = np.exp(logits - np.max(logits, axis=1, keepdims=True))
            probs = probs / np.sum(probs, axis=1, keepdims=True)

            err = probs - onehot  # N-by-C
            w = w - step_size * err.T.dot(X) / N
            b = b - step_size * np.sum(err, axis=0) / N

    assert w.shape == (C, D)
    assert b.shape == (C,)

    return w, b


In [241]:
def softmax(x):
    """
    Row-wise softmax of a 2-D array.

    Each row has its maximum subtracted before exponentiation so that large
    scores do not overflow; every row of the result sums to 1.
    """
    row_peak = np.max(x, axis=1).reshape((-1, 1))
    weights = np.exp(x - row_peak)
    row_total = np.sum(weights, axis=1).reshape((-1, 1))
    return weights / row_total


def multiclass_predict(X, w, b):
    """
    Predict class indices with a trained multinomial classifier.

    Inputs:
    - X: testing features, a N-by-D numpy array, where N is the
    number of points and D is the dimensionality of features
    - w: weights of the trained multinomial classifier, C-by-D
    - b: bias terms of the trained multinomial classifier, length of C

    Returns:
    - preds: N dimensional vector of multiclass predictions.
    Outputted predictions should be from {0, C - 1}, where
    C is the number of classes
    """
    N, _ = X.shape

    # Softmax is strictly increasing in each logit, so the most probable class
    # is the one with the largest logit; no normalisation is needed.
    logits = X.dot(w.T) + np.reshape(b, -1)
    preds = np.argmax(logits, axis=1)

    assert preds.shape == (N,)
    return preds


In [123]:
# Scratch check: indexing an identity matrix with a label array selects one one-hot row per label.
np.eye(5)[np.array([1,2,3,0,2,2,3,2])]

array([[0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.]])

In [122]:
# Scratch check: the same row selection works with a plain Python list of indices.
np.eye(6)[[2,2]]

array([[0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.]])

In [221]:
# Scratch check: reshaping the per-row maximum to a column lets it broadcast across each row.
C = np.array(range(10)).reshape((2,5))
D = np.max(C, axis = 1)
C + D.reshape((-1, 1))

array([[ 4,  5,  6,  7,  8],
       [14, 15, 16, 17, 18]])

In [129]:
# Scratch check of a row-wise argmax.
# NOTE(review): this reads `C` from kernel state; the recorded output has five entries,
# which does not match the 2x5 `C` defined in the cell above -- presumably it was
# produced with a different `C` from an earlier execution. Re-run to confirm.
np.argmax(C, axis=1)

array([1, 1, 1, 1, 1])

In [277]:
def run_multiclass():
    """
    Train and evaluate the multiclass classifier with SGD and GD on each dataset.

    For each dataset a header line is printed, then for each gradient descent
    type the training time and the train/test accuracy, then a blank line.
    """
    from data_loader import toy_data_multiclass, \
                            data_loader_mnist
    import time

    experiments = [('Toy Data multiclass', 3, toy_data_multiclass()),
                   ('MNIST', 10, data_loader_mnist())]

    for title, class_count, split in experiments:
        print('%s: %d class classification' % (title, class_count))
        X_train, X_test, y_train, y_test = split

        for gd_type in ("sgd", "gd"):
            started = time.time()
            w, b = multiclass_train(X_train, y_train, C=class_count, gd_type=gd_type)
            elapsed = time.time() - started
            print(gd_type + ' training time: %0.6f seconds' % elapsed)

            fitted_labels = multiclass_predict(X_train, w=w, b=b)
            heldout_labels = multiclass_predict(X_test, w=w, b=b)
            train_acc = accuracy_score(y_train, fitted_labels)
            test_acc = accuracy_score(y_test, heldout_labels)
            print('train acc: %f, test acc: %f' % (train_acc, test_acc))
        print()

In [278]:
# Run the multiclass experiments; timings and accuracies are printed below.
run_multiclass()

Toy Data multiclass: 3 class classification
sgd training time: 0.307604 seconds
train acc: 0.948571, test acc: 0.948333
gd training time: 0.155063 seconds
train acc: 0.950000, test acc: 0.950000

MNIST: 10 class classification
sgd training time: 0.474897 seconds
train acc: 0.819800, test acc: 0.800000
gd training time: 3.918744 seconds
train acc: 0.945400, test acc: 0.896000

