## logstic

### y = {0,1}

In [1]:
from __future__ import division, print_function

import numpy as np


#######################################################################
# Replace TODO with your code
#######################################################################
def softmax(x):
    # avoid overflow

    if len(x.shape) ==1:
        c = np.max(x,0)
        exp_x = np.exp(x - c)
        sum_exp_x = np.sum(exp_x,0)
        y = exp_x / sum_exp_x
    else:
        c = np.transpose(np.tile(np.max(x,1),(x.shape[1],1)))
        exp_x = np.exp(x - c)
        sum_exp_x = np.sum(exp_x,1)
        y = np.transpose(np.divide(exp_x.T,sum_exp_x))

    return y 

def underlog(x):
    x = x - np.max(x)
    y = x - np.log(np.sum(np.exp(x)))
    
    return y

def binary_train(X, y, w0=None, b0=None, step_size=0.5, max_iterations=1000):
    """
    Inputs:
    - X: training features, a N-by-D numpy array, where N is the 
    number of training points and D is the dimensionality of features
    - y: binary training labels, a N dimensional numpy array where 
    N is the number of training points, indicating the labels of 
    training data
    - step_size: step size (learning rate)
    - max_iterations: number of iterations to perform gradient descent

    Returns:
    - w: D-dimensional vector, a numpy array which is the weight 
    vector of logistic regression
    - b: scalar, which is the bias of logistic regression

    Find the optimal parameters w and b for inputs X and y.
    Use the *average* of the gradients for all training examples
    multiplied by the step_size to update parameters.
    """
    N, D = X.shape
    assert len(np.unique(y)) == 2


    w = np.zeros(D)
    if w0 is not None:
        w = w0
    
    b = 0
    if b0 is not None:
        b = b0


    """
    TODO: add your code here
    """
    ## w = w - step * sum(σ(wxn+b)-yn)xn
    ## b = b - step * sum(σ(wxn+b)-yn)
    ## wtx + b = np.sum(X_train * w.T,1) +b
    # x * np.tile(dfn, (D,1)).T
    for i in range(max_iterations):
        z = (np.sum(X * w.T,1) +b)
        sgm = sigmoid(z) - y 
        b = b - step_size * np.sum(sgm,0)
        w = w - step_size * np.sum(X * np.tile(sgm, (D,1)).T, 0)
        
    assert w.shape == (D,)
    return w, b


def binary_predict(X, w, b):
    """
    Inputs:
    - X: testing features, a N-by-D numpy array, where N is the 
    number of training points and D is the dimensionality of features
    
    Returns:
    - preds: N dimensional vector of binary predictions: {0, 1}
    """
    N, D = X.shape
    preds = np.zeros(N) 


    """
    TODO: add your code here
    """
    y = np.sum(X * w, 1) + b
    preds = np.round(sigmoid(y))
    
    assert preds.shape == (N,) 
    return preds


def multinomial_train(X, y, C, 
                     w0=None, 
                     b0=None, 
                     step_size=0.5, 
                     max_iterations=1000):
    """
    Inputs:
    - X: training features, a N-by-D numpy array, where N is the 
    number of training points and D is the dimensionality of features
    - y: multiclass training labels, a N dimensional numpy array where
    N is the number of training points, indicating the labels of 
    training data
    - C: number of classes in the data
    - step_size: step size (learning rate)
    - max_iterations: number of iterations to perform gradient descent

    Returns:
    - w: C-by-D weight matrix of multinomial logistic regression, where 
    C is the number of classes and D is the dimensionality of features.
    - b: bias vector of length C, where C is the number of classes

    Implement multinomial logistic regression for multiclass 
    classification. Again use the *average* of the gradients for all training 
    examples multiplied by the step_size to update parameters.
    
    You may find it useful to use a special (one-hot) representation of the labels, 
    where each label y_i is represented as a row of zeros with a single 1 in
    the column, that corresponds to the class y_i.
    """

    N, D = X.shape

    w = np.zeros((C, D))
    if w0 is not None:
        w = w0
    
    b = np.zeros(C)
    if b0 is not None:
        b = b0


    """
    TODO: add your code here
    """
    ## w = w - step * sum(σ(wxn+b)-yn)xn
    ## b = b - step * sum(σ(wxn+b)-yn)
    ## wtx + b = np.sum(X_train * w.T,1) +b
    
    # class label to one-hot
    yc = np.identity(C)[y].astype(int)
    
    for i in range(max_iterations):
        wxb = np.dot(X,w.T) + np.tile(b,(N,1))
        smyx = np.multiply(np.tile((softmax(wxb) - yc),(D,1))
                           ,np.tile(X.ravel(order='F'),(C,1)).T).reshape(D, N, C)      
        w = w - step_size * np.sum(smyx,1).T
        b = b - step_size * np.sum((softmax(wxb) - yc),0)

        
    assert w.shape == (C, D)
    assert b.shape == (C,)
    return w, b


def multinomial_predict(X, w, b):
    """
    Inputs:
    - X: testing features, a N-by-D numpy array, where N is the 
    number of training points and D is the dimensionality of features
    - w: weights of the trained multinomial classifier
    - b: bias terms of the trained multinomial classifier
    
    Returns:
    - preds: N dimensional vector of multiclass predictions.
    Outputted predictions should be from {0, C - 1}, where
    C is the number of classes

    Make predictions for multinomial classifier.
    """
    N, D = X.shape
    C = w.shape[0]
    preds = np.zeros(N) 

    """
    TODO: add your code here
    """   
    # deterministic prediction == no need to sigmoid
    yc = np.dot(X, w.T) +  np.tile(b,(N,1))
    # one-hot to class label
    preds = np.argmax(yc, axis = 1)

    assert preds.shape == (N,)
    return preds


def OVR_train(X, y, C, w0=None, b0=None, step_size=0.5, max_iterations=1000):
    """
    Inputs:
    - X: training features, a N-by-D numpy array, where N is the 
    number of training points and D is the dimensionality of features
    - y: multiclass training labels, a N dimensional numpy array, 
    indicating the labels of each training point
    - C: number of classes in the data
    - w0: initial value of weight matrix
    - b0: initial value of bias term
    - step_size: step size (learning rate)
    - max_iterations: number of iterations to perform gradient descent

    Returns:
    - w: a C-by-D weight matrix of OVR logistic regression
    - b: bias vector of length C

    Implement multiclass classification using one-versus-rest with binary logistic 
    regression as the black-box. Recall that the one-versus-rest classifier is 
    trained by training C different classifiers. 
    """
    N, D = X.shape
    
    w = np.zeros((C, D))
    if w0 is not None:
        w = w0
    
    b = np.zeros(C)
    if b0 is not None:
        b = b0

    """
    TODO: add your code here
    """
    for c in range(C):
        yc = (y==c).astype(int)
        w[c,:], b[c] = binary_train(X, yc, w0=w[c,:], b0=b[c], step_size=0.5, max_iterations=1000)
    
    
    assert w.shape == (C, D), 'wrong shape of weights matrix'
    assert b.shape == (C,), 'wrong shape of bias terms vector'
    return w, b


def OVR_predict(X, w, b):
    """
    Inputs:
    - X: testing features, a N-by-D numpy array, where N is the 
    number of training points and D is the dimensionality of features
    - w: weights of the trained OVR model
    - b: bias terms of the trained OVR model
    
    Returns:
    - preds: vector of class label predictions.
    Outputted predictions should be from {0, C - 1}, where
    C is the number of classes.

    Make predictions using OVR strategy and probability predictions from binary
    classifiers. 
    """
    N, D = X.shape
    C = w.shape[0]
    preds = np.zeros(N) 
    
    """
    TODO: add your code here
    """
    yc = np.dot(X, w.T) +  np.tile(b,(N,1))
    preds = np.argmax(yc, axis = 1)
    
    assert preds.shape == (N,)
    return preds


#######################################################################
# DO NOT MODIFY THE CODE BELOW 
#######################################################################

def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def accuracy_score(true, preds):
    return np.sum(true == preds).astype(float) / len(true)

def run_binary():
    from data_loader import toy_data_binary, \
                            data_loader_mnist 

    print('Performing binary classification on synthetic data')
    X_train, X_test, y_train, y_test = toy_data_binary()
        
    w, b = binary_train(X_train, y_train)
    
    train_preds = binary_predict(X_train, w, b)
    preds = binary_predict(X_test, w, b)
    print('train acc: %f, test acc: %f' % 
            (accuracy_score(y_train, train_preds),
             accuracy_score(y_test, preds)))
    
    print('Performing binary classification on binarized MNIST')
    X_train, X_test, y_train, y_test = data_loader_mnist()

    binarized_y_train = [0 if yi < 5 else 1 for yi in y_train] 
    binarized_y_test = [0 if yi < 5 else 1 for yi in y_test] 
    
    w, b = binary_train(X_train, binarized_y_train)
    
    train_preds = binary_predict(X_train, w, b)
    preds = binary_predict(X_test, w, b)
    print('train acc: %f, test acc: %f' % 
            (accuracy_score(binarized_y_train, train_preds),
             accuracy_score(binarized_y_test, preds)))

def run_multiclass():
    from data_loader import toy_data_multiclass_3_classes_non_separable, \
                            toy_data_multiclass_5_classes, \
                            data_loader_mnist 
    
    datasets = [(toy_data_multiclass_3_classes_non_separable(), 
                        'Synthetic data', 3), 
                (toy_data_multiclass_5_classes(), 'Synthetic data', 5), 
                (data_loader_mnist(), 'MNIST', 10)]

    for data, name, num_classes in datasets:
        print('%s: %d class classification' % (name, num_classes))
        X_train, X_test, y_train, y_test = data
        
        print('One-versus-rest:')
        w, b = OVR_train(X_train, y_train, C=num_classes)
        train_preds = OVR_predict(X_train, w=w, b=b)
        preds = OVR_predict(X_test, w=w, b=b)
        print('train acc: %f, test acc: %f' % 
            (accuracy_score(y_train, train_preds),
             accuracy_score(y_test, preds)))
    
        print('Multinomial:')
        w, b = multinomial_train(X_train, y_train, C=num_classes)
        train_preds = multinomial_predict(X_train, w=w, b=b)
        preds = multinomial_predict(X_test, w=w, b=b)
        print('train acc: %f, test acc: %f' % 
            (accuracy_score(y_train, train_preds),
             accuracy_score(y_test, preds)))

"""if __name__ == '__main__':
    
    import argparse
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument("--type", )
    parser.add_argument("--output")
    args = parser.parse_args()

    if args.output:
            sys.stdout = open(args.output, 'w')

    if not args.type or args.type == 'binary':
        run_binary()
    
    if not args.type or args.type == 'multiclass':
        run_multiclass()
"""

'if __name__ == \'__main__\':\n    \n    import argparse\n    import sys\n\n    parser = argparse.ArgumentParser()\n    parser.add_argument("--type", )\n    parser.add_argument("--output")\n    args = parser.parse_args()\n\n    if args.output:\n            sys.stdout = open(args.output, \'w\')\n\n    if not args.type or args.type == \'binary\':\n        run_binary()\n    \n    if not args.type or args.type == \'multiclass\':\n        run_multiclass()\n'

In [2]:
from data_loader import toy_data_binary, \
                   data_loader_mnist 
from sklearn.linear_model import LogisticRegression

### confrim binary 

In [3]:
print('Performing binary classification on synthetic data')
X_train, X_test, y_train, y_test = toy_data_binary()
    
w, b = binary_train(X_train, y_train)

train_preds = binary_predict(X_train, w, b)
preds = binary_predict(X_test, w, b)
print('train acc: %f, test acc: %f' % 
        (accuracy_score(y_train, train_preds),
         accuracy_score(y_test, preds)))

# sklearn
lr = LogisticRegression(C=1e15, max_iter=1000, solver="saga")
lr.fit(X_train, y_train)
print('SK-learn')
print('train acc: %f, test acc: %f' % 
      (lr.score(X_train, y_train),lr.score(X_test, y_test)))


Performing binary classification on synthetic data
train acc: 0.994286, test acc: 1.000000
SK-learn
train acc: 0.994286, test acc: 1.000000




In [4]:
print('Performing binary classification on binarized MNIST')
X_train, X_test, y_train, y_test = data_loader_mnist()

binarized_y_train = [0 if yi < 5 else 1 for yi in y_train] 
binarized_y_test = [0 if yi < 5 else 1 for yi in y_test] 
    
w, b = binary_train(X_train, binarized_y_train, max_iterations=1000)
    
train_preds = binary_predict(X_train, w, b)
preds = binary_predict(X_test, w, b)
print('train acc: %f, test acc: %f' % 
        (accuracy_score(binarized_y_train, train_preds),
         accuracy_score(binarized_y_test, preds)))

lrmn = LogisticRegression(C=1e15, max_iter=1000, solver="saga")
lrmn.fit(X_train, binarized_y_train)
print('SK-learn')
print('train acc: %f, test acc: %f' % 
      (lrmn.score(X_train, binarized_y_train),lrmn.score(X_test, binarized_y_test)))

Performing binary classification on binarized MNIST
train acc: 0.695000, test acc: 0.695000
SK-learn
train acc: 0.899600, test acc: 0.812000


### confrim multi class

In [5]:
from data_loader import toy_data_multiclass_3_classes_non_separable, \
                        toy_data_multiclass_5_classes, \
                        data_loader_mnist 

datasets = [(toy_data_multiclass_3_classes_non_separable(), 
                    'Synthetic data', 3), 
            (toy_data_multiclass_5_classes(), 'Synthetic data', 5), 
            (data_loader_mnist(), 'MNIST', 10)]

for data, name, num_classes in datasets:
    print('%s: %d class classification' % (name, num_classes))
    X_train, X_test, y_train, y_test = data
    
    print('One-versus-rest:')
    w, b = OVR_train(X_train, y_train, C=num_classes)
    train_preds = OVR_predict(X_train, w=w, b=b)
    preds = OVR_predict(X_test, w=w, b=b)
    print('train acc: %f, test acc: %f' % 
        (accuracy_score(y_train, train_preds),
         accuracy_score(y_test, preds)))
    
    print('Multinomial:')
    w, b = multinomial_train(X_train, y_train, C=num_classes)
    train_preds = multinomial_predict(X_train, w=w, b=b)
    preds = multinomial_predict(X_test, w=w, b=b)
    print('train acc: %f, test acc: %f' % 
        (accuracy_score(y_train, train_preds),
         accuracy_score(y_test, preds)))
    
    print('SK-learn:')
    lrmt = LogisticRegression(C=1e15, max_iter=1000, solver="saga")
    lrmt.fit(X_train, y_train)
    print('train acc: %f, test acc: %f' % 
      (lrmt.score(X_train, y_train),lrmt.score(X_test, y_test)))



Synthetic data: 3 class classification
One-versus-rest:
train acc: 0.871429, test acc: 0.846667
Multinomial:
train acc: 0.897143, test acc: 0.846667
SK-learn:
train acc: 0.908571, test acc: 0.840000
Synthetic data: 5 class classification
One-versus-rest:
train acc: 0.697143, test acc: 0.666667
Multinomial:
train acc: 0.825714, test acc: 0.786667
SK-learn:
train acc: 0.865714, test acc: 0.846667
MNIST: 10 class classification
One-versus-rest:
train acc: 0.895000, test acc: 0.829000
Multinomial:
train acc: 0.937600, test acc: 0.856000
SK-learn:
train acc: 1.000000, test acc: 0.793000
