In [55]:
import numpy as np
from scipy import sparse


class LogisticRegression:
    """Binary logistic regression trained with mini-batch SGD.

    The model keeps one weight vector ``w`` of length D + 1. Its last entry
    is the bias: ``append_biases`` adds the column of ones on the right of
    the data matrix, so the bias weight is always ``w[-1]``.
    """

    def __init__(self):
        # Weight vector (bias last); created lazily by the first train() call.
        self.w = None
        # Mini-batch loss per iteration, filled by the latest train() call.
        self.loss_history = None

    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
              batch_size=200, verbose=False):
        """
        Train this classifier using stochastic gradient descent.

        Inputs:
        - X: N x D array or sparse matrix of training data. Each row is a
             D-dimensional training point (the bias column is added here).
        - y: 1-dimensional array of length N with labels 0-1, for 2 classes.
        - learning_rate: (float) learning rate for optimization.
        - reg: (float) regularization strength.
        - num_iters: (integer) number of steps to take when optimizing
        - batch_size: (integer) number of training examples to use at each step.
        - verbose: (boolean) If true, print progress during optimization.

        Outputs:
        The classifier itself. The loss of every iteration is stored in
        self.loss_history.
        """
        # Add a column of ones to X for the bias sake.
        X = LogisticRegression.append_biases(X)
        # Flatten so that indexing with the sampled rows yields shape (batch_size,).
        y = np.asarray(y).ravel()
        num_train, dim = X.shape
        if self.w is None:
            # lazily initialize weights
            self.w = np.random.randn(dim) * 0.01

        # Run stochastic gradient descent to optimize w
        self.loss_history = []
        for it in range(num_iters):
            # Sample the mini-batch with replacement.
            inds = np.random.choice(num_train, batch_size)
            X_batch = X[inds]
            y_batch = y[inds]

            # evaluate loss and gradient
            loss, grad_w = self.loss(X_batch, y_batch, reg)
            self.loss_history.append(loss)

            # The step applies to the whole weight vector; `inds` selects
            # training rows and must not be used to index the weights.
            self.w -= learning_rate * grad_w

            if verbose and it % 100 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))

        return self

    def predict_proba(self, X, append_bias=False):
        """
        Use the trained weights of this linear classifier to predict probabilities for
        data points.

        Inputs:
        - X: N x D array or sparse matrix of data. Each row is a D-dimensional
             point. When append_bias is False, X must already contain the
             trailing column of ones.
        - append_bias: bool. Whether to append bias before predicting or not.

        Returns:
        - y_proba: Probabilities of classes for the data in X. y_proba is a 2-dimensional
          array with a shape (N, 2), and each row is a distribution of classes [prob_class_0, prob_class_1].
        """
        if append_bias:
            X = LogisticRegression.append_biases(X)
        X = LogisticRegression._as_matrix(X)
        scores = np.asarray(X.dot(self.w)).ravel()
        proba_1 = LogisticRegression._sigmoid(scores)
        proba_0 = 1. - proba_1
        # Column 0 is class 0 and column 1 is class 1, as documented above.
        y_proba = np.vstack((proba_0, proba_1)).T
        return y_proba

    def predict(self, X):
        """
        Use the ```predict_proba``` method to predict labels for data points.

        Inputs:
        - X: N x D array or sparse matrix of data without the bias column.
             Each row is a D-dimensional point.

        Returns:
        - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
          array of length N, and each element is an integer giving the predicted
          class.
        """
        y_proba = self.predict_proba(X, append_bias=True)
        # Class 1 wins when its probability reaches 0.5.
        y_pred = (y_proba[:, 1] >= 0.5).astype(int)
        return y_pred

    def loss(self, X_batch, y_batch, reg):
        """Logistic Regression loss function
        Inputs:
        - X_batch: N x (D + 1) array or sparse matrix of data whose last
          column is the bias column of ones. Data points are rows.
        - y_batch: 1-dimensional array of length N with labels 0-1, for 2 classes
        - reg: (float) L2 regularization strength.
        Returns:
        a tuple of:
        - loss as single float: mean cross-entropy over the batch plus
          0.5 * reg * ||w[:-1]||^2
        - gradient with respect to weights w; an array of same shape as w
        Raises:
        - ValueError if the batch contains no rows.
        """
        X_batch = LogisticRegression._as_matrix(X_batch)
        y_batch = np.asarray(y_batch, dtype=float).ravel()
        num_train = X_batch.shape[0]
        if num_train == 0:
            raise ValueError('cannot compute the loss of an empty batch')

        scores = np.asarray(X_batch.dot(self.w)).ravel()

        # -log(sigmoid(z)) = log(1 + exp(-z)) and
        # -log(1 - sigmoid(z)) = log(1 + exp(z)); logaddexp evaluates both
        # without overflowing for large |z|.
        per_example = (y_batch * np.logaddexp(0., -scores)
                       + (1. - y_batch) * np.logaddexp(0., scores))
        # Average over the batch instead of summing.
        loss = np.sum(per_example) / num_train

        # d(loss)/dw = X^T (sigmoid(Xw) - y) / N
        residual = LogisticRegression._sigmoid(scores) - y_batch
        dw = np.asarray(X_batch.T.dot(residual)).ravel() / num_train

        # L2 regularization; the bias (last weight) is excluded.
        loss += 0.5 * reg * np.sum(self.w[:-1] ** 2)
        dw[:-1] += reg * self.w[:-1]

        return loss, dw

    @staticmethod
    def append_biases(X):
        """Return X as a CSR matrix with a column of ones appended on the right."""
        # Convert to CSR up front so dense arrays and sparse matrices go
        # through the same sparse stacking path.
        X = sparse.csr_matrix(X)
        ones = sparse.csr_matrix(np.ones((X.shape[0], 1)))
        return sparse.hstack((X, ones)).tocsr()

    @staticmethod
    def _as_matrix(X):
        """Return X unchanged if it is sparse, otherwise as a plain ndarray."""
        if sparse.issparse(X):
            return X
        return np.asarray(X)

    @staticmethod
    def _sigmoid(z):
        """Logistic function 1 / (1 + exp(-z)), written with tanh so it cannot overflow."""
        return 0.5 * (1. + np.tanh(0.5 * z))

In [54]:
tmp = LogisticRegression()

In [None]:
tmp.

In [2]:
import numpy as np
from scipy import sparse

In [10]:
np.hstack((np.array([[1,2],[3,4]]), np.array([1,1])[:, np.newaxis]))

array([[1, 2, 1],
       [3, 4, 1]])

In [29]:
# Row-wise dot products X[i] . w, computed as a single matrix-vector product.
X = np.array([[1, 2], [3, 4], [5, 6]])
w = np.array([1, -2])
res = X.dot(w)
print(res)
# Labels minus exp(score), printed to eyeball the values for these scores.
print(np.array([1, 0, 1]) - np.exp(res))

[-3 -5 -7]
[ 0.95021293 -0.00673795  0.99908812]


In [15]:
X[[0,1]][:]

array([[1, 2],
       [3, 4]])

In [24]:
x = np.array([1.,2.,3.,4.,5.])
1. / (1 + np.exp(-x))

array([ 0.73105858,  0.88079708,  0.95257413,  0.98201379,  0.99330715])

In [20]:
xl = np.log(x)
xl

array([ 0.        ,  0.69314718,  1.09861229,  1.38629436,  1.60943791])

In [19]:
xl * x

array([ 0.        ,  1.38629436,  3.29583687,  5.54517744,  8.04718956])

In [None]:
y = [1., 0., 1., 1.]
x = [5., 1., 5., ]

In [34]:
# Threshold probabilities at 0.5: below -> 0., otherwise -> 1.
y = [0.4, 0.8, 0.55]
[0. if yy < 0.5 else 1. for yy in y]

[0.0, 1.0, 1.0]

In [44]:
y1 = np.reshape(np.array([1,2,3,4,5]), (5,1))
y2 = np.reshape(np.array([11,12,13,14,15]), (5,1))
y1

array([[1],
       [2],
       [3],
       [4],
       [5]])

In [46]:
np.hstack((y1, y2))

array([[ 1, 11],
       [ 2, 12],
       [ 3, 13],
       [ 4, 14],
       [ 5, 15]])

In [50]:
np.dot([1,1], [[1,2,3],[4,5,6]])

array([5, 7, 9])