In [1]:
import numpy as np
import scipy.special
import scipy.optimize
import sklearn.datasets

class LinearLogisticRegression:
    """Binary linear logistic regression with L2 regularisation.

    Samples are stored column-wise: ``Dtrain`` has shape ``(F, N)`` (features
    by samples) and ``Ltrain`` holds one 0/1 label per column.

    ``train`` minimises

        J(w, b) = lbd / 2 * ||w||^2 + 1/N * sum_i log(1 + exp(-z_i * (w^T x_i + b)))

    with ``z_i = 2 * c_i - 1``, using L-BFGS-B with a numerically approximated
    gradient.
    """

    def __init__(self, Dtrain, Ltrain, lbd):
        """Store the training set and the regularisation coefficient.

        Parameters
        ----------
        Dtrain : array of shape (F, N)
            Training samples, one per column.
        Ltrain : sequence of length N
            Class labels; the objective maps them to +/-1 via ``2 * c - 1``,
            so they are expected to be 0 or 1.
        lbd : float
            Weight of the ``||w||^2`` penalty.

        Raises
        ------
        ValueError
            If the number of labels differs from the number of columns.
        """
        if Dtrain.shape[1] != len(Ltrain):
            raise ValueError(
                'Dtrain has %d samples (columns) but Ltrain has %d labels'
                % (Dtrain.shape[1], len(Ltrain)))
        self.Dtrain = Dtrain
        self.Ltrain = Ltrain
        self.lbd = lbd
        self.F = Dtrain.shape[0]  # dimensionality of features space
        self.K = len(set(Ltrain))  # number of classes
        self.N = Dtrain.shape[1]  # number of training samples

    def __compute_zi(self, ci):
        """Map 0/1 labels to -1/+1 (works element-wise on arrays too)."""
        return 2 * ci - 1

    def __compute_T(self):  # TODO: remove this function
        """Return the (K, N) one-hot label matrix (currently unused).

        Entry ``[j, i]`` is 1.0 when sample ``i`` has label ``j`` and 0.0
        otherwise.
        """
        labels = np.asarray(self.Ltrain)[:self.N]
        classes = np.arange(self.K)
        # Broadcast (K, 1) against (1, N) to compare every class with every label.
        return (classes[:, None] == labels[None, :]).astype(float)

    def __logreg_obj(self, v):
        """Evaluate the regularised logistic loss at ``v = [w_1..w_F, b]``.

        ``self.Dtrain`` must be 2-D, i.e. shape ``(F, 1)`` even when it holds
        a single sample. Returns a plain Python float.
        """
        w, b = v[0:-1], v[-1]
        z = self.__compute_zi(np.asarray(self.Ltrain))
        # All N scores at once: (F,) . (F, N) -> (N,)
        scores = np.dot(w, self.Dtrain) + b
        # logaddexp(0, t) == log(1 + exp(t)), computed without overflow.
        data_term = np.logaddexp(0, -z * scores).mean()
        penalty = self.lbd / 2 * np.dot(w, w)
        return float(penalty + data_term)

    def train(self, maxiter=50):
        """Fit ``w`` and ``b`` with L-BFGS-B and store them in ``self.x``.

        Parameters
        ----------
        maxiter : int, optional
            Maximum number of L-BFGS-B iterations (default 50).

        Returns
        -------
        self, so calls can be chained: ``model.train().predict(D)``.
        """
        # NOTE(review): ``iprint`` is intentionally not passed; newer SciPy
        # releases appear to deprecate it (confirm against the SciPy docs) and
        # the summary is printed below anyway.
        self.x, f, d = scipy.optimize.fmin_l_bfgs_b(
            func=self.__logreg_obj,
            x0=np.zeros(self.Dtrain.shape[0] + 1),
            approx_grad=True,
            maxiter=maxiter)
        # One-element tuples keep %-formatting from ever unpacking the array.
        print('Point of minimum: %s' % (self.x,))
        print('Value of the minimum: %s' % (f,))
        # 'nit' is the iteration count; 'funcalls' counts objective evaluations.
        print('Number of iterations: %s' % (d['nit'],))
        print('Number of function evaluations: %s' % (d['funcalls'],))
        return self

    def predict(self, Dtest, labels=True):
        """Score the columns of ``Dtest`` with the trained model.

        Parameters
        ----------
        Dtest : array of shape (F, M)
            Samples to score, one per column.
        labels : bool, optional
            When True (default) return a boolean array that is True where the
            score is positive; when False return the raw scores.

        Returns
        -------
        numpy.ndarray of shape (M,)

        ``train`` must have been called first so that ``self.x`` exists.
        """
        w, b = self.x[0:-1], self.x[-1]
        S = np.dot(w, Dtest) + b  # (F,) . (F, M) -> (M,)
        if labels:
            return S > 0
        return S

In [2]:
def load_iris_binary():
    """Return the iris data restricted to two classes (versicolor vs virginica).

    Returns
    -------
    D : numpy.ndarray
        Feature matrix transposed so that each column is one sample.
    L : numpy.ndarray
        Labels: 1 for versicolor (unchanged), 0 for virginica (was 2).
    """
    iris = sklearn.datasets.load_iris()  # load once and reuse for data and target
    D, L = iris['data'].T, iris['target']
    keep = L != 0  # drop setosa (label 0)
    D = D[:, keep]
    # Boolean indexing returns a copy, so the relabel below does not modify
    # the array held by the loaded dataset.
    L = L[keep]
    L[L == 2] = 0  # We assign label 0 to virginica (was label 2)
    return D, L

In [3]:
# Load the two-class iris data: D holds one sample per column, L the 0/1 labels.
D, L = load_iris_binary()