## Logistic Regression Classifier

In [None]:
%config InlineBackend.figure_format = 'retina'
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

In [None]:
def construct_dataset():
    """Build a binary, two-feature train/test split from the Iris dataset.

    The three Iris species are collapsed into two classes:
        -1 : target <= 1.5 (Setosa / Versicolor, original targets 0 and 1)
        +1 : target  > 1.5 (Virginica, original target 2)

    Features and labels are stacked side by side, shuffled with a fixed
    seed (so the split is reproducible), and cut into the first 100 rows
    (training) and the remaining rows (test).  Only feature columns 3 and 1
    are kept, in that order.  Row 42 of the training portion is dropped so
    that the training set is separable.

    Side effects: reseeds NumPy's global random generator with 1 and prints
    the array shapes plus the first shuffled row.

    Returns:
        (X_train, Y_train, X_test, Y_test).  With the standard 150-sample
        Iris data these have shapes (99, 2), (99,), (50, 2) and (50,).
    """
    bunch = datasets.load_iris()
    X = bunch.data                  # (150, 4): 150 samples, 4 features each.

    # Column vector of -1.0 / +1.0 labels, shape (150, 1).
    Y = np.where(bunch.target > 1.5, 1.0, -1.0).reshape(-1, 1)

    # Keep features and label in one table so they are shuffled together.
    table = np.concatenate((X, Y), axis=1)
    np.random.seed(1)               # Fixed seed -> same shuffle every run.
    np.random.shuffle(table)        # Shuffles rows in place.

    print("X.shape", X.shape)
    print("Y.shape", Y.shape)
    # NOTE(review): an earlier note expected [ 5.8  4.   1.2  0.2  0. ] here;
    # with the -1/+1 encoding the last entry cannot be 0 -- presumably it is
    # now -1. TODO confirm against actual output.
    print("X_and_Y[0]", table[0])

    kept_features = [3, 1]          # Feature columns used as (x_0, x_1).
    label_column = 4
    dropped_row = 42                # Training row removed for separability.

    train_rows = table[:100]
    test_rows = table[100:]

    X_train = np.delete(train_rows[:, kept_features], dropped_row, axis=0)
    Y_train = np.delete(train_rows[:, label_column], dropped_row, axis=0)
    X_test = test_rows[:, kept_features]
    Y_test = test_rows[:, label_column]

    print("X_train.shape", X_train.shape)
    print("Y_train.shape", Y_train.shape)
    print("X_test.shape", X_test.shape)
    print("Y_test.shape", Y_test.shape)

    return X_train, Y_train, X_test, Y_test

# Build the split once; the plotting cells below read these module-level arrays.
X_train, Y_train, X_test, Y_test = construct_dataset()

In [None]:
def vis(X, Y, W=None, b=None):
    """Scatter-plot two-feature data by class and optionally draw a linear boundary.

    Args:
        X: Array of shape (n, 2); column 0 is plotted on the horizontal axis
            and column 1 on the vertical axis.
        Y: Array of n labels; entries equal to -1 are drawn blue, entries
            equal to 1 are drawn red.
        W: Optional pair of weights (w0, w1).  When given, the line
            w0*x0 + w1*x1 + b = 0 is drawn in black.
        b: Bias term of the boundary; must be supplied whenever W is.

    The boundary is clipped to the extent of the data on both axes.
    Axis-aligned boundaries (w0 == 0 or w1 == 0) are handled explicitly so
    that no division by zero occurs; if both weights are zero no line is
    drawn because the equation does not define one.
    """
    indices_neg1 = (Y == -1).nonzero()[0]
    indices_pos1 = (Y == 1).nonzero()[0]
    plt.scatter(X[:,0][indices_neg1], X[:,1][indices_neg1], 
                c='blue', label='class -1')
    plt.scatter(X[:,0][indices_pos1], X[:,1][indices_pos1], 
                c='red', label='class 1')
    plt.legend()
    plt.xlabel('$x_0$')
    plt.ylabel('$x_1$')
    
    if W is not None:
        # Boundary: w0*x0 + w1*x1 + b = 0.
        w0 = W[0]
        w1 = W[1]
        x0_lo, x0_hi = X[:,0].min(), X[:,0].max()
        x1_lo, x1_hi = X[:,1].min(), X[:,1].max()

        if w1 != 0:
            if w0 != 0:
                # x0 values where the line meets the bottom/top of the data's
                # x1 range: x0 = -w1*x1/w0 - b/w0.  Intersect that interval
                # with the data's x0 range (both bounds use column 0).
                crossings = -w1*np.array([x1_lo, x1_hi])/w0-b/w0
                x0_lo = max(crossings.min(), x0_lo)
                x0_hi = min(crossings.max(), x0_hi)
            # With w0 == 0 the line is horizontal and spans the full x0 range.
            x0 = np.linspace(x0_lo, x0_hi, 100)
            x1 = -w0*x0/w1-b/w1
            plt.plot(x0,x1,color='black')
        elif w0 != 0:
            # Vertical line x0 = -b/w0, drawn across the data's x1 range.
            x0_const = -b/w0
            plt.plot([x0_const, x0_const], [x1_lo, x1_hi], color='black')

    plt.show()

In [None]:
# Plot the training points by class; no W is passed, so no boundary is drawn.
vis(X_train, Y_train)

In [None]:
# Plot the held-out test points by class; no W is passed, so no boundary is drawn.
vis(X_test, Y_test)

In [None]:
# Sigmoid function: sigmoid(z) = 1/(1 + e^(-z))
def sigmoid(z):
    """Return the logistic sigmoid of z, element-wise, without overflow.

    Args:
        z: A scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as z, with values in [0, 1].

    The naive form 1/(1 + exp(-z)) overflows in exp for strongly negative z.
    Here only exp(-|z|) is evaluated, which is always <= 1:
        z >= 0:  1 / (1 + exp(-z))
        z <  0:  exp(z) / (1 + exp(z))
    Both expressions are algebraically equal to the sigmoid.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        # Integer/bool input: promote so abs/negation behave as real numbers.
        z = z.astype(np.float64)

    decay = np.exp(-np.abs(z))
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return result[()]

In [None]:
# Judge function: 1(a != b).
def judge(a, b):
    """
    Indicator of disagreement between two values.

    Yields 1 when a and b compare unequal and 0 when they compare equal.
    """
    return 1 if a != b else 0
    
def f_logistic(x, W, b):
    """
    Logistic classifier: f(x, W, b)
    This function returns -1 or 1.

    x should be a 2-dimensional vector, 
    W should be a 2-dimensional vector,
    b should be a scalar.

    The predicted class is 1 when sigmoid(z) >= 0.5 and -1 otherwise, where
    z = W.x + b.  Because the sigmoid is strictly increasing with
    sigmoid(0) = 0.5, that condition is exactly z >= 0.  The sign of z is
    tested directly: it avoids overflow in exp for very negative z and the
    rounding that makes a floating-point sigmoid report 0.5 for tiny
    negative z.
    """
    z = np.dot(W, x) + b  # Linear score of the sample.
    return 1 if z >= 0 else -1
    
# Calculate error given feature vectors X and labels Y.
def calc_error(X, Y, W, b):
    """Return the misclassification rate of the logistic classifier.

    Each row of X is classified with f_logistic(row, W, b) and compared with
    the matching entry of Y via judge.  The number of mismatches is divided
    by len(Y).
    """
    sample_count = len(Y)
    mistakes = sum(
        judge(label, f_logistic(row, W, b)) for row, label in zip(X, Y)
    )
    return mistakes / sample_count

In [None]:
# Gradient of L(W, b) with respect to W and b.
def grad_L_W_b(X, Y, W, b):
    """Gradient of the logistic loss L(W, b) = sum_i log(1 + exp(-y_i * z_i)).

    Here z_i = x_i . W + b.  Differentiating gives
        dL/dz_i = -y_i * sigmoid(-y_i * z_i),
    so the gradient w.r.t. W is that per-sample factor times x_i summed over
    samples, and the gradient w.r.t. b is the plain sum of the factors.

    Returns:
        (grad_W, grad_b)
    """
    scores = X @ W + b
    # y_i * sigmoid(-y_i * z_i): label-signed weight of each sample.
    signed_weight = Y * sigmoid(-Y * scores)

    grad_W = -signed_weight @ X
    grad_b = -np.sum(signed_weight)
    return grad_W, grad_b

In [None]:
# Loss L(W, b).
def L_W_b(X, Y, W, b):
    """Logistic loss L(W, b) = sum_i log(1 + exp(-y_i * (x_i . W + b))).

    Args:
        X: Feature matrix with one sample per row.
        Y: Labels aligned with the rows of X (the formula is the -1/+1
            logistic loss).
        W: Weight vector.
        b: Scalar bias.

    Returns:
        The loss summed over all samples.

    log(1 + exp(t)) is evaluated as logaddexp(0, t), i.e.
    log(exp(0) + exp(t)), which stays finite for large t where a direct
    exp(t) would overflow to infinity.
    """
    z = X @ W + b
    output = -Y * z
    loss = np.sum(np.logaddexp(0.0, output))
    return loss