In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
'''Basic Logistic Regression'''

class LogisticRegression():
    '''Binary logistic regression trained with full-batch gradient descent.

    Attributes:
        w: weight vector with `size` entries, drawn from a standard normal.
        b: bias stored as a length-1 array, drawn from a standard normal.
    '''

    def __init__(self, size):
        '''Randomly initialise `size` feature weights and a single bias.'''
        self.w = np.random.randn(size)
        self.b = np.random.randn(1)

    def fit(self, X_trn, y_trn,
            X_val, y_val,
            lr = 1e-1, epochs = 1e3,
            show_curve = False):
        '''Train the model in place and return the recorded loss curves.

        Parameters:
            X_trn, y_trn: training features (2-D, one row per sample) and
                their labels.
            X_val, y_val: validation features and labels; only used to
                record the validation loss, never to update the parameters.
            lr: gradient-descent step size.
            epochs: number of full-batch updates (cast to int, so values
                such as 1e3 are accepted).
            show_curve: when True, plot training and validation loss side
                by side once training has finished.

        Returns:
            dict with keys 'J_trn' and 'J_val', each an array holding one
            loss value per epoch. Losses are measured before that epoch's
            parameter update.
        '''
        epochs = int(epochs)
        N, D = X_trn.shape  # unpacking also rejects inputs that are not 2-D

        J_trn = np.zeros(epochs) # train loss
        J_val = np.zeros(epochs) # validation loss

        for epoch in range(epochs):
            # probabilities under the current parameters
            p_hat = self.__forward(X_trn)
            # record training process
            J_trn[epoch] = cross_entropy(y_trn, p_hat)
            J_val[epoch] = cross_entropy(y_val, self.__forward(X_val))
            # gradient step; (p_hat - y) is the cross-entropy gradient
            # with respect to the pre-sigmoid activation
            residual = p_hat - y_trn
            self.w -= lr*(1/N)*X_trn.T@residual
            self.b -= lr*(1/N)*np.sum(residual)
            # print progress
            if epoch % 250 == 0:
                print('Epoch: {}, train error: {:.4f}, valid error: {:.4f}'
                      .format(epoch, J_trn[epoch], J_val[epoch]))

        if show_curve:
            plt.figure(figsize = (15, 6))
            for position, losses, title in ((121, J_trn, 'Training Curve'),
                                            (122, J_val, 'Validation Curve')):
                plt.subplot(position)
                plt.plot(losses)
                plt.xlabel('epochs')
                # raw string: '\m' is not a valid escape sequence and
                # triggers a warning in a normal string literal
                plt.ylabel(r'$\mathcal{J}$')
                plt.title(title, fontsize = 15)

        # return training process
        return {'J_trn': J_trn, 'J_val': J_val}

    def __forward(self, X):
        '''Return the predicted probability for every row of X.'''
        return sigmoid(X@self.w + self.b)

    def predict(self, X, thresh = 0.5):
        '''Return int32 labels: 1 where the probability is >= thresh, else 0.'''
        return (self.__forward(X) >= thresh).astype(np.int32)

In [None]:
''' Assistant Functions '''

def sigmoid(h, epsilon = 1e-5):
    '''Logistic function 1 / (1 + exp(-h)), evaluated without overflow.

    Parameters:
        h: scalar or array of pre-activation values.
        epsilon: accepted only so existing calls keep working; it is no
            longer applied. It used to be added inside the exponent, which
            shifted the curve (sigmoid(0) was not 0.5) and gave no
            protection against overflow.

    Returns:
        Values in [0, 1] with the same shape as `h`.
    '''
    # exp(-|h|) always lies in (0, 1], so it cannot overflow for any h.
    decay = np.exp(-np.abs(h))
    # h >= 0: 1 / (1 + exp(-h));  h < 0: exp(h) / (1 + exp(h)) -- same value.
    return np.where(h >= 0, 1.0, decay) / (1.0 + decay)

def cross_entropy(y, p_hat, epsilon = 1e-3):
    '''Binary cross-entropy between labels `y` and probabilities `p_hat`.

    Parameters:
        y: array of 0/1 labels.
        p_hat: predicted probabilities, same shape as `y`.
        epsilon: probabilities are clipped to [epsilon, 1 - epsilon] so the
            logarithms stay finite.

    Returns:
        Sum of the per-element losses divided by len(y).

    Clipping replaces the earlier log(p + epsilon) smoothing, which biased
    every term and produced a negative loss for perfect predictions.
    '''
    p = np.clip(p_hat, epsilon, 1 - epsilon)
    return -(1/len(y)) * np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))

In [None]:
class WeightedLogisticRegression(LogisticRegression):
    '''Weighted Logistic Regression

    Logistic regression for imbalanced binary data. Each class contributes
    the mean of its own per-sample terms, so the class with more samples
    does not dominate through its size.

    Assumes positive (label 1) is the minority class and negative
    (label 0) the majority class. `eta` controls how much the positive
    class counts: 1 is the default, < 1 reduces its weight, > 1 increases
    it.
    '''

    def fit(self, X_trn, y_trn,
            X_val, y_val,
            eta = 1,                 # weight for positive dataset
            lr = 1e-1, epochs = 1e3,
            show_curve = False, verbose = 0):
        '''Train the model in place and return the recorded loss curves.

        Parameters:
            X_trn, y_trn: training features (2-D) and 0/1 labels. Samples
                whose label is neither 0 nor 1 do not affect the update.
            X_val, y_val: validation features and labels, used only for
                the recorded validation loss.
            eta: multiplier applied to the positive-class term.
            lr: gradient-descent step size.
            epochs: number of full-batch updates (cast to int).
            show_curve: when True, plot both loss curves after training.
            verbose: progress is printed every 250 epochs only when this
                equals 1.

        Returns:
            dict with keys 'J_trn' and 'J_val', one loss value per epoch,
            measured before that epoch's parameter update.

        Raises:
            ValueError: if y_trn has no positive or no negative sample,
                because the per-class average is then undefined.
        '''
        epochs = int(epochs)
        N, D = X_trn.shape  # unpacking also rejects inputs that are not 2-D

        J_trn = np.zeros(epochs) # train loss
        J_val = np.zeros(epochs) # validation loss

        # Class membership never changes during training, so the masks and
        # the per-sample weights are built once instead of on every epoch.
        labels = np.asarray(y_trn).reshape(-1)
        is_pos = labels == 1
        is_neg = labels == 0
        n_pos = np.count_nonzero(is_pos)
        n_neg = np.count_nonzero(is_neg)
        if n_pos == 0 or n_neg == 0:
            raise ValueError('y_trn must contain at least one positive (1) '
                             'and one negative (0) sample')
        # eta/n_pos for positives, 1/n_neg for negatives, 0 for anything else
        sample_w = np.where(is_pos, eta/n_pos,
                            np.where(is_neg, 1/n_neg, 0.0))
        target = is_pos.astype(float)

        for epoch in range(epochs):
            # probabilities under the current parameters
            p_hat = self.__forward(X_trn)
            # record training process
            J_trn[epoch] = weighted_cross_entropy(y_trn, p_hat, eta = eta)
            J_val[epoch] = weighted_cross_entropy(y_val, self.__forward(X_val), eta = eta)
            # gradient step on the class-weighted loss
            weighted_residual = sample_w * (p_hat - target)
            self.w -= lr * (X_trn.T @ weighted_residual)
            self.b -= lr * np.sum(weighted_residual)
            # print progress
            if verbose == 1 and epoch % 250 == 0:
                print('Epoch: {}, train error: {:.4f}, validation error: {:.4f}'
                      .format(epoch, J_trn[epoch], J_val[epoch]))

        if show_curve:
            plt.figure(figsize = (15, 6))
            for position, losses, title in ((121, J_trn, 'Training Curve'),
                                            (122, J_val, 'Validation Curve')):
                plt.subplot(position)
                plt.plot(losses)
                plt.xlabel('epochs')
                # raw string: '\m' is not a valid escape sequence and
                # triggers a warning in a normal string literal
                plt.ylabel(r'$\mathcal{J}$')
                plt.title(title, fontsize = 15)

        # return training process
        return {'J_trn': J_trn, 'J_val': J_val}

    # Double-underscore names are mangled per class, so `self.__forward`
    # inside this class resolves to this definition, not the parent's.
    def __forward(self, X):
        '''Return the predicted probability for every row of X.'''
        return sigmoid(X@self.w + self.b)

In [None]:
''' Assistant Functions '''

def weighted_cross_entropy(y, p_hat, eta = 1, epsilon = 1e-3):
    '''Class-balanced binary cross-entropy.

    The loss is eta * mean(-log p) over the positive samples (y == 1) plus
    mean(-log(1 - p)) over the negative samples (y == 0). Averaging inside
    each class stops the larger class from dominating through its size.

    Parameters:
        y: array of labels; only entries equal to 1 or 0 are counted.
        p_hat: predicted probabilities aligned with `y`.
        eta: multiplier applied to the positive-class term.
        epsilon: probabilities are clipped to [epsilon, 1 - epsilon] so the
            logarithms stay finite.

    Returns:
        The weighted loss. A class with no samples contributes 0 instead of
        raising ZeroDivisionError, so a split that happens to contain a
        single class can still be scored.

    Clipping replaces the earlier log(p + epsilon) smoothing, which biased
    every term and produced a negative loss for perfect predictions.
    '''
    labels = np.asarray(y).reshape(-1)
    p = np.clip(np.asarray(p_hat, dtype = float).reshape(-1),
                epsilon, 1 - epsilon)
    is_pos = labels == 1
    is_neg = labels == 0
    # For y == 1 only the log(p) term is non-zero; for y == 0 only the
    # log(1 - p) term is, so each class needs a single logarithm.
    pos = -eta * np.mean(np.log(p[is_pos])) if is_pos.any() else 0.0
    neg = -np.mean(np.log(1 - p[is_neg])) if is_neg.any() else 0.0
    return pos + neg