In [1]:
import numpy as np
import pandas as pd
# Read the application training data; as the last expression in the cell the
# resulting DataFrame is rendered as the cell output (it is not bound to a name).
# NOTE(review): the rendered output shows an "Unnamed: 0" column, which suggests
# the CSV was written with its index — consider index_col=0; confirm.
pd.read_csv('application_train.csv')

Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,...,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,...,0,0,0,0,,,,,,
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
307506,456251,0,Cash loans,M,N,N,0,157500.0,254700.0,27558.0,...,0,0,0,0,,,,,,
307507,456252,0,Cash loans,F,N,Y,0,72000.0,269550.0,12001.5,...,0,0,0,0,,,,,,
307508,456253,0,Cash loans,F,N,Y,0,153000.0,677664.0,29979.0,...,0,0,0,0,1.0,0.0,0.0,1.0,0.0,1.0
307509,456254,1,Cash loans,F,N,Y,0,171000.0,370107.0,20205.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0


## Model Implementation (each model function computes the current gradient; `logistic_predict_prob` returns predicted probabilities)

In [2]:
# Gradient of the (unpenalised) logistic regression loss
def logistic(X, Y, B):
    """Return the logistic-regression gradient ``-(Y - p).T @ X``.

    ``p`` is the fitted probability ``exp(X @ B) / (1 + exp(X @ B))``.

    Parameters
    ----------
    X : feature matrix, matrix-multiplied with ``B``
        (presumably shape (n_samples, n_features) — confirm with caller).
    Y : observed responses, compared element-wise against ``p``.
    B : current coefficient vector.

    Notes
    -----
    ``np.exp`` is applied to the raw linear score, so very large scores
    overflow and yield NaN entries. The gradient-descent drivers in this
    notebook reject NaN coefficients as "not converged", so that behaviour
    is kept as-is here.
    """
    odds = np.exp(X @ B)
    prob = odds / (1 + odds)
    residual = Y - prob
    return (-1 * residual.T) @ X

# Logistic Regression with Lasso Penalty
def logistic_lasso(X, Y, B, lamb):
    """Return the logistic-regression gradient plus the lasso (L1) subgradient.

    Parameters
    ----------
    X : feature matrix, matrix-multiplied with ``B``
        (presumably shape (n_samples, n_features) — confirm with caller).
    Y : observed responses, compared element-wise against the fitted
        probabilities.
    B : current coefficient vector.
    lamb : L1 penalty weight.

    Returns
    -------
    ``-(Y - p).T @ X + lamb * sign(B)`` where
    ``p = exp(X @ B) / (1 + exp(X @ B))``.
    """
    half = np.exp(X @ B)
    p = half / (1 + half)
    partial_gradient = -1 * (Y - p).T @ X

    # Subgradient of lamb * |B|: +lamb where B > 0, -lamb where B < 0 and 0 at
    # B == 0. The previous expression, B / (B * -1), evaluates to -1 for every
    # non-zero coefficient, so it added +lamb regardless of the sign of B and
    # pushed negative coefficients away from zero instead of shrinking them.
    lamb_beta = lamb * np.sign(B)

    return partial_gradient + lamb_beta

# Predicted probabilities for a fitted logistic regression
def logistic_predict_prob(X, B):
    """Return ``1 / (1 + exp(-(X @ B)))`` for feature matrix ``X`` and coefficients ``B``."""
    linear_score = X @ B
    inverse_odds = np.exp(-linear_score)
    return 1 / (1 + inverse_odds)



## Evaluation Metrics

In [None]:
def get_accuracy(p, Y):
    """Return the fraction of entries where the thresholded prediction ``p > .5`` equals ``Y``."""
    is_correct = (p > .5) == Y
    n_correct = sum(is_correct)
    return n_correct / len(Y)

def get_precision(p, Y):
    """Return precision, TP / (TP + FP), of the predictions ``p > .5`` against ``Y``.

    Parameters
    ----------
    p : predicted probabilities; an entry counts as a positive prediction
        when it is greater than 0.5.
    Y : true labels (assumes 0/1 coding — confirm with caller).

    Returns
    -------
    The precision as a NumPy float. When nothing is predicted positive
    (TP + FP == 0) the ratio is undefined; 0.0 is returned instead of the
    NaN (plus runtime warning) that the bare division would produce.
    """
    predicted_positive = p > .5
    TP = np.where(Y, (predicted_positive == Y), False).sum()
    FP = np.where(Y == 0, predicted_positive, False).sum()
    if TP + FP == 0:
        # np.float64 rather than a Python float so callers that take the
        # reciprocal get inf instead of a ZeroDivisionError.
        return np.float64(0.0)
    return TP / (TP + FP)
   
def get_recall(p, Y):
    """Return recall, TP / (TP + FN), of the predictions ``p > .5`` against ``Y``.

    Parameters
    ----------
    p : predicted probabilities; an entry counts as a positive prediction
        when it is greater than 0.5.
    Y : true labels (assumes 0/1 coding — confirm with caller).

    Returns
    -------
    The recall as a NumPy float. When ``Y`` contains no positives
    (TP + FN == 0) the ratio is undefined; 0.0 is returned instead of the
    NaN (plus runtime warning) that the bare division would produce.
    """
    predicted_positive = p > .5
    TP = np.where(Y, (predicted_positive == Y), False).sum()
    FN = np.where(Y, (predicted_positive != Y), False).sum()
    if TP + FN == 0:
        # np.float64 rather than a Python float so callers that take the
        # reciprocal get inf instead of a ZeroDivisionError.
        return np.float64(0.0)
    return TP / (TP + FN)

def get_f1(p, Y):
    """Return the F1 score: the harmonic mean of ``get_precision`` and ``get_recall``."""
    inverse_precision = 1 / get_precision(p, Y)
    inverse_recall = 1 / get_recall(p, Y)
    return 2 / (inverse_precision + inverse_recall)
    
# Compute Accuracy, precision, recall, and f1
def compute_metrics(p, Y):
    """Print accuracy, precision, recall and F1 of the predictions ``p > .5`` against ``Y``.

    Parameters
    ----------
    p : predicted probabilities; an entry counts as a positive prediction
        when it is greater than 0.5.
    Y : true labels (assumes 0/1 coding — confirm with caller).

    The metrics are printed on one line, each rounded to 3 decimals; nothing
    is returned. Ratios whose denominator is empty (no positive predictions
    for precision, no positive labels for recall) are reported as 0.0 rather
    than NaN, and F1 is 0.0 whenever there are no true positives.
    """
    # precision = TP / (TP + FP)
    # recall = TP / (TP + FN)
    predicted_positive = p > .5
    accuracy = sum(predicted_positive == Y) / len(Y)

    TP = np.where(Y, (predicted_positive == Y), False).sum()
    FP = np.where(Y == 0, predicted_positive, False).sum()
    FN = np.where(Y, (predicted_positive != Y), False).sum()

    precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

    # With TP > 0 both precision and recall are strictly positive, so the
    # harmonic mean is well defined; otherwise at least one of them is zero.
    f1 = 2 / ((1 / precision) + (1 / recall)) if TP > 0 else 0.0
    print(f"Accuracy: {round(accuracy, 3)}; Precision: {round(precision, 3)}; " + \
           f"Recall: {round(recall, 3)}; f1: {round(f1, 3)}")

## Gradient Descent Implementation (Pass in a regression function, X, Y, and any optional arguments)

In [None]:
def gradient_descent(reg_func, X, Y, *reg_func_args, initial_B=None, max_iterations=75000, tol = .00001,
                     etas=[.1, .01, .001, .0001, .00001, .000001], err=False):
    """Fixed-step gradient descent, trying each learning rate in ``etas`` in turn.

    Parameters
    ----------
    reg_func : callable ``reg_func(X, Y, B, *reg_func_args)`` returning the
        gradient at coefficients ``B``.
    X, Y : passed through to ``reg_func``; ``len(X[0])`` sets the number of
        coefficients.
    *reg_func_args : extra positional arguments forwarded to ``reg_func``.
    initial_B : starting coefficients; zeros when ``None``.
    max_iterations : iteration cap per learning rate.
    tol : a run stops once ``eta * sum(gradient ** 2) <= tol``.
    etas : learning rates, tried in order until one converges.
    err : when False NumPy floating-point warnings are silenced during the
        search; when True they are all set to "warn".

    Returns
    -------
    The coefficients of the first learning rate that stops before
    ``max_iterations`` with no inf/NaN entries. If none does, a warning line
    is printed and the coefficients from the last learning rate are returned.
    """
    n_coefficients = len(X[0])

    # np.errstate restores the caller's floating-point error settings on every
    # exit path (including an exception raised by reg_func), instead of
    # overwriting the global state with np.seterr.
    with np.errstate(all="warn" if err else "ignore"):
        for eta in etas:
            # reset
            iterations = 0
            if initial_B is not None:
                B = initial_B
            else:
                B = np.zeros(n_coefficients)
            gradient = np.zeros(n_coefficients)
            # A NaN gradient makes the tolerance comparison False, which also
            # ends the loop; the NaN check below then rejects this eta.
            while iterations < max_iterations and np.isinf(B).sum() == 0 and \
                  (iterations == 0 or (eta * (gradient ** 2)).sum() > tol):
                # calls the regression function
                gradient = reg_func(X, Y, B, *reg_func_args)
                B = B - (eta * gradient)
                iterations += 1

            if iterations < max_iterations and np.isinf(B).sum() == 0 and np.isnan(B).sum() == 0:
                print(f'Gradient converged w/ {iterations} iterations and eta = {eta}')
                return B
            print(f'Eta: {eta}; Iterations: {iterations}')
        print('GRADIENT DID NOT CONVERGE. RESULTS ARE BAD')
        return B

# The code below uses the 'Adagrad' gradient descent optimization algorithm to adapt the
# learning rate for each dimension. There are other variants that may be more effective.
# Explanation of how it works, as well as other ideas:
# https://ruder.io/optimizing-gradient-descent/index.html#momentum
def adaptive_gradient_descent(reg_func, X, Y, *reg_func_args, initial_B=None, max_iterations=100000, 
                              tol = .001, etas=[.1], err=False):
    """Adagrad gradient descent, trying each base learning rate in ``etas`` in turn.

    Each coefficient's step is ``eta * gradient / (sqrt(sum of its past
    squared gradients) + 1e-8)``.

    Parameters
    ----------
    reg_func : callable ``reg_func(X, Y, B, *reg_func_args)`` returning the
        gradient at coefficients ``B``.
    X, Y : passed through to ``reg_func``; ``len(X[0])`` sets the number of
        coefficients.
    *reg_func_args : extra positional arguments forwarded to ``reg_func``.
    initial_B : starting coefficients; zeros when ``None``.
    max_iterations : iteration cap per learning rate.
    tol : a run stops once ``eta * sum(gradient ** 2) <= tol``.
    etas : base learning rates, tried in order until one converges.
    err : when False NumPy floating-point warnings are silenced during the
        search; when True they are all set to "warn".

    Returns
    -------
    The coefficients of the first learning rate that stops before
    ``max_iterations`` with no inf/NaN entries. If none does, a warning line
    is printed and the coefficients from the last learning rate are returned.
    """
    n_coefficients = len(X[0])

    # np.errstate restores the caller's floating-point error settings on every
    # exit path (including an exception raised by reg_func), instead of
    # overwriting the global state with np.seterr.
    with np.errstate(all="warn" if err else "ignore"):
        for eta in etas:
            # reset
            iterations = 0
            if initial_B is not None:
                B = initial_B
            else:
                B = np.zeros(n_coefficients)
            gradient = np.zeros(n_coefficients)
            SS_past_gradients = np.zeros(n_coefficients)
            while iterations < max_iterations and np.isinf(B).sum() == 0 and \
                  (iterations == 0 or (eta * (gradient ** 2)).sum() > tol):
                # calls the regression function
                gradient = reg_func(X, Y, B, *reg_func_args)

                # SS_past_gradients is the running sum of squares of past gradients
                SS_past_gradients += gradient ** 2
                # 1e-8 keeps the division finite while the accumulated sum is zero
                B = B - ((eta * gradient) / (np.sqrt(SS_past_gradients) + 1e-8))

                iterations += 1

            if iterations < max_iterations and np.isinf(B).sum() == 0 and np.isnan(B).sum() == 0:
                print(f'Gradient converged w/ {iterations} iterations and eta = {eta}')
                return B
            print(f'Eta: {eta}; Iterations: {iterations}')
        print('GRADIENT DID NOT CONVERGE. RESULTS ARE BAD')
        return B