## Logistic Regression and Gradient Descent

In [1]:
import numpy as np
from numpy import genfromtxt
import math

### Question 1 ###

# Read the training set; the first line of the CSV is a header and is skipped.
data = genfromtxt('LRTrain.csv', delimiter=',', skip_header=1)

# Every column except the last is a feature; the last column holds the label.
n, d = data.shape[0], data.shape[1] - 1
x = data[:, :d]  # feature matrix, one row per sample
y = data[:, d]   # label vector

# Gradient-descent settings
T = 2000     # maximum number of iterations
eps = 0.28   # stop once the negative log likelihood falls below this value
step = 5e-6  # step size

# Start the search from the all-zeros weight vector
w = np.zeros(d)

In [2]:
## Define gradient function
def grad(w,x,y):
    """Gradient of the mean logistic-regression negative log likelihood.

    Parameters
    ----------
    w : array of shape (d,)
        Weight vector.
    x : array of shape (n, d)
        Feature matrix, one sample per row.
    y : array of shape (n,)
        Labels (the loss is written for labels in {0, 1}).

    Returns
    -------
    numpy.ndarray of shape (d,)
        ``(1/n) * sum_i (sigmoid(w . x_i) - y_i) * x_i``.

    Raises
    ------
    ValueError
        If ``x`` contains no samples.
    """
    # Sizes come from the arguments rather than module-level variables, so the
    # function stays correct when another cell rebinds those globals.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1)
    num_samples = x.shape[0]
    if num_samples == 0:
        raise ValueError("grad() needs at least one sample")

    z = x @ w
    # sigmoid(z) = exp(-log(1 + exp(-z))); logaddexp evaluates the inner log
    # without overflowing for large |z|.
    p = np.exp(-np.logaddexp(0.0, -z))
    return (x.T @ (p - y)) / num_samples

## Define negative log likelihood function
def fval(w,x,y):
    """Mean negative log likelihood of a logistic-regression model.

    Parameters
    ----------
    w : array of shape (d,)
        Weight vector.
    x : array of shape (n, d)
        Feature matrix, one sample per row.
    y : array of shape (n,)
        Labels (the formula is written for labels in {0, 1}).

    Returns
    -------
    float
        ``(1/n) * sum_i [ y_i*log(1+exp(-w.x_i)) + (1-y_i)*log(1+exp(w.x_i)) ]``.

    Raises
    ------
    ValueError
        If ``x`` contains no samples.
    """
    # Sizes come from the arguments rather than module-level variables, so the
    # function stays correct when another cell rebinds those globals.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1)
    if x.shape[0] == 0:
        raise ValueError("fval() needs at least one sample")

    z = x @ w
    # log(1 + exp(a)) == logaddexp(0, a). The direct form overflows to inf for
    # large a, and a zero label weight times inf then turns the loss into NaN.
    per_sample = y * np.logaddexp(0.0, -z) + (1.0 - y) * np.logaddexp(0.0, z)
    return float(np.mean(per_sample))


## Define norm of the gradient
def gradnorm(w,x,y):
    """Return the norm (numpy's default vector norm) of ``grad(w, x, y)``."""
    gradient = grad(w, x, y)
    return np.linalg.norm(gradient)


## Perform gradient descent
for t in range(T):
    w = w - step * grad(w,x,y)
    # Evaluate the objective once per iteration and reuse it for both the
    # progress line and the stopping test, instead of calling fval twice.
    loss = fval(w,x,y)
    print("Step count: " + str(t) + ", Negative log likelihood: " + str(loss))
    # Stop early as soon as the loss drops below the threshold eps.
    if loss < eps:
        break

# save final weight vector
w_hat = w

Step count: 0, Negative log likelihood: 0.6885736178387436
Step count: 1, Negative log likelihood: 0.6808500780711092
Step count: 2, Negative log likelihood: 0.6768020760674396
Step count: 3, Negative log likelihood: 0.6715705611467319
Step count: 4, Negative log likelihood: 0.6679067232861315
Step count: 5, Negative log likelihood: 0.6638565628639502
Step count: 6, Negative log likelihood: 0.6605237035331452
Step count: 7, Negative log likelihood: 0.6571229023426557
Step count: 8, Negative log likelihood: 0.6540683738994485
Step count: 9, Negative log likelihood: 0.6510563293172443
Step count: 10, Negative log likelihood: 0.6482248463849826
Step count: 11, Negative log likelihood: 0.6454615042155933
Step count: 12, Negative log likelihood: 0.642803776916634
Step count: 13, Negative log likelihood: 0.6402111405532016
Step count: 14, Negative log likelihood: 0.6376894647544197
Step count: 15, Negative log likelihood: 0.6352238687423419
Step count: 16, Negative log likelihood: 0.63281254

In [3]:
### Question 2 ###

# Read the test set; the first line of the CSV is a header and is skipped.
data = genfromtxt('LRTest.csv', delimiter=',', skip_header=1)

# Every column except the last is a feature; the last column holds the label.
# NOTE: this rebinds the module-level data, n, d, x and y.
n, d = data.shape[0], data.shape[1] - 1
x = data[:, :d]  # feature matrix, one row per sample
y = data[:, d]   # label vector

def _safe_rate(count, total):
    """Return count/total rounded to 4 decimals, or NaN when total is 0."""
    return round(count / total, 4) if total else float("nan")


# Predicted probability of class 1 for every row of x under w_hat. It does not
# depend on the threshold, so it is computed once, outside the sweep.
prob = 1/(1+np.exp(-np.dot(x, w_hat)))

# Boolean masks for the two label values; rows with any other label are
# counted in no cell of the confusion matrix.
is_pos = (y == 1)
is_neg = (y == 0)

# Maps rounded threshold -> [threshold, TPR, FPR, TNR, FNR]
d1 = dict()

# Thresholds 0.0, 0.1, ..., 1.0. linspace fixes the number of points, whereas
# arange with a float step can gain or lose the end point through rounding.
for t in np.linspace(0, 1, 11):
    # Predict 1 only when the probability is strictly above the threshold.
    pred = prob > t

    TP = np.count_nonzero(pred & is_pos)   # number of true positives
    FP = np.count_nonzero(pred & is_neg)   # number of false positives
    TN = np.count_nonzero(~pred & is_neg)  # number of true negatives
    FN = np.count_nonzero(~pred & is_pos)  # number of false negatives

    TPR = _safe_rate(TP, TP + FN)
    FPR = _safe_rate(FP, TN + FP)
    TNR = round(1 - FPR,4)
    FNR = round(1 - TPR,4)

    d1[round(t,2)] = [round(t,2),TPR,FPR,TNR,FNR]
    
# One left-aligned column layout shared by the header and every data row.
row_format = "{:<15} {:<10} {:<10} {:<10} {:<10}"
print(row_format.format('Threshold', 'TPR', 'FPR', 'TNR', 'FNR'))

# Each value of d1 is a 5-item list: [threshold, TPR, FPR, TNR, FNR].
for row in d1.values():
    print(row_format.format(*row))

Threshold       TPR        FPR        TNR        FNR       
0.0             1.0        1.0        0.0        0.0       
0.1             0.9898     0.9591     0.0409     0.0102    
0.2             0.9592     0.4152     0.5848     0.0408    
0.3             0.9184     0.1637     0.8363     0.0816    
0.4             0.8571     0.0643     0.9357     0.1429    
0.5             0.8265     0.0351     0.9649     0.1735    
0.6             0.8061     0.0175     0.9825     0.1939    
0.7             0.7653     0.0058     0.9942     0.2347    
0.8             0.6939     0.0058     0.9942     0.3061    
0.9             0.602      0.0        1.0        0.398     
1.0             0.0        0.0        1.0        1.0       
