In [154]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import scipy.stats
from scipy.integrate import quad
from scipy.optimize import minimize
from scipy.special import expit, logit
from scipy.stats import norm

# Dataset

In [167]:
df = pd.read_csv("HeartFail/heart.csv")
df.drop('Sex', axis=1, inplace=True)
df.drop('Oldpeak', axis=1, inplace=True)
df.drop('ST_Slope', axis=1, inplace=True)
df

Unnamed: 0,Age,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,HeartDisease
0,40,ATA,140,289,0,Normal,172,N,0
1,49,NAP,160,180,0,Normal,156,N,1
2,37,ATA,130,283,0,ST,98,N,0
3,48,ASY,138,214,0,Normal,108,Y,1
4,54,NAP,150,195,0,Normal,122,N,0
...,...,...,...,...,...,...,...,...,...
913,45,TA,110,264,0,Normal,132,N,1
914,68,ASY,144,193,1,Normal,141,N,1
915,57,ASY,130,131,0,Normal,115,Y,1
916,57,ATA,130,236,0,LVH,174,N,1


## Missing Values?

In [168]:
missing = df.isnull().sum()
print(missing)

Age               0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
HeartDisease      0
dtype: int64


## Changing Categorical Features to Numerical

In [169]:
char_cols = df.dtypes.pipe(lambda x: x[x == 'object']).index
label_mapping = {}

for c in char_cols:
    df[c], label_mapping[c] = pd.factorize(df[c])
    
df

Unnamed: 0,Age,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,HeartDisease
0,40,0,140,289,0,0,172,0,0
1,49,1,160,180,0,0,156,0,1
2,37,0,130,283,0,1,98,0,0
3,48,2,138,214,0,0,108,1,1
4,54,1,150,195,0,0,122,0,0
...,...,...,...,...,...,...,...,...,...
913,45,3,110,264,0,0,132,0,1
914,68,2,144,193,1,0,141,0,1
915,57,2,130,131,0,0,115,1,1
916,57,0,130,236,0,2,174,0,1


In [170]:
d = df.to_numpy()
X = d[:,:-1]
Y = d[:,-1]

## Train-Test Split

In [171]:
X_train, Xtest, Y, Ytest = train_test_split(X, Y, test_size=0.25)
X_train.shape, Xtest.shape, Y.shape, Ytest.shape

((688, 8), (230, 8), (688,), (230,))

## Normalising Dataset

In [172]:
scaler = preprocessing.StandardScaler().fit(X_train)
scaler.mean_, scaler.scale_

(array([ 53.43168605,   1.44040698, 132.71947674, 199.19912791,
          0.23110465,   0.59883721, 135.82412791,   0.41569767]),
 array([  9.55422021,   0.84869658,  19.08788042, 110.79791026,
          0.42153919,   0.79723274,  25.97068225,   0.49284188]))

In [173]:
X = scaler.transform(X_train)
X.mean(axis=0), X.std(axis=0)

(array([-1.28450222e-16,  8.22985091e-18,  3.71956987e-16,  1.71051803e-17,
         7.87483773e-17,  1.05535735e-16,  8.77850764e-17, -3.51785784e-16]),
 array([1., 1., 1., 1., 1., 1., 1., 1.]))

# Algorithms

## Logistic Regression (Newton Raphson)

In [131]:
def initialise_w(initialise):
    if(initialise == 'random'):
        w = np.random.randn(d,1)/10
        print("w is initialised from N[0,1]")
    elif(initialise == 'zeros'):
        w = np.zeros((d,1))
        print("w is initialised as a zero vector")
    else:
        print("Method unknown")
    return w

def compute_mu(X, w):
    mu = expit(np.dot(X,w))
    mu = mu.reshape(X.shape[0],1)
    return mu

def first_derivative(w):
    mu = compute_mu(X, w)
    epsilon = 1e-12

    grad = np.matmul(np.transpose(X), (mu-Y)) + w.reshape(d,1)
    grad = grad.squeeze()
    return(grad)

def second_deivative(w,X,y):
    mu = compute_mu(X, w)
    R = np.eye(n)
    for i in range(n):
        R[i,i] = mu[i,0] * (1-mu[i,0])
    return(np.dot(np.dot(np.transpose(X),R),X) + np.eye(d))

def test(w, X, y):
    n,d = X.shape
    mu = compute_mu(X, w)
    yhat = np.zeros((n,1)).astype(np.float64)
    yhat[mu>0.5]=1
    correct = np.sum(yhat==y)
    return(correct,n)

def train(initialise):

    np.random.seed(0)
    w = initialise_w(initialise)
    for j in range(100):

        grad1 = first_derivative(w.squeeze()).reshape(d,1)
        H = second_deivative(w, X, Y)
        delta_w = np.dot(np.linalg.inv(H),grad1)
        w = w - delta_w
        diff = np.linalg.norm(delta_w)

        correct,n = test(w, Xtest, Ytest)
        print("Iteration : {} \t Accuracy : {}%".format(j,correct/n*100))

        if(diff < 1e-5):
            print("tolerance reached at the iteration : ",j)
            break
    print("Training done...")
    print("Model weights : ", np.transpose(w))

n,d = X.shape
n1,d1 = Xtest.shape

Y = Y.reshape(n,1)
Ytest = Ytest.reshape(n1,1)

In [132]:
train('zeros')

w is initialised as a zero vector
Iteration : 0 	 Accuracy : 40.869565217391305%
Iteration : 1 	 Accuracy : 40.869565217391305%
Iteration : 2 	 Accuracy : 40.869565217391305%
Iteration : 3 	 Accuracy : 40.869565217391305%
Iteration : 4 	 Accuracy : 40.869565217391305%
Iteration : 5 	 Accuracy : 40.869565217391305%
tolerance reached at the iteration :  5
Training done...
Model weights :  [[-0.03326866 -0.5127731   0.68203272  0.04199804 -0.3172562   0.30101375
   0.02288527 -0.38568513  0.62637218  0.20231199  1.04643309]]


In [133]:
train('random')

w is initialised from N[0,1]
Iteration : 0 	 Accuracy : 40.869565217391305%
Iteration : 1 	 Accuracy : 40.869565217391305%
Iteration : 2 	 Accuracy : 40.869565217391305%
Iteration : 3 	 Accuracy : 40.869565217391305%
Iteration : 4 	 Accuracy : 40.869565217391305%
Iteration : 5 	 Accuracy : 40.869565217391305%
tolerance reached at the iteration :  5
Training done...
Model weights :  [[-0.03326866 -0.5127731   0.68203272  0.04199804 -0.3172562   0.30101375
   0.02288527 -0.38568513  0.62637218  0.20231199  1.04643309]]


### Removing columns

In [145]:
def initialise_w(initialise):
    if(initialise == 'random'):
        w = np.random.randn(d,1)/10
        print("w is initialised from N[0,1]")
    elif(initialise == 'zeros'):
        w = np.zeros((d,1))
        print("w is initialised as a zero vector")
    else:
        print("Method unknown")
    return w

def compute_mu(X, w):
    mu = expit(np.dot(X,w))
    mu = mu.reshape(X.shape[0],1)
    return mu

def first_derivative(w):
    mu = compute_mu(X, w)
    epsilon = 1e-12

    grad = np.matmul(np.transpose(X), (mu-Y)) + w.reshape(d,1)
    grad = grad.squeeze()
    return(grad)

def second_deivative(w,X,y):
    mu = compute_mu(X, w)
    R = np.eye(n)
    for i in range(n):
        R[i,i] = mu[i,0] * (1-mu[i,0])
    return(np.dot(np.dot(np.transpose(X),R),X) + np.eye(d))

def test(w, X, y):
    n,d = X.shape
    mu = compute_mu(X, w)
    yhat = np.zeros((n,1)).astype(np.float64)
    yhat[mu>0.5]=1
    correct = np.sum(yhat==y)
    return(correct,n)

def train(initialise):

    np.random.seed(0)
    w = initialise_w(initialise)
    for j in range(100):

        grad1 = first_derivative(w.squeeze()).reshape(d,1)
        H = second_deivative(w, X, Y)
        delta_w = np.dot(np.linalg.inv(H),grad1)
        w = w - delta_w
        diff = np.linalg.norm(delta_w)

        correct,n = test(w, Xtest, Ytest)
        print("Iteration : {} \t Accuracy : {}%".format(j,correct/n*100))

        if(diff < 1e-5):
            print("tolerance reached at the iteration : ",j)
            break
    print("Training done...")
    print("Model weights : ", np.transpose(w))

n,d = X.shape
n1,d1 = Xtest.shape

Y = Y.reshape(n,1)
Ytest = Ytest.reshape(n1,1)

train('random')

w is initialised from N[0,1]
Iteration : 0 	 Accuracy : 42.173913043478265%
Iteration : 1 	 Accuracy : 42.173913043478265%
Iteration : 2 	 Accuracy : 42.173913043478265%
Iteration : 3 	 Accuracy : 42.173913043478265%
Iteration : 4 	 Accuracy : 42.173913043478265%
tolerance reached at the iteration :  4
Training done...
Model weights :  [[ 0.08916873  0.75559468  0.08747335 -0.35199611  0.40731751  0.11405956
  -0.4523914   0.8352809 ]]


## Probit Regression (L-BFGS)

In [166]:
def initialise_w(initialise):
    if(initialise == 'random'):
        w = np.random.randn(d,1)/10
        print("w is initialised from N[0,1]")
    elif(initialise == 'zeros'):
        w = np.zeros((d,1))
        print("w is initialised as a zero vector")
    else:
        print("Method unknown")
    return w

def compute_mu(X, w):
    phi=np.dot(X,w)
    mu = norm.cdf(phi)
    mu = mu.reshape(X.shape[0],1)
    return mu

def first_derivative(w):
    mu = compute_mu(X, w)
    epsilon = 1e-12
    phi=np.dot(X,w)
    grad_mu = X*(scipy.stats.norm.pdf(phi,0,1).reshape(-1,1))
    return(np.sum((- Y*(1/(mu)) + (1-Y)*(1/(1+epsilon-mu)))*grad_mu,0) + w).squeeze()

def second_deivative(w,X,y):
    mu = compute_mu(X, w)
    R = np.eye(n)

    phi=np.dot(X,w)
    for i in range(n):
        t1 = (y[i] - mu[i,0])/(mu[i,0] * (1-mu[i,0]))
        t2 = scipy.stats.norm.pdf(phi[i,0],0,1)
        t3 = (1-y[i])/np.power(1-mu[i,0],2) + y[i]/np.power(mu[i,0],2)
        R[i,i] = t1*t2*np.dot(X[i],w) + t3*t2*t2

    return(np.dot(np.dot(np.transpose(X),R),X) + np.eye(d))

def neg_log_posterior(w):
    w=w.reshape(-1,1)
    epsilon = 1e-12
    mu = compute_mu(X, w)
    prob_1 = Y*np.log(mu+epsilon)
    prob_0 = (1-Y)*np.log(1-mu+epsilon)
    log_like = np.sum(prob_1) + np.sum(prob_0)
    w_norm = np.power(np.linalg.norm(w),2)
    neg_log_pos = -log_like+w_norm/2
    print("neg_log_posterior = {:.4f} \tlog_like = {:.4f} \tw_norm = {:.4f}".format(neg_log_pos, log_like, w_norm))
    return(neg_log_pos)

def test(w, X, y):
    n,d = X.shape
    mu = compute_mu(X, w)
    #print(mu.shape, n, d)
    yhat = np.zeros((n,1)).astype(np.float64)
    yhat[mu>0.5]=1
    correct = np.sum(yhat==y)
    return(correct,n)

n,d = X.shape
n1,d1 = Xtest.shape

Y = Y.reshape(n,1)
Ytest = Ytest.reshape(n1,1)

res = minimize(neg_log_posterior, initialise_w('random'), method='BFGS', jac=first_derivative,
               tol= 1e-5, options={'maxiter': 100})
correct,n = test(res.x, Xtest, Ytest)
print("\n_____________Model trained______________\n")
print("\nModel weights : ", res.x)
print("\n_____________Test Accuracy______________\n")

print("Accuracy : {}% ".format(correct/n*100))

res = minimize(neg_log_posterior, initialise_w('random'), method='BFGS', jac=first_derivative,
               tol= 1e-5, options={'maxiter': 100})
correct,n = test(res.x, Xtest, Ytest)
print("\n_____________Model trained______________\n")
print("\nModel weights : ", res.x)
print("\n_____________Test Accuracy______________\n")

print("Accuracy : {}% ".format(correct/n*100))

w is initialised from N[0,1]
neg_log_posterior = 465.8474 	log_like = -465.8155 	w_norm = 0.0637
neg_log_posterior = 291.2968 	log_like = -290.7578 	w_norm = 1.0780
neg_log_posterior = 1775.9106 	log_like = -1767.6721 	w_norm = 16.4770
neg_log_posterior = 275.6776 	log_like = -275.0765 	w_norm = 1.2023
neg_log_posterior = 482.7360 	log_like = -481.0474 	w_norm = 3.3772
neg_log_posterior = 274.6006 	log_like = -273.9916 	w_norm = 1.2179
neg_log_posterior = 276.9870 	log_like = -276.3142 	w_norm = 1.3456
neg_log_posterior = 274.3439 	log_like = -273.7251 	w_norm = 1.2376
neg_log_posterior = 274.2939 	log_like = -273.6704 	w_norm = 1.2470
neg_log_posterior = 274.1949 	log_like = -273.5726 	w_norm = 1.2445
neg_log_posterior = 273.8384 	log_like = -273.2209 	w_norm = 1.2349
neg_log_posterior = 272.9444 	log_like = -272.3418 	w_norm = 1.2054
neg_log_posterior = 269.9068 	log_like = -269.3607 	w_norm = 1.0923
neg_log_posterior = 271.7894 	log_like = -271.3041 	w_norm = 0.9706
neg_log_posterio

### Removing columns

In [174]:
def initialise_w(initialise):
    if(initialise == 'random'):
        w = np.random.randn(d,1)/10
        print("w is initialised from N[0,1]")
    elif(initialise == 'zeros'):
        w = np.zeros((d,1))
        print("w is initialised as a zero vector")
    else:
        print("Method unknown")
    return w

def compute_mu(X, w):
    phi=np.dot(X,w)
    mu = norm.cdf(phi)
    mu = mu.reshape(X.shape[0],1)
    return mu

def first_derivative(w):
    mu = compute_mu(X, w)
    epsilon = 1e-12
    phi=np.dot(X,w)
    grad_mu = X*(scipy.stats.norm.pdf(phi,0,1).reshape(-1,1))
    return(np.sum((- Y*(1/(mu)) + (1-Y)*(1/(1+epsilon-mu)))*grad_mu,0) + w).squeeze()

def second_deivative(w,X,y):
    mu = compute_mu(X, w)
    R = np.eye(n)

    phi=np.dot(X,w)
    for i in range(n):
        t1 = (y[i] - mu[i,0])/(mu[i,0] * (1-mu[i,0]))
        t2 = scipy.stats.norm.pdf(phi[i,0],0,1)
        t3 = (1-y[i])/np.power(1-mu[i,0],2) + y[i]/np.power(mu[i,0],2)
        R[i,i] = t1*t2*np.dot(X[i],w) + t3*t2*t2

    return(np.dot(np.dot(np.transpose(X),R),X) + np.eye(d))

def neg_log_posterior(w):
    w=w.reshape(-1,1)
    epsilon = 1e-12
    mu = compute_mu(X, w)
    prob_1 = Y*np.log(mu+epsilon)
    prob_0 = (1-Y)*np.log(1-mu+epsilon)
    log_like = np.sum(prob_1) + np.sum(prob_0)
    w_norm = np.power(np.linalg.norm(w),2)
    neg_log_pos = -log_like+w_norm/2
    print("neg_log_posterior = {:.4f} \tlog_like = {:.4f} \tw_norm = {:.4f}".format(neg_log_pos, log_like, w_norm))
    return(neg_log_pos)

def test(w, X, y):
    n,d = X.shape
    mu = compute_mu(X, w)
    #print(mu.shape, n, d)
    yhat = np.zeros((n,1)).astype(np.float64)
    yhat[mu>0.5]=1
    correct = np.sum(yhat==y)
    return(correct,n)

n,d = X.shape
n1,d1 = Xtest.shape

Y = Y.reshape(n,1)
Ytest = Ytest.reshape(n1,1)

res = minimize(neg_log_posterior, initialise_w('random'), method='BFGS', jac=first_derivative,
               tol= 1e-5, options={'maxiter': 100})
correct,n = test(res.x, Xtest, Ytest)
print("\n_____________Model trained______________\n")
print("\nModel weights : ", res.x)
print("\n_____________Test Accuracy______________\n")

print("Accuracy : {}% ".format(correct/n*100))

res = minimize(neg_log_posterior, initialise_w('random'), method='BFGS', jac=first_derivative,
               tol= 1e-5, options={'maxiter': 100})
correct,n = test(res.x, Xtest, Ytest)
print("\n_____________Model trained______________\n")
print("\nModel weights : ", res.x)
print("\n_____________Test Accuracy______________\n")

print("Accuracy : {}% ".format(correct/n*100))

w is initialised from N[0,1]
neg_log_posterior = 411.6846 	log_like = -411.6579 	w_norm = 0.0535
neg_log_posterior = 334.4228 	log_like = -333.7545 	w_norm = 1.3366
neg_log_posterior = 1623.9979 	log_like = -1617.3865 	w_norm = 13.2227
neg_log_posterior = 330.5323 	log_like = -329.8332 	w_norm = 1.3982
neg_log_posterior = 358.1512 	log_like = -357.2158 	w_norm = 1.8708
neg_log_posterior = 330.1067 	log_like = -329.3967 	w_norm = 1.4200
neg_log_posterior = 329.6562 	log_like = -328.9509 	w_norm = 1.4106
neg_log_posterior = 328.7585 	log_like = -328.0620 	w_norm = 1.3929
neg_log_posterior = 325.4192 	log_like = -324.7569 	w_norm = 1.3245
neg_log_posterior = 318.5202 	log_like = -317.9400 	w_norm = 1.1605
neg_log_posterior = 354.0487 	log_like = -353.3236 	w_norm = 1.4504
neg_log_posterior = 317.5307 	log_like = -316.9557 	w_norm = 1.1501
neg_log_posterior = 318.2368 	log_like = -317.6596 	w_norm = 1.1543
neg_log_posterior = 317.1600 	log_like = -316.5873 	w_norm = 1.1453
neg_log_posterio