In [10]:
import numpy as np
import csv
import math

# Load the dataset: every column but the last is a feature, the last is the label.
# Rows containing a missing value are dropped.
A, b = [], []
# newline='' lets the csv module do its own line-ending handling, as its docs recommend.
with open('framingham.csv', 'r', newline='') as f:
  rows = csv.reader(f)
  next(rows, None)                      # skip the header row (tolerates an empty file)
  for row in rows:
    if not row:                         # csv.reader yields [] for blank lines
      continue

    features = [np.nan if cell == 'NA' else float(cell) for cell in row[:-1]]

    # math.isnan catches every NaN, including ones produced by float('nan');
    # a list-membership test against np.nan only matches that exact object.
    if row[-1] == 'NA' or any(math.isnan(value) for value in features):
      continue

    A.append(features)
    b.append([int(row[-1])])            # labels are kept as one-element rows -> B has shape (n, 1)

A = np.array(A)
B = np.array(b)

In [11]:
# Standardising data: zero mean / unit variance per column, then prepend a bias column of ones.
#
# NOTE: this cell overwrites A, so it is not idempotent -- running it twice
# inserts a second bias column. Re-run the loading cell before re-running it.
# NOTE(review): the mean/std are computed over all rows, i.e. before the
# train/validation/test split performed in the next cell.

A_mean = np.mean(A,axis=0)
A_std = np.std(A,axis=0)
# A constant column has std == 0; dividing by it would fill the column with NaN.
# Divide by 1 instead so such a column simply becomes all zeros.
A = (A - A_mean)/np.where(A_std == 0, 1.0, A_std)
A = np.insert(A,0,np.ones(len(A)),axis=1)

In [12]:
# Splitting data: first 70% of the rows for training, next 15% for
# validation, remaining rows for testing (rows are taken in file order).

n_rows = len(A)
split_points = [int(0.7 * n_rows), int(0.85 * n_rows)]

# A1/B1: training, A2/B2: validation, A3/B3: test
A1, A2, A3 = np.split(A, split_points)
B1, B2, B3 = np.split(B, split_points)

In [13]:
def p(A_row,X):
  """Logistic (sigmoid) function of the linear score ``A_row @ X``.

  Parameters:
    A_row: feature vector, or a 2-D array holding one feature vector per row.
    X:     weight vector.

  Returns:
    1 / (1 + exp(-A_row @ X)); a scalar for a single row, a 1-D array for a
    2-D ``A_row``.
  """
  z = A_row @ X
  # sigmoid(z) == exp(-log(1 + exp(-z))). np.logaddexp evaluates the log term
  # without overflowing, unlike a direct np.exp(-z) for large negative z.
  return np.exp(-np.logaddexp(0.0, -z))
 
def cost(A,B,X):
  """Negative log-likelihood of logistic regression, summed over the rows of B.

  Parameters:
    A: 2-D array of feature rows.
    B: labels, either 1-D or a column of shape (n, 1). A label equal to 1
       (or True) is the positive class; anything else is the negative class.
    X: weight vector.

  Returns:
    The summed log-loss as a scalar (0.0 when B is empty).
  """
  n = len(B)
  if n == 0:
    return 0.0

  z = np.asarray(A)[:n] @ X
  is_positive = np.asarray(B).reshape(n, -1)[:, 0] == 1

  # -log(sigmoid(z)) == log(1 + exp(-z)) and -log(1 - sigmoid(z)) == log(1 + exp(z)).
  # np.logaddexp keeps both finite when the sigmoid saturates, where
  # np.log(p) / np.log(1 - p) would return -inf.
  per_sample = np.where(is_positive, np.logaddexp(0.0, -z), np.logaddexp(0.0, z))
  return np.sum(per_sample)

def grad_cost(A,B,X):
  """Gradient of the summed logistic log-loss with respect to the weights X.

  Parameters:
    A: 2-D array of feature rows.
    B: labels, either 1-D or a column of shape (n, 1).
    X: weight vector.

  Returns:
    float64 array of length len(X): sum over rows of (p(A[i], X) - B[i]) * A[i].
  """
  n = len(B)
  if n == 0:
    return np.zeros(len(X),dtype=np.float64)

  features = np.asarray(A, dtype=np.float64)[:n]
  # Flatten the labels so an (n, 1) column does not broadcast against the
  # (n,) probability vector into an (n, n) matrix.
  labels = np.asarray(B).reshape(n, -1)[:, 0]
  residual = p(features, X) - labels
  # One matrix product replaces the per-row Python loop.
  return features.T @ residual

In [17]:
# Gradient descent with step halving: a step that raises the cost is rejected
# and the learning rate is halved; a step that lowers it is accepted.

learning_rate = 0.01
min_learning_rate = 1e-8         # stop halving below this
eps = 1e-5                       # convergence threshold on |cost change|
max_iterations = 100000          # hard cap so the loop always terminates
X = np.ones(A1.shape[1])         # Initializing weights

current_cost = cost(A1,B1,X)     # cached; only changes when a step is accepted

i = 0
while i < max_iterations:
  i += 1
  X_new = X - learning_rate * grad_cost(A1,B1,X)
  new_cost = cost(A1,B1,X_new)
  cost_change = new_cost - current_cost

  if(math.fabs(cost_change) < eps):
    break                        # converged; X keeps the last accepted weights

  # A NaN/inf cost change is treated like an increase: the step is rejected.
  if (cost_change > 0) or not math.isfinite(cost_change):
    if learning_rate < min_learning_rate:
      break                      # step size exhausted and still no descent
    learning_rate /= 2
  else:
    X, current_cost = X_new, new_cost
else:
  print("Warning: gradient descent stopped after", max_iterations, "iterations without converging")

In [18]:
# Running on validation dataset (A2 and B2)
# A record is predicted positive when its probability is at least 0.5, so a
# probability of exactly 0.5 is classified rather than always counted as wrong.

val_predictions = (p(A2, X) >= 0.5).astype(int)
val_labels = B2.reshape(-1)      # labels are stored as an (n, 1) column

count = int(np.sum(val_predictions == val_labels))   # correctly predicted records
t_count = len(A2)                                     # total records

In [19]:
# Summary of the gradient-descent run: iteration count, final weights and
# the training cost at those weights.
print("\nUsing gradient descent: ")
training_report = (
  ("Number of iterations: ", i),
  ("X = ", X),
  ("Cost = ", cost(A1,B1,X)),
)
for label, value in training_report:
  print(label, value)


Using gradient descent: 
Number of iterations:  54
X =  [-1.97287398  0.24237883  0.51007953 -0.05810458  0.01686227  0.26597639
 -0.00590217  0.05379494  0.08314499  0.01443754  0.08822553  0.33247217
 -0.02554583  0.07980123 -0.07140249  0.2237233 ]
Cost =  966.4959763037369


In [20]:
# Summary of the validation run: total records, correct predictions and accuracy.
print("\nRunning on validation data")
validation_report = (
  ("Total number of records in validation dataset:", t_count),
  ("Number of records correctly predicted:", count),
  ("Fraction of records giving success:", count/t_count),
)
for label, value in validation_report:
  print(label, value)


Running on validation data
Total number of records in validation dataset: 548
Number of records correctly predicted: 474
Fraction of records giving success: 0.864963503649635
