In [1]:
import matplotlib.pyplot as plt
import numpy as np
from scipy import optimize
import pandas as pd

In [2]:
df = pd.read_csv('framingham.csv')
df.dropna(inplace=True)
df.to_csv('framingham.csv', index=False)
data = np.genfromtxt('framingham.csv', delimiter=',', skip_header=1)

In [3]:
df.shape
X, y = data[:, 0:15], data[:, 15]

In [4]:
def plotData(X,y):
    pos = y == 1
    neg = y == 0

# Plot Examples
    plt.plot(X[pos, 0], X[pos, 1], 'k*', lw=2, ms=10)
    plt.plot(X[neg, 0], X[neg, 1], 'ko', mfc='y', ms=8, mec='k', mew=1)

In [5]:
def sigmoid(z):
    # convert input to a numpy array
    z = np.array(z)
    g = np.zeros(z.shape)
    den=1+(np.exp(-z))
    g=1/den
    return g

In [6]:
# Setup the data matrix appropriately, and add ones for the intercept term
m, n = X.shape

# Add intercept term to X
X = np.concatenate([np.ones((m, 1)), X], axis=1)
theta=np.zeros(n+1)

In [7]:
def costFunction(theta, X, y,lambda_):
    m = y.size  # number of training examples
    J = 0
    grad = np.zeros(theta.shape)
    loss=-y*np.log(sigmoid(np.dot(X,theta)))-(1-y)*np.log(1-sigmoid(np.dot(X,theta)))+(lambda_ / 2 ) * np.dot(theta[0:].T, theta[0:])
    J=(np.sum(loss))/m
    predict=sigmoid(np.dot(X,theta))
    error=predict-y
    grad = (1 / m) * np.dot(X.T, error)+(lambda_/m)*theta[0:]
    return J,grad

In [8]:
lambda_=0.0001
cost,grad = costFunction(theta, X, y,lambda_)
print(cost)

0.6931471805599452


In [9]:
options= {'maxiter': 400}
res = optimize.minimize(costFunction,
                        theta,
                        (X, y,lambda_),
                        jac=True,
                        method='TNC',
                        options=options)
cost = res.fun

# the optimized theta is in the x property
theta = res.x

# Print theta to screen
print('Cost at theta found by optimize.minimize: {:.3f}'.format(cost))
print('theta:')
print('\t[{:.3f}, {:.3f}, {:.3f}]'.format(*theta))

Cost at theta found by optimize.minimize: 0.380
theta:
	[-8.242, 0.549, 0.064]


  res = optimize.minimize(costFunction,


In [10]:
def predict(theta,X):
    m = X.shape[0] 
    p = np.zeros(m)
    probabilities = sigmoid(np.dot(X, theta))

    # Set the predictions based on the threshold of 0.5
    p[probabilities > 0.5] = 1

    return p
    

In [11]:
prob = sigmoid(np.dot([1,1, 42, 3, 0, 5, 1, 0, 1, 0, 100, 120, 80, 50, 75, 400], theta))
print('we predict a ten year coronary heart disease probability of {:.3f}'.format(prob))

# Compute accuracy on our training set
p = predict(theta, X)
print('Train Accuracy: {:.2f} %'.format(np.mean(p == y) * 100))

we predict a ten year coronary heart disease probability of 0.480
Train Accuracy: 85.59 %


In [12]:
import joblib

# After training your model
# Save the model to a pickle file
joblib.dump(res, 'logistic_regression_model.pkl')

['logistic_regression_model.pkl']