# Logistic regression

In [1]:
import numpy as np


In [2]:
def sigmoid(scores):
    """Return the logistic function 1 / (1 + exp(-scores)), element-wise.

    The value is computed in a numerically stable form: the exponential is
    only ever taken of a non-positive number, so large-magnitude negative
    scores no longer overflow inside ``np.exp``.

    Args:
        scores: Scalar or array-like of real numbers.

    Returns:
        The sigmoid of every score; a NumPy scalar for scalar input,
        otherwise an array with the same shape as ``scores``.
    """
    scores = np.asarray(scores)
    # exp(-|s|) lies in [0, 1]: it may underflow to 0 but can never overflow.
    decay = np.exp(-np.abs(scores))
    denominator = 1 + decay
    # s >= 0: 1 / (1 + exp(-s));  s < 0: exp(s) / (1 + exp(s)) -- the same
    # quantity, written so that neither branch needs exp of a positive number.
    result = np.where(scores >= 0, 1 / denominator, decay / denominator)
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return result[()]

In [3]:
def log_likelihood(features, target, weights):
    """Return the log-likelihood of a logistic model on the given data.

    Computes ``sum(target * s - log(1 + exp(s)))`` where
    ``s = np.dot(features, weights)``.

    Args:
        features: Array of inputs, one row per sample.
        target: Labels aligned with the rows of ``features``.
        weights: Coefficient vector, one entry per column of ``features``.

    Returns:
        The summed log-likelihood as a NumPy scalar.
    """
    scores = np.dot(features, weights)
    # logaddexp(0, s) == log(exp(0) + exp(s)) == log(1 + exp(s)), but it is
    # evaluated without overflowing to inf when a score is large and positive.
    log_partition = np.logaddexp(0, scores)
    return np.sum(target * scores - log_partition)

In [4]:
def logistic_regression(features, target, num_steps, learning_rate, add_intercept = False):
    """Fit logistic-regression weights by batch gradient ascent.

    Starting from an all-zero weight vector, performs ``num_steps`` updates
    of the form ``weights += learning_rate * features.T . (target - p)``,
    where ``p = sigmoid(features . weights)``. The current log-likelihood is
    printed at step 0 and at every 10000th step after that.

    Args:
        features: 2-D array with one row per sample.
        target: Labels aligned with the rows of ``features``.
        num_steps: Number of gradient updates to perform.
        learning_rate: Multiplier applied to the gradient in each update.
        add_intercept: When true, a column of ones is prepended to
            ``features`` so that ``weights[0]`` is the intercept term.

    Returns:
        The fitted weight vector, one entry per (possibly augmented) column.
    """
    if add_intercept:
        # Constant column goes first, so the intercept is weights[0].
        ones_column = np.ones((features.shape[0], 1))
        features = np.hstack((ones_column, features))

    weights = np.zeros(features.shape[1])

    for iteration in range(num_steps):
        predictions = sigmoid(features.dot(weights))

        # Gradient of the log-likelihood with respect to the weights.
        residuals = target - predictions
        weights += learning_rate * features.T.dot(residuals)

        # Periodic progress report.
        if not iteration % 10000:
            print(log_likelihood(features, target, weights))

    return weights

# Sample data set: x is the number of hours studied, y is the outcome (0 = fail, 1 = pass)

In [5]:
# Hours studied, one entry per student.
x = [
    0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
    2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50,
]
# Outcome per student, in the same order as x: 0 = fail, 1 = pass.
y = [
    0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
]

In [6]:
# Hours studied as a float32 column: one row per student, a single column.
features = np.asarray(x, dtype=np.float32).reshape(-1, 1)

In [7]:
# Pass/fail labels as a flat float32 vector aligned with the rows of features.
target = np.asarray(y, dtype=np.float32)

In [8]:
# Fit an intercept and one coefficient for hours studied. The values printed
# below the cell are the log-likelihood at every 10000th step.
weights = logistic_regression(
    features,
    target,
    num_steps=300000,
    learning_rate=5e-5,
    add_intercept=True,
)

-13.8578241911
-10.3812183918
-9.29589930022
-8.77343058198
-8.49296719662
-8.33028194731
-8.23055316224
-8.16689536728
-8.12500873275
-8.09679603858
-8.07744218143
-8.06397035833
-8.05448183288
-8.0477343324
-8.04289794183
-8.03940856335
-8.036877215
-8.03503241142
-8.03368273682
-8.03269206511
-8.0319628814
-8.0314248956
-8.03102717295
-8.03073263709
-8.03051419483
-8.03035198232
-8.03023139449
-8.03014166654
-8.03007484747
-8.03002505392


In [9]:
# Show the fitted coefficients; with add_intercept=True the order is
# [intercept, coefficient for hours studied].
weights

array([-4.05172425,  1.49584987])

In [10]:
# The constant column is prepended during fitting, so weights[0] is the
# intercept and weights[1] is the coefficient for hours studied.
# NOTE: contrary to the usual "y = m*x + b" convention, m holds the
# intercept and b holds the slope here.
m, b = weights

# log(p(x)/(1-p(x)))=b0+b1*x1

# p(x)=1/(1+exp(-(b0+b1*x1)))

In [11]:
def predict(x):
    """Print the predicted outcome and pass probability for ``x`` hours.

    The probability is ``1 / (1 + exp(-(m + b * x)))``, using the
    module-level coefficients ``m`` (constant term) and ``b`` (multiplier
    of ``x``). Prints "pass" when the probability exceeds 0.5 and "fail"
    otherwise; nothing is returned.
    """
    logit = m + b * x
    p = 1 / (1 + np.exp(-logit))
    label = "pass" if p > 0.5 else "fail"
    print(label, p)

In [12]:
# 8 hours of study: beyond the largest value in the sample data (5.5 hours).
predict(8)

pass 0.999634936718


In [13]:
# 3 hours of study.
predict(3)

pass 0.607263850195


In [18]:
# 2 hours of study.
predict(2)

fail 0.257304771554


In [19]:
# 2.5 hours of study: below the point where the fitted probability crosses 0.5.
predict(2.5)

fail 0.422602340258


In [20]:
# 2.75 hours of study: just above the fitted 0.5-probability boundary
# (about 4.0517 / 1.4958 = 2.71 hours).
predict(2.75)

pass 0.515460793665


In [22]:
# Inspect the feature matrix before an intercept column is added to it.
print(features)

[[ 0.5 ]
 [ 0.75]
 [ 1.  ]
 [ 1.25]
 [ 1.5 ]
 [ 1.75]
 [ 1.75]
 [ 2.  ]
 [ 2.25]
 [ 2.5 ]
 [ 2.75]
 [ 3.  ]
 [ 3.25]
 [ 3.5 ]
 [ 4.  ]
 [ 4.25]
 [ 4.5 ]
 [ 4.75]
 [ 5.  ]
 [ 5.5 ]]


In [23]:
# Reproduce by hand what logistic_regression does when add_intercept=True:
# build a column of ones and place it in front of the existing columns.
# This rebinds the global `features` to the augmented matrix.
intercept = np.ones((len(features), 1))
features = np.hstack([intercept, features])

In [24]:
# Show the column of ones that serves as the intercept feature.
intercept

array([[ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.]])

In [25]:
# Show the augmented matrix: column 0 is the constant 1, column 1 is hours studied.
features

array([[ 1.  ,  0.5 ],
       [ 1.  ,  0.75],
       [ 1.  ,  1.  ],
       [ 1.  ,  1.25],
       [ 1.  ,  1.5 ],
       [ 1.  ,  1.75],
       [ 1.  ,  1.75],
       [ 1.  ,  2.  ],
       [ 1.  ,  2.25],
       [ 1.  ,  2.5 ],
       [ 1.  ,  2.75],
       [ 1.  ,  3.  ],
       [ 1.  ,  3.25],
       [ 1.  ,  3.5 ],
       [ 1.  ,  4.  ],
       [ 1.  ,  4.25],
       [ 1.  ,  4.5 ],
       [ 1.  ,  4.75],
       [ 1.  ,  5.  ],
       [ 1.  ,  5.5 ]])

In [26]:
# Illustrates the all-zero starting point used by logistic_regression: one
# weight per column of the augmented feature matrix.
# NOTE: this rebinds `weights`, discarding the fitted coefficients held in
# that variable (m and b keep the values unpacked earlier).
weights = np.zeros((features.shape[1],))

In [27]:
# Show the freshly initialised (all-zero) weight vector.
weights


array([ 0.,  0.])