In [19]:
import numpy as np
import pandas as pd

In [20]:
from sklearn.datasets import load_diabetes

In [21]:
# Load the diabetes dataset object provided by scikit-learn's `load_diabetes`.
data = load_diabetes()

In [22]:
# Pull the feature matrix and the target vector out of the loaded dataset.
X, y = data['data'], data['target']

In [23]:
# Display the feature matrix.
X

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990842, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06832974, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286377, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04687948,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452837, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00421986,  0.00306441]])

In [24]:
# Display the target vector.
y

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

In [25]:
def scale(d):
    """Standardize ``d`` by subtracting its mean and dividing by its std.

    The statistics are whatever ``d.mean()`` and ``d.std()`` return for the
    given object; no axis argument is passed to either call.

    Args:
        d: object exposing ``mean()`` and ``std()`` and supporting arithmetic
            with their results (e.g. a NumPy array).

    Returns:
        ``(d - d.mean()) / d.std()``.
    """
    centered = d - d.mean()
    return centered / d.std()

In [26]:
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    Every call draws a new random ordering of the example positions with
    ``np.random.shuffle`` and then walks through it in consecutive chunks of
    ``batch_size``. The last chunk is shorter when ``len(features)`` is not a
    multiple of ``batch_size``.

    Args:
        batch_size: number of examples per batch (step passed to ``range``).
        features: indexable by an integer NumPy array; its ``len`` gives the
            number of examples.
        labels: indexable by the same integer array as ``features``.

    Yields:
        tuple: ``(features[idx], labels[idx])`` for each chunk ``idx`` of the
        shuffled positions.
    """
    total = len(features)
    order = list(range(total))

    # Visit the examples in random order.
    np.random.shuffle(order)

    for start in range(0, total, batch_size):
        # List slicing already stops at the end of the list, so the final
        # (possibly shorter) batch needs no explicit bound.
        picked = np.array(order[start:start + batch_size])
        yield features[picked], labels[picked]

In [27]:
def load_datasets():
    """Return the features and targets of the diabetes dataset.

    Returns:
        tuple: ``(X, y)`` — the ``'data'`` and ``'target'`` entries of the
        object returned by ``load_diabetes()``.
    """
    bunch = load_diabetes()
    return bunch['data'], bunch['target']


In [31]:
class LogisticRegression:
    """Logistic regression trained from scratch with full-batch gradient descent.

    Predictions are ``sigmoid(X @ W + b)``. Training minimises the binary
    cross-entropy implemented in :meth:`loss`, which is only meaningful for
    targets in ``[0, 1]``.

    Attributes:
        param: dict holding ``'W'`` (shape ``(n, 1)``) and ``'b'`` (shape ``(1,)``).
        m, n: number of rows and columns of ``X``.
        X, y: training inputs and targets exactly as passed to the constructor.
        y_pred: predictions on ``X`` after the latest parameter update
            (set by :meth:`train`).
        result: DataFrame filled by :meth:`train`; column ``0`` holds ``y`` and
            column ``1`` the latest predictions.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logarithms so
    # that saturated predictions (exactly 0 or 1) cannot produce inf/NaN losses.
    _EPS = 1e-15

    def __init__(self, X, y):
        """Store the training data and initialise the parameters.

        Args:
            X: 2-D array of shape ``(m, n)``.
            y: targets with ``m`` entries.
        """
        self.param = {}
        self.m, self.n = X.shape
        # Small random weights, zero bias.
        self.param['W'] = np.random.randn(self.n, 1) * 0.001
        self.param['b'] = np.zeros(1)

        self.X = X
        self.y = y

        self.result = pd.DataFrame()

    def train(self, alpha = 0.001, epochs = 10):
        """Run ``epochs`` full-batch gradient-descent steps and print the loss.

        Args:
            alpha: learning rate.
            epochs: number of passes over the training set. With ``0`` no
                update is made and the loss of the current parameters is
                reported.

        Returns:
            None. Updates ``self.param``, ``self.y_pred`` and ``self.result``.
        """
        # Loop-invariant pieces: column view of the targets and the step size.
        y_col = np.reshape(self.y, (self.m, 1))
        step = alpha / self.m

        loss = None
        for epoch in range(epochs):
            print("Epoch: ", epoch, end = "")

            # Forward pass with the current parameters.
            self.y_pred = self.predict(self.X)
            error = self.y_pred - y_col

            ## Update the parameters
            self.param['W'] = self.param['W'] - step * np.dot(self.X.transpose(), error)
            self.param['b'] = self.param['b'] - step * np.sum(error)

            # Re-evaluate with the updated parameters so the reported loss
            # reflects the state after this epoch.
            self.y_pred = self.predict(self.X)
            loss = self.loss(self.y, self.y_pred)

            print(", loss = ", loss)

        if loss is None:
            # No epoch ran: report the loss of the untouched parameters
            # instead of failing on an undefined variable.
            self.y_pred = self.predict(self.X)
            loss = self.loss(self.y, self.y_pred)

        # Only the final predictions are kept, so the table is written once.
        self.result[0] = self.y
        self.result[1] = np.ravel(self.y_pred)

        print(", Final Loss = ", loss)
        print("  W: {}, b = {}".format(self.param['W'], self.param['b']))

    def loss(self, y, y_pred):
        """Mean binary cross-entropy between ``y`` and ``y_pred``.

        Args:
            y: NumPy array of targets.
            y_pred: NumPy array of predicted probabilities with the same
                number of entries as ``y``.

        Returns:
            ``-(sum(y * log(p)) + sum((1 - y) * log(1 - p))) / len(y)`` where
            ``p`` is ``y_pred`` clipped to ``[_EPS, 1 - _EPS]``.
        """
        p = np.clip(y_pred, self._EPS, 1 - self._EPS)
        # Contribution of the positive (y == 1) and negative (y == 0) parts.
        positive_term = y.T.dot(np.log(p))
        negative_term = (1 - y).T.dot(np.log(1 - p))

        return -np.sum(positive_term + negative_term) / len(y)

    def sigmoid(self, z):
        """Element-wise logistic function ``1 / (1 + exp(-z))``."""
        # For very negative z, exp(-z) overflows to inf and the quotient is
        # exactly 0.0, which is the correct limit; only the warning is silenced.
        with np.errstate(over='ignore'):
            return 1.0/(1 + np.exp(-z))

    def predict(self, X):
        """Return ``sigmoid(X @ W + b)`` for the current parameters."""
        return self.sigmoid(np.dot(X, self.param['W']) + self.param['b'])

In [32]:
# Reload the features and targets through the helper; this rebinds the
# `X` and `y` names assigned in an earlier cell.
X, y = load_datasets()

In [43]:
# Learning rate and epoch count for the full-batch gradient-descent run below.
# NOTE(review): the `y` displayed earlier holds continuous values (151., 75., ...),
# while LogisticRegression.loss is a binary cross-entropy. Confirm whether the
# targets should be binarised (or a regression model used) before trusting
# the losses printed by this cell.
alpha = 0.00000001
epochs = 300
log_model = LogisticRegression(X, y)
log_model.train(alpha, epochs)

Epoch:  0, loss =  0.6937708131928414
Epoch:  1, loss =  0.6935406903323539
Epoch:  2, loss =  0.69331056747292
Epoch:  3, loss =  0.6930804446147206
Epoch:  4, loss =  0.6928503217576736
Epoch:  5, loss =  0.6926201989017459
Epoch:  6, loss =  0.692390076046954
Epoch:  7, loss =  0.6921599531933474
Epoch:  8, loss =  0.6919298303408272
Epoch:  9, loss =  0.6916997074895088
Epoch:  10, loss =  0.691469584639359
Epoch:  11, loss =  0.6912394617903452
Epoch:  12, loss =  0.6910093389424342
Epoch:  13, loss =  0.6907792160957085
Epoch:  14, loss =  0.6905490932500857
Epoch:  15, loss =  0.690318970405681
Epoch:  16, loss =  0.6900888475624122
Epoch:  17, loss =  0.6898587247202298
Epoch:  18, loss =  0.6896286018792985
Epoch:  19, loss =  0.6893984790394208
Epoch:  20, loss =  0.6891683562007611
Epoch:  21, loss =  0.6889382333632538
Epoch:  22, loss =  0.6887081105269153
Epoch:  23, loss =  0.6884779876915973
Epoch:  24, loss =  0.6882478648575304
Epoch:  25, loss =  0.6880177420246323
E