## **Ques 5: Logistic Regression**

### **5a. Implementation of logistic regression classifier.**

In [1]:
import random
import numpy as np
from sklearn.metrics import precision_score, recall_score

np.random.seed(999)
random.seed(999)

In [2]:
class LogisticRegression():
    def __init__(self, learning_rate, max_iter):
        super(LogisticRegression, self).__init__()
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.weights, self.bias, self.losses = 0, 0, []

    def sigmoid(self, logits):
        return 1/(1+np.exp(-1*logits))
   
    def loss(self, targets, predictions):
        loss = -1*np.mean(targets*(np.log(predictions)) + (1-targets)*(np.log(1-predictions)))
        
        return loss
    
    # Computation of gradients.
    def backward(self, inputs, targets, predictions):
        num_samples = inputs.shape[0]
        dw = (1/num_samples) * (np.dot(inputs.T, (predictions - targets)))
        db = (1/num_samples) * (np.sum((predictions - targets)))
        
        return dw/num_samples, db/num_samples
#         return dw, db
    
    # Fit function for training the model.
    def fit(self, inputs, targets):
        losses = []
        num_samples, feature_dim = inputs.shape
#         wt_matrix, bias = np.zeros((feature_dim, 1)), 0
        # Using the same weights as given in the next question for simplicity.
        wt_matrix, bias = np.array([1.5, 0.5]).reshape((feature_dim, 1)), -1
        targets = targets.reshape(num_samples, 1)
        
        for epoch in range(self.max_iter):
            # Forward Pass
            logits = np.dot(inputs, wt_matrix) + bias
#             print('Logits: ', logits)
            predictions = self.sigmoid(logits)
            predictions = predictions.reshape(num_samples, 1)
            loss = self.loss(targets, predictions)

            # Backward Pass
            dw, db = self.backward(inputs, targets, predictions)
#             print('Gradients: ', dw, db)

            # Parameters Update step
            wt_matrix -= self.learning_rate*dw
            bias -= self.learning_rate*db
            losses.append(loss)
#             print('Updated wts: ', wt_matrix)
#             print('Updated bias: ', bias)

            if not epoch%4:
                print('Epoch {}:\tloss --> {:.4f}'.format(epoch+1, loss))
                
        self.weights, self.bias, self.losses = wt_matrix, bias, losses
        
        return 
    
    # Predict utility for making predictions.
    def predict(self, inputs):
        logits = np.dot(inputs, self.weights) + self.bias
        outputs = self.sigmoid(logits)
        predictions = [1 if output>=0.5 else 0 for output in outputs]
        return np.array(predictions)
    
    # Score utility for metric (accuracy) computation.
    def score(self, inputs, targets):
        predictions = self.predict(inputs)
#         print('Predictions: ', predictions)
        accuracy = np.sum(targets == predictions) / targets.shape[0]
        
        return accuracy

In [3]:
# # Uncomment this piece of code to udpate parameters for each sample per epoch. The results were getting saturated at an accuracy of 0.833 here also, so opted for other approach.

# class LogisticRegression_per_sample_update():
#     def __init__(self, learning_rate, max_iter):
#         super(LogisticRegression_per_sample_update, self).__init__()
#         self.learning_rate = learning_rate
#         self.max_iter = max_iter
#         self.weights, self.bias, self.losses = 0, 0, []
#         self.ss = StandardScaler()

#     def sigmoid(self, logit):
#         return 1/(1+np.exp(-1*logit))
   
#     def loss(self, target, prediction):
#         loss = -1 * target*(np.log(prediction)) + (1-target)*(np.log(1-prediction))
        
#         return loss
    
#     def backward(self, input, target, prediction):
#         input = input.reshape(input.shape[0], 1)
#         dw = np.dot(input, (prediction - target).reshape(1,1))
#         db = np.sum((prediction - target))
        
#         return dw, db
    
#     def fit(self, inputs, targets):
#         losses = []
#         num_samples, feature_dim = inputs.shape
# #         wt_matrix, bias = np.zeros((feature_dim, 1)), 0
#         wt_matrix, bias = np.array([1.5, 0.5]).reshape((feature_dim, 1)), -1
#         targets = targets.reshape(num_samples, 1)
#         inputs = self.ss.fit_transform(inputs)
        
#         for epoch in range(self.max_iter):
#             loss_per_epoch=0
#             for i in range(num_samples):
#             #Forward Pass
#                 logit = np.dot(inputs[i], wt_matrix)+bias
#                 prediction = self.sigmoid(logit)
# #                 prediction = prediction.reshape(num_samples, 1)
#                 loss_per_sample = self.loss(targets[i], prediction)

#                 #Backward Pass
#                 dw, db = self.backward(inputs[i], targets[i], prediction)

#                 #Parameters Update step
#                 wt_matrix -= self.learning_rate*dw
#                 bias -= self.learning_rate*db
#                 loss_per_epoch += loss_per_sample
                
#             loss_per_epoch /= num_samples
#             losses.append(loss_per_epoch[0])
#             if not epoch%9:
#                 print('Epoch {}:\tloss --> {:.4f}'.format(epoch+1, loss_per_epoch[0]))
                
#         self.weights, self.bias, self.losses = wt_matrix, bias, losses
        
#         return 
    
#     def predict(self, inputs):
#         inputs = self.ss.fit_transform(inputs)
#         logits = np.dot(inputs, self.weights) + self.bias
#         outputs = self.sigmoid(logits)
#         predictions = [1.0 if output>=0.5 else 0.0 for output in outputs]
#         return np.array(predictions)
    
#     def score(self, inputs, targets):
#         predictions = self.predict(inputs)
#         accuracy = np.sum(targets == predictions) / targets.shape[0]
        
#         return accuracy

#### **Creating training and test datasets as given and splitting it into features and targets for training.**

In [4]:
train = np.array([[0.346, 0.780, 0.0],[0.303, 0.439, 0.0], [0.358, 0.729, 0.0], [0.602, 0.863, 1.0], [0.790, 0.753, 1.0], [0.611, 0.965, 1.0]])
test = np.array([[0.959, 0.382, 0.0], [0.750, 0.306, 0.0], [0.395, 0.760, 0.0], [0.823, 0.764, 1.0], [0.761, 0.874, 1.0], [0.844, 0.435, 1.0]])

train = train.reshape((6,3))
test = test.reshape((6,3))

In [5]:
train

array([[0.346, 0.78 , 0.   ],
       [0.303, 0.439, 0.   ],
       [0.358, 0.729, 0.   ],
       [0.602, 0.863, 1.   ],
       [0.79 , 0.753, 1.   ],
       [0.611, 0.965, 1.   ]])

In [6]:
np.random.shuffle(train)
train

array([[0.358, 0.729, 0.   ],
       [0.611, 0.965, 1.   ],
       [0.602, 0.863, 1.   ],
       [0.303, 0.439, 0.   ],
       [0.79 , 0.753, 1.   ],
       [0.346, 0.78 , 0.   ]])

In [7]:
test

array([[0.959, 0.382, 0.   ],
       [0.75 , 0.306, 0.   ],
       [0.395, 0.76 , 0.   ],
       [0.823, 0.764, 1.   ],
       [0.761, 0.874, 1.   ],
       [0.844, 0.435, 1.   ]])

In [8]:
X_train, y_train = train[:, 0:2], train[:, 2]
X_test, y_test = test[:, 0:2], test[:, 2]

## **5b: Running the model for given data.** 
<font size="4">**(i) Logistic Model P(y_hat=1 | x1, x2)** = sigmoid( theta<sub>0</sub> + theta<sub>1</sub>.x<sub>1</sub> + theta<sub>2</sub>.x<sub>2</sub>) => **sigmoid( -1 + 1.5\*x<sub>1</sub> + 0.5\*x<sub>2</sub> )**</br>
**Cross Entropy Error (y=y, y_hat=1)** = -(y\*log(1) + (1-y)\*log(1-1)) => **-1\* ( 0 + (1-y) \* log(0) )**</font>

<font size="4">**(ii):** Gradient descent to update parameters for 1 iteration (*_Comment out zero initialization and uncomment given initialization weights and bias_*).</font>

In [9]:
lr = LogisticRegression(learning_rate=0.1, max_iter=1)
lr.fit(X_train, y_train)
lr.weights, lr.bias

Epoch 1:	loss --> 0.5570


(array([[1.50089181],
        [0.50032811]]), -1.000527709962094)

<font size="4"> **Updated logistic regression model:** 1 / ( 1 + e<sup>-( -1.0005 + 1.5008\*x<sub>1</sub> + 0.5003\*x<sub>2</sub> )</sup> ) </font>

In [10]:
lr = LogisticRegression(learning_rate=0.1, max_iter=10)
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
acc = lr.score(X_test, y_test)
precision, recall = precision_score(y_test, y_pred), recall_score(y_test, y_pred)
print('Accuracy: {:.4f}\nPrecision: {:.4f}\nRecall: {:.4f}'.format(acc, precision, recall))

Epoch 1:	loss --> 0.5570
Epoch 5:	loss --> 0.5567
Epoch 9:	loss --> 0.5564
Accuracy: 0.6667
Precision: 0.6000
Recall: 1.0000
