# Tanay Yadav
# AI20BTECH11026
# Assignment 4 - Q5

In [693]:
# importing the required libraries

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

In [694]:
# creating the training and testing arrays with the raw provided data

X_train = np.array([[0.346, 0.780], [0.303, 0.439], [0.358, 0.729], [0.602, 0.863], [0.790, 0.753], [0.611, 0.965]])
Y_train = np.array([[0], [0], [0], [1], [1], [1]])

X_test = np.array([[0.959, 0.382], [0.750, 0.306], [0.395, 0.760], [0.823, 0.764], [0.761, 0.874], [0.844, 0.435]])
Y_test = np.array([[0], [0], [0], [1], [1], [1]])

In [695]:
def sigmoid(X, weights):
    '''
    Parameters:
    X : 1D array containing the datapoints
    weights : Weights for the regressor model

    Returns:
    output of the input datapoints using the sigmoid function
    '''
    return 1/(1 + np.exp(-weights.T@X))

In [696]:
def crossEntropyLoss(Y_hat, Y):
    '''
    Parameters:
    Y_hat : 1D array containing the predicted outcomes from the regressor.
    Y : actual outcomes for the given datapoints.

    Returns:
    Returns the cross entropy loss of the input predicted outcomes.
    '''
    return Y*np.log(Y_hat) + (1-Y)*np.log(1-Y_hat)

In [697]:
class LogisticRegressor( ):    
    def fit(X, Y, gamma, weights, epochs=10000):
        '''
        Parameters: 
        X: 2D array containing the training datapoints
        Y: 1D array containing the outcomes to the input datapoints
        weights: 1D array containing pre-intitialized weights
        epochs: Number of iterations, default is 10000

        Returns:
        Total cross entropy loss over the number of epochs
        '''
        for i in range(epochs):
            tot_loss = 0
            for j in range(len(X)):
                y_hat = sigmoid(X[j], weights)
                weights -= gamma *X[j]*(y_hat - Y[j])/ len(X)
                tot_loss += crossEntropyLoss(y_hat, int(Y[j]))
            tot_loss = -(1/len(Y))*tot_loss
        return tot_loss   
    def predict(X, weights):
        '''
        Parameters:
        X: 2D array containing the test datapoints
        weights: 1D array containing the weights obtained from fitting the training set
        
        Returns:
        y_hat: 1D array containing the predicted outcomes for the input datapoints
        '''
        y_hat = []
        for i in range(len(X)):
            y = sigmoid(X[i], weights)   
            y_hat.append(1 if y>=0.5 else 0)
        return y_hat 

In [698]:
# preparing the training and testing datapoints

temp = np.ones((6,1))
X_train = np.hstack((temp, X_train))
X_test = np.hstack((temp, X_test))
weights = np.array([-1, 1.5, 0.5])

The Logistic Model $P(\hat{y} = 1|x_1, x_2)$ is:  
$\ln\left(\frac{p}{1-p}\right) = l = \hat{y} = -1 + 1.5x_1 + 0.5x_2 = ln\left(\frac{P(\hat{y}=1|x_1,x_2)}{1-P(\hat{y}=1|x_1,x_2)}\right)$    
The cross entropy function is:  
$E = y\ln(\hat{y}) + (1-y)\ln(1 - \hat{y})$

In [699]:
# running the regressor for one iteration

cel_1 = LogisticRegressor.fit(X_train, Y_train, 0.1, weights, 1)
print('Cross Entropy Loss after 1 iteration = %.6f'%cel_1)
print('Weights =', weights)

Cross Entropy Loss after 1 iteration = 0.559503
Weights = [-1.0027121   1.50560518  0.50232236]


The Updated Logistic Model $P(\hat{y} = 1|x_1, x_2)$ is:  
$\ln\left(\frac{p}{1-p}\right) = l = \hat{y} = -1.003 + 1.505x_1 + 0.502x_2 = ln\left(\frac{P(\hat{y}=1|x_1,x_2)}{1-P(\hat{y}=1|x_1,x_2)}\right)$

In [700]:
# running the regressor until the gradient converges

weights = np.array([-1, 1.5, 0.5])
cel = LogisticRegressor.fit(X_train, Y_train, 0.1, weights)
print('Cross Entropy Loss after the convergence of gradient descent = %.6f'%cel)
print('Weights =', weights)

Cross Entropy Loss after the convergence of gradient descent = 0.062421
Weights = [-11.28397278  16.14187561   4.36162092]


The Updated Logistic Model $P(\hat{y} = 1|x_1, x_2)$ is:  
$\ln\left(\frac{p}{1-p}\right) = l = \hat{y} = -11.284 + 16.142x_1 + 4.361x_2 = ln\left(\frac{P(\hat{y}=1|x_1,x_2)}{1-P(\hat{y}=1|x_1,x_2)}\right)$

In [701]:
# predicting the outcomes over the test dataset.

Y_predicted = LogisticRegressor.predict(X_test, weights)

In [702]:
# obtaining the important metrics for the testing dataset.

train_accuracy = accuracy_score(Y_predicted, Y_test)
print('Validation Error : %.4f' %(1-train_accuracy))
recall = recall_score(Y_predicted, Y_test)
print('Recall Score :', recall)
precision = precision_score(Y_predicted, Y_test)
print('Precision Score :', precision)

Validation Error : 0.3333
Recall Score : 0.6
Precision Score : 1.0
