# **1. Packages**

In [12]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

# **2. Functions**

In [13]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, applied element-wise.

    Parameters
    ----------
    z : scalar or numpy array
        Input value(s); anything that supports unary minus and ``np.exp``.

    Returns
    -------
    Same shape as ``z``, with every value in the closed interval [0, 1].
    """
    # For strongly negative z, exp(-z) overflows to +inf and numpy emits a
    # RuntimeWarning. The quotient 1 / (1 + inf) is exactly 0.0, which is the
    # correct limit, so only the overflow warning is silenced here; the
    # computed values are unchanged.
    with np.errstate(over="ignore"):
        g = 1 / (1 + np.exp(-z))

    return g

In [14]:
def computeLRCost(x, y, theta):
    """Cross-entropy cost and its gradient for logistic regression.

    Parameters
    ----------
    x : numpy array
        Design matrix; ``np.dot(x, theta)`` must be defined and ``x.T`` is
        used for the gradient.
    y : numpy array
        Target values, one per row of ``x``.
    theta : numpy array
        Current parameter vector.

    Returns
    -------
    cost : float
        Mean negative log-likelihood over the ``len(y)`` samples.
    grad : numpy array
        Gradient of the cost with respect to ``theta``; already divided by
        the number of samples.
    """
    m = len(y)

    # Evaluate the hypothesis once and reuse it for both cost and gradient.
    h = sigmoid(np.dot(x, theta))

    # A saturated sigmoid returns exactly 0.0 or 1.0, which would make
    # log(h) or log(1 - h) equal to -inf (and 0 * -inf = nan). Clipping to
    # [eps, 1 - eps] keeps the cost finite and leaves every non-saturated
    # value untouched.
    eps = np.finfo(float).eps
    h_safe = np.clip(h, eps, 1 - eps)

    cost = -1 / m * np.sum(np.dot(np.log(h_safe), y) + np.dot(np.log(1 - h_safe), (1 - y)))

    # The gradient uses the unclipped hypothesis so it stays exact.
    grad = 1 / m * np.dot(x.T, h - y)

    return cost, grad

In [15]:
def TrainLRModel(X, y, alpha=0.03, num_iters=6000):
    """Fit logistic-regression parameters with batch gradient descent.

    Parameters
    ----------
    X : numpy array, two-dimensional
        Design matrix, one row per sample. The parameter vector gets one
        entry per column of ``X``.
    y : numpy array
        Target values, one per row of ``X``.
    alpha : float, optional
        Learning rate. Defaults to 0.03.
    num_iters : int, optional
        Number of gradient-descent updates. Defaults to 6000.

    Returns
    -------
    theta : numpy array of shape ``(X.shape[1],)``
        Parameters after ``num_iters`` updates, starting from all zeros.
    """
    m = len(y)

    # Size theta from the data instead of assuming exactly four columns.
    theta = np.zeros(np.shape(X)[1])

    for _ in range(num_iters):

        # Only the gradient is needed for the update; the cost is discarded.
        _, grad = computeLRCost(X, y, theta)

        # NOTE(review): computeLRCost already divides the gradient by m, so
        # this second 1/m makes the effective step alpha / m. It is kept
        # as-is so existing results do not change; confirm whether the extra
        # scaling is intended before removing it.
        theta = theta - alpha * ((1 / m) * grad)

    return theta

In [16]:
def predictClass(x, theta, threshold):
    """Predict binary class labels by thresholding the sigmoid output.

    Parameters
    ----------
    x : numpy array
        Design matrix; ``np.dot(x, theta)`` must be defined.
    theta : numpy array
        Parameter vector.
    threshold : float
        A sample is labelled 1 when ``sigmoid(np.dot(x, theta))`` is greater
        than or equal to this value, and 0 otherwise.

    Returns
    -------
    numpy array of floats (0.0 or 1.0) with the same shape as
    ``np.dot(x, theta)``.
    """
    z = np.dot(x, theta)

    # A single vectorized comparison replaces the per-element Python loop;
    # the cast keeps the float 0.0 / 1.0 labels the loop version produced.
    return (sigmoid(z) >= threshold).astype(float)

In [17]:
def testPerformance(y, y_predicted):
    """Compute accuracy, recall, precision and F-score for binary labels.

    Parameters
    ----------
    y : array-like
        True labels. The confusion matrix is built for the labels 0 and 1,
        with 1 treated as the positive class.
    y_predicted : array-like
        Predicted labels, same length as ``y``.

    Returns
    -------
    acc, recall, precision, fScore : float
        Any metric whose denominator is zero is reported as 0.0.
    """
    # Pin the label set so the matrix is always 2x2. Without it, inputs that
    # contain a single class give a 1x1 matrix and indexing [1][1] fails.
    matrix = confusion_matrix(y, y_predicted, labels=[0, 1])

    # Row = true label, column = predicted label.
    tn, fp = matrix[0][0], matrix[0][1]
    fn, tp = matrix[1][0], matrix[1][1]

    total = matrix.sum()

    # Guard every division: an empty input, no actual positives, or no
    # predicted positives would otherwise yield nan.
    acc = (tn + tp) / total if total else 0.0

    recall = tp / (tp + fn) if (tp + fn) else 0.0

    precision = tp / (tp + fp) if (tp + fp) else 0.0

    if precision + recall:
        fScore = (2 * (precision * recall)) / (precision + recall)
    else:
        fScore = 0.0

    return acc, recall, precision, fScore

# **3. Dataset loading and cleaning**

In [18]:
# Load the dataset: the last column is the target, the rest are inputs.
train_data = pd.read_csv("dataset.csv")

y = train_data.iloc[:, -1].values

X1 = train_data.iloc[:, :-1].values

# Drop the first column (presumably an identifier -- TODO confirm).
X = X1[:, 1:]

# Prepend a column of ones so that theta[0] acts as the intercept term.
X = np.column_stack((np.ones(len(y), dtype=int), X))

# Fix the seed so the train/test split, and therefore the reported metrics,
# are reproducible across "Restart & Run All".
SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=SEED
)

# **4. Test**

In [20]:
# Fit the model on the training split.
theta = TrainLRModel(x_train, y_train)

# Label a test sample as positive when its sigmoid output is at least 0.3.
y_predict = predictClass(x_test, theta, 0.3)

acc, recall, precision, fScore = testPerformance(y_test, y_predict)

# Report each metric with two decimals, followed by a blank line.
for label, value in (
    ("Accuracy: ", acc),
    ("Recall: ", recall),
    ("Precision: ", precision),
    ("fScore: ", fScore),
):
    print(label, "{:.2f}".format(value), "\n")

Accuracy:  0.70 

Recall:  0.43 

Precision:  0.50 

fScore:  0.47 

