In [None]:
# https://cmdlinetips.com/2020/03/linear-regression-using-matrix-multiplication-in-python-using-numpy/
# https://developer.ibm.com/articles/linear-regression-from-scratch/
# https://coolum001.github.io/numpyols.html
# https://online.stat.psu.edu/stat462/node/132/

# Multivariable Logistic Regression Estimation using OLS
import numpy as np
import pandas as pd
from numpy import exp

In [None]:
#normalize X values to help model converge
def normalize(data):
    """Standardize every row of ``data`` to zero mean and unit variance.

    Statistics are taken along ``axis=1``, i.e. each row is rescaled using
    its own mean and (population) standard deviation.
    NOTE(review): this standardizes per row (per sample), not per column
    (per feature) -- confirm that is the intended normalization.

    Parameters
    ----------
    data : np.matrix or 2-D np.ndarray
        Values to rescale; one row per sample.

    Returns
    -------
    Same array type as ``data``
        ``(data - row_mean) / row_std``. Rows whose standard deviation is 0
        (constant rows) come back as all zeros instead of NaN/inf.
    """
    # Force the statistics into explicit column vectors so the subtraction
    # broadcasts row-wise for both np.matrix and plain ndarray inputs
    # (ndarray reductions would otherwise return 1-D arrays).
    row_mean = np.asarray(np.mean(data, axis=1)).reshape(-1, 1)
    row_std = np.asarray(np.std(data, axis=1)).reshape(-1, 1)

    # A constant row has std == 0; dividing by it would inject NaN/inf into
    # every downstream computation. Dividing by 1 leaves the centred zeros.
    safe_std = np.where(row_std == 0, 1.0, row_std)

    return (data - row_mean) / safe_std

In [None]:
#retrieve and format data - into labels and examples from the dataset
def features_and_labels(filename):
    """Read a CSV file and split it into normalized features and labels.

    Parameters
    ----------
    filename : str
        Path of the CSV file, read with ``pd.read_csv``.

    Returns
    -------
    (np.matrix, np.matrix)
        ``(features, labels)``: every column after the first, passed through
        ``normalize``, and the first column with 1 subtracted, kept as a
        single-column matrix.
    """
    table = np.matrix(pd.read_csv(filename).values)

    # Column 0 holds the class label. Subtracting 1 shifts it down by one;
    # presumably this maps labels {1, 2} onto {0, 1} -- confirm against the
    # dataset.
    labels = table[:, 0] - 1

    # All remaining columns are the model inputs.
    features = normalize(table[:, 1:])
    return features, labels

In [None]:
training_data_x, training_data_y = features_and_labels("../archive/exoTrain.csv")

# Ordinary Least Squares (estimation of the weight vector).
# Solve min ||X w - y||^2 with an SVD-based least-squares solver instead of
# forming inv(X^T X) explicitly. normalize() centres every row of X, so each
# row sums to zero, X has a non-trivial null space and X^T X is singular:
# inverting it either raises LinAlgError or returns round-off garbage.
# lstsq returns the minimum-norm least-squares solution in that case.
ols_solution, _residuals, _rank, _singular_values = np.linalg.lstsq(
    training_data_x, training_data_y, rcond=None
)

# Keep W as a (1, n_features) np.matrix so later matrix products with X.T
# behave exactly as before.
W = np.matrix(ols_solution).T

In [None]:
#Logistic Layer using a sigmoid function
def logistic_layer(y):
    """Apply the sigmoid function element-wise and flatten the result.

    Parameters
    ----------
    y : array-like
        Raw scores; converted to an ndarray before the sigmoid is applied.

    Returns
    -------
    np.ndarray
        1-D array of ``1 / (1 + exp(-y))`` values.
    """
    scores = np.array(y)
    activated = 1 / (1 + exp(-scores))  # sigmoid function
    return np.ravel(activated)

In [None]:
#Calculate an accuracy metric
def accuracy(predicted_y, true_y):
    """Percentage of predictions that land on the correct side of 0.5.

    A prediction counts as correct when it is strictly greater than 0.5 and
    the label is 1, or strictly less than 0.5 and the label is 0. A score of
    exactly 0.5 is never counted as correct.

    Parameters
    ----------
    predicted_y : array-like
        Predicted scores, one per sample; flattened before comparison.
    true_y : array-like
        Ground-truth labels, one per sample; flattened before comparison.

    Returns
    -------
    float
        Share of correct predictions, in percent (0-100).

    Raises
    ------
    ValueError
        If ``true_y`` is empty or the two inputs differ in length.
    """
    labels = np.asarray(true_y).ravel()
    scores = np.asarray(predicted_y).ravel()

    # Fail loudly instead of dividing by zero or silently comparing
    # misaligned predictions and labels.
    if labels.size == 0:
        raise ValueError("accuracy() requires at least one label")
    if scores.size != labels.size:
        raise ValueError(
            "predicted_y has {} entries but true_y has {}".format(
                scores.size, labels.size
            )
        )

    # Vectorized form of the per-sample check; both comparisons are strict.
    correct_positive = (scores > .5) & (labels == 1)
    correct_negative = (scores < .5) & (labels == 0)
    hits = np.count_nonzero(correct_positive | correct_negative)

    return (hits / labels.size) * 100

In [None]:
test_x, test_y = features_and_labels("../archive/exoTest.csv")

# Linear score for every test row, then squashed through the sigmoid.
# NOTE(review): W was fitted by least squares directly on the 0/1 labels, so
# the raw linear score already approximates a probability. Passing it through
# the sigmoid moves the 0.5 decision cut to a raw score of 0 -- confirm this
# is intended, as it may explain a low accuracy figure.
predicted_y = logistic_layer(W * test_x.T)

test_accuracy = accuracy(predicted_y, test_y)
print("Accuracy percentage: ", test_accuracy, "%")

Accuracy percentage:  45.438596491228076 %


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=1c58af11-5d4f-40f3-a808-e868f2c28485' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>