In [1]:
import numpy as np
import random
import math
from sklearn.model_selection import train_test_split

In [2]:
# Read the comma-separated file "spambase.data" into a NumPy array
# (genfromtxt parses every field as float by default).
table = np.genfromtxt(fname="spambase.data", delimiter=",")

# Seed NumPy's global RNG before shuffling so the in-place reordering of the
# rows comes out the same on every fresh run.
np.random.seed(seed=0)
np.random.shuffle(table)

In [3]:
# Every column except the last goes into X; the last column alone becomes Y.
X, Y = table[:, :-1], table[:, -1]

# Hold out a 0.333 fraction of the rows for testing. shuffle=False tells
# train_test_split not to reorder the rows again before cutting.
trainX, testX, trainY, testY = train_test_split(
    X,
    Y,
    test_size=0.333,
    shuffle=False,
)

In [4]:
# Column-wise mean and sample standard deviation (ddof=1), computed from the
# training rows only; the same statistics are then applied to the test rows.
mean = np.mean(trainX, axis=0)
std = np.std(trainX, axis=0, ddof=1)

# A column that is constant across the training rows has std == 0, which
# would turn the divisions below into NaN/inf. Dividing by 1 instead leaves
# such a column at 0 for the training rows after centring.
std = np.where(std == 0, 1.0, std)

sTrainX = (trainX - mean) / std
sTestX = (testX - mean) / std

# Append a column of ones as the LAST column of each matrix, so the last
# entry of a weight vector multiplied against these rows acts as the bias.
trainBias = np.ones((sTrainX.shape[0], 1))
sTrainX = np.hstack((sTrainX, trainBias))

testBias = np.ones((sTestX.shape[0], 1))
sTestX = np.hstack((sTestX, testBias))

In [5]:
def getSigmoid(sTrainX, theta):
    """Return the logistic sigmoid of the linear scores ``sTrainX @ theta``.

    Parameters
    ----------
    sTrainX : numpy.ndarray
        Feature matrix (one row per sample) or a single feature vector; it is
        matrix-multiplied by ``theta``.
    theta : numpy.ndarray
        Weight vector.

    Returns
    -------
    numpy.ndarray or numpy.float64
        ``1 / (1 + exp(-z))`` for each score ``z``, i.e. values in [0, 1].
    """
    z = sTrainX @ theta
    # exp() is only ever applied to a non-positive number here, so it cannot
    # overflow no matter how large |z| gets:
    #   z >= 0:  1 / (1 + exp(-z))
    #   z <  0:  exp(z) / (1 + exp(z))      (the same value, rearranged)
    # Both cases share the denominator 1 + exp(-|z|).
    e = np.exp(-np.abs(z))
    return np.where(z >= 0, 1.0, e) / (1.0 + e)

In [6]:
# Fit the weight vector `theta` by batch gradient ascent on the
# log-likelihood of the training labels under the sigmoid model.

# A dedicated, seeded generator makes the starting weights (and therefore the
# fitted model) identical on every run of this cell, independent of whatever
# state the global RNGs happen to be in.
rng = np.random.default_rng(0)
theta = rng.uniform(-1, 1, size=sTrainX.shape[1])

eps = np.finfo(float).eps  # added inside log() so it never sees exactly 0
tol = math.pow(2, -23)     # stop once the objective changes by less than this
maxIter = 10000            # hard cap on the number of update steps

# -inf means "no previous value yet": the first comparison can never satisfy
# the stopping test by accident.
prevCost = -np.inf
n = 0.01  # learning rate
k = 0     # number of completed iterations

# Loop-invariant step size: learning rate divided by the number of rows.
step = n / sTrainX.shape[0]

while k < maxIter:
    # Gradient of the log-likelihood w.r.t. theta is X^T (y - sigmoid(X theta)).
    theta += step * (sTrainX.T @ (trainY - getSigmoid(sTrainX, theta)))

    # Evaluate the sigmoid once for the updated weights and reuse it for both
    # terms of the log-likelihood.
    prob = getSigmoid(sTrainX, theta)
    currCost = trainY @ np.log(prob + eps) + (1 - trainY) @ np.log(1 - prob + eps)

    if np.abs(currCost - prevCost) < tol:
        break

    prevCost = currCost
    k += 1

In [7]:
# Score the test rows; a value strictly above 0.5 is treated as a positive
# prediction.
pred = getSigmoid(sTestX, theta)

predPos = pred > 0.5
actualPos = testY == 1
actualNeg = testY == 0

# Confusion-matrix counts. A positive prediction is a false positive for any
# label other than 1, and a non-positive prediction is a false negative for
# any label other than 0.
truePos = int(np.sum(predPos & actualPos))
falsePos = int(np.sum(predPos & ~actualPos))
trueNeg = int(np.sum(~predPos & actualNeg))
falseNeg = int(np.sum(~predPos & ~actualNeg))

# Each ratio falls back to 0.0 when its denominator is zero (e.g. nothing was
# predicted positive) instead of raising ZeroDivisionError.
predictedPosTotal = truePos + falsePos
actualPosTotal = truePos + falseNeg
total = testY.shape[0]

precision = truePos / predictedPosTotal if predictedPosTotal else 0.0
recall = truePos / actualPosTotal if actualPosTotal else 0.0
f_measure = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
accuracy = (truePos + trueNeg) / total if total else 0.0


print("Precision:", precision)
print("Recall:", recall)
print("F_measure:", f_measure)
print("Accuracy:", accuracy)


Precision: 0.9066901408450704
Recall: 0.8940972222222222
F_measure: 0.9003496503496503
Accuracy: 0.9256360078277887
