In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import random
import matplotlib.pyplot as plt

In [2]:
table = np.genfromtxt("spambase.data", delimiter=",")

np.random.seed(0)
np.random.shuffle(table)

In [3]:
X = table[:, :-1]
Y = table[:, -1]

trainX, testX, trainY, testY = train_test_split(X, Y, test_size=0.333, shuffle=False)

In [4]:
mean = np.mean(trainX, axis=0)
std = np.std(trainX, axis=0, ddof=1)

sTrainX = (trainX-mean)/std
sTestX = (testX-mean)/std

In [5]:
sTrainXSpam = sTrainX[trainY == 1]
sTrainXNonSpam = sTrainX[trainY == 0]

priorSpam = sTrainXSpam.shape[0]/sTrainX.shape[0]
spamMean = np.mean(sTrainXSpam, axis=0)
spamStd = np.std(sTrainXSpam, axis=0, ddof=1) + np.finfo(float).eps

priorNonSpam = sTrainXNonSpam.shape[0]/sTrainX.shape[0]
nonSpamMean = np.mean(sTrainXNonSpam, axis=0)
nonSpamStd = np.std(sTrainXNonSpam, axis=0, ddof=1) + np.finfo(float).eps

In [6]:
truePos, trueNeg = 0, 0
falsePos, falseNeg = 0, 0

for i in range(sTestX.shape[0]):
    s = sTestX[i]
    spamPred = priorSpam
    nonSpamPred = priorNonSpam
    for j in range(len(s)):
        spamPred *= 1/(spamStd[j] * np.sqrt(2 * np.pi)) * np.exp(-((s[j]-spamMean[j])**2)/(2*spamStd[j]**2))
        nonSpamPred *= 1/(nonSpamStd[j] * np.sqrt(2 * np.pi)) * np.exp(-((s[j]-nonSpamMean[j])**2)/(2*nonSpamStd[j]**2))
    if nonSpamPred > spamPred:
        if testY[i] == 0:
            trueNeg += 1
        else: 
            falseNeg += 1
    else:
        if testY[i] == 1:
            truePos += 1
        else:
            falsePos += 1

In [7]:
precision = truePos/(truePos + falsePos)
recall = truePos/(truePos + falseNeg)
f_measure = 2 * precision * recall/(precision + recall)
accuracy = (truePos + trueNeg)/testY.shape[0]


print("Precision: ", precision)
print("Recall: ", recall)
print("F_measure: ", f_measure)
print("Accuracy: ", accuracy)

Precision:  0.6232044198895028
Recall:  0.9791666666666666
F_measure:  0.7616475354490209
Accuracy:  0.7697325505544683
