We will now evaluate the performance of the classifier.
Two sets of evaluation formulas are used:
1. The formulas used by Sahami et al.
2. The formulas used by Koutsias et al.

The formulas in 1: 
\begin{equation}
Recall = \frac{n_{S\rightarrow S}}{n_{S\rightarrow S} + n_{S\rightarrow L}}
\end{equation}

and

\begin{equation}
Precision= \frac{n_{S\rightarrow S}}{n_{S\rightarrow S} + n_{L\rightarrow S}}
\end{equation}


The second uses cost-sensitive evaluation formulas:

\begin{equation}
WAccuracy= \frac{\lambda*n_{L\rightarrow L}+n_{S\rightarrow S}}{\lambda*n_{L} + n_{S}}
\end{equation}

\begin{equation}
WError= \frac{\lambda*n_{L\rightarrow S}+n_{S\rightarrow L}}{\lambda*n_{L} + n_{S}}
\end{equation}


**More Information:**

1. S-S ($n_{S\rightarrow S}$): Number of emails that are `spam` and classified as `spam`
2. S-L ($n_{S\rightarrow L}$): Number of emails that are `spam` and classified as `legit`
3. L-L ($n_{L\rightarrow L}$): Number of emails that are `legit` and classified as `legit`
4. L-S ($n_{L\rightarrow S}$): Number of emails that are `legit` and classified as `spam`
5. L ($n_{L}$): Number of emails that are `legit`
6. S ($n_{S}$): Number of emails that are `spam`
7. Lambda ($\lambda$): Threshold used to determine if an email is spam. Three thresholds were used (1, 9, and 999). In the weighted formulas above, $\lambda$ also acts as the weight given to each `legit` email.

In [9]:
class Results:
    """Confusion counts of one classifier run, plus the settings of that run.

    Attributes (each also exists as a class-level default):
        corpusName   -- corpus variant (bare, lemm, lemm_stop, stop)
        weighted     -- True for weighted, False for non-weighted
        threshold    -- threshold value (1, 9, 999)
        featureCount -- number of features (50-700)
        SS           -- spam classified as spam
        SL           -- spam classified as legit
        LL           -- legit classified as legit
        LS           -- legit classified as spam
    """

    # Class-level defaults; __init__ overrides every one of them per instance.
    corpusName = ""
    weighted = False
    threshold = 0
    featureCount = 0
    SS = 0
    SL = 0
    LL = 0
    LS = 0

    def __init__(self, corpusName, weighted, threshold, featureCount, SS, SL, LL, LS):
        """Store the run settings and the four confusion counts on the instance."""
        # Run configuration
        self.corpusName, self.weighted = corpusName, weighted
        self.threshold, self.featureCount = threshold, featureCount
        # Confusion counts
        self.SS, self.SL = SS, SL
        self.LL, self.LS = LL, LS
    

In [22]:
def computeSR(SS,SL):
    """Return spam recall, SS / (SS + SL).

    SS -- spam emails classified as spam
    SL -- spam emails classified as legit

    With plain Python numbers, a zero denominator raises ZeroDivisionError.
    """
    actual_spam = SS + SL
    return SS / actual_spam

def computeSP(SS, LS):
    """Return spam precision, SS / (SS + LS).

    SS -- spam emails classified as spam
    LS -- legit emails classified as spam

    With plain Python numbers, a zero denominator raises ZeroDivisionError.
    """
    flagged_as_spam = SS + LS
    return SS / flagged_as_spam

def computeWAcc(thresh, LL, SS, L, S):
    """Return weighted accuracy, (thresh*LL + SS) / (thresh*L + S).

    thresh -- weight applied to the legit counts
    LL     -- legit emails classified as legit
    SS     -- spam emails classified as spam
    L, S   -- total number of legit / spam emails

    With plain Python numbers, a zero denominator raises ZeroDivisionError.
    """
    weighted_correct = (thresh * LL) + SS
    weighted_total = (thresh * L) + S
    return weighted_correct / weighted_total

def computeWErr(thresh, LS, SL, L, S):
    """Return weighted error rate, (thresh*LS + SL) / (thresh*L + S).

    thresh -- weight applied to the legit counts
    LS     -- legit emails classified as spam
    SL     -- spam emails classified as legit
    L, S   -- total number of legit / spam emails

    With plain Python numbers, a zero denominator raises ZeroDivisionError.
    """
    weighted_errors = (thresh * LS) + SL
    weighted_total = (thresh * L) + S
    return weighted_errors / weighted_total

def computeBaseWAcc(thresh, L, S):
    """Return the baseline weighted accuracy, thresh*L / (thresh*L + S).

    thresh -- weight applied to the legit count
    L, S   -- total number of legit / spam emails

    With plain Python numbers, a zero denominator raises ZeroDivisionError.
    """
    weighted_legit = thresh * L
    return weighted_legit / ((thresh * L) + S)

def computeBaseWErr(thresh, L, S):
    """Return the baseline weighted error rate, S / (thresh*L + S).

    thresh -- weight applied to the legit count
    L, S   -- total number of legit / spam emails

    With plain Python numbers, a zero denominator raises ZeroDivisionError.
    """
    weighted_total = (thresh * L) + S
    return S / weighted_total

def computeTCR(thresh, S, LS, SL):
    """Return the TCR value, S / (thresh*LS + SL).

    thresh -- weight applied to the legit-as-spam count
    S      -- total number of spam emails
    LS     -- legit emails classified as spam
    SL     -- spam emails classified as legit

    With plain Python numbers, a zero denominator (no misclassified email at
    all) raises ZeroDivisionError.
    """
    weighted_mistakes = (thresh * LS) + SL
    return S / weighted_mistakes
    

In [71]:
import pandas as pd
import os
   
rootdir = "Classified/"

# List every file below rootdir.
# os.walk already descends into each sub-directory and yields, for every
# directory it visits, (directory path, sub-directory names, file names).
# A single inner loop over `files` therefore reaches each file exactly once.
# An additional loop over `dirs` would pair each sub-directory name with the
# files of its parent directory, and would print nothing for directories that
# contain files but no sub-directories.
for subdir, dirs, files in os.walk(rootdir):
    #for each file in the directory currently being visited
    for file in files:
        print (subdir, file)

# df = pd.read_csv("Classified/bare/50.csv")

# print (df[:5])

In [29]:
# Confusion counts for the predictions in `df`.
# Rows with Actual == 0 are counted as spam (S) and rows with Actual == 1 as
# legit (L); the two-letter names read "actual class, predicted class".
spam_rows = df[df["Actual"] == 0]
legit_rows = df[df["Actual"] == 1]

S = len(spam_rows)
L = len(legit_rows)
SS = len(spam_rows[spam_rows["Predicted"] == 0])
SL = len(spam_rows[spam_rows["Predicted"] == 1])
LL = len(legit_rows[legit_rows["Predicted"] == 1])
LS = len(legit_rows[legit_rows["Predicted"] == 0])

# One "name value" line per count, in the same order as they were computed.
for label, value in (("S", S), ("L", L), ("SS", SS), ("SL", SL), ("LL", LL), ("LS", LS)):
    print (label, value)

S 20
L 11
SS 11
SL 9
LL 7
LS 4


In [52]:
# scatterplot.py
import numpy as np
import pylab as pl

def plotThresh(thresh, BSP, BSR, LSP, LSR, LSSP, LSSR, SSP, SSR):
    """Show a spam precision vs. spam recall plot for one threshold.

    thresh -- threshold value, only used in the plot title
    *SP    -- precision values (y-axis) of the four series
    *SR    -- recall values (x-axis) of the four series

    The axes are fixed to recall 0.4-1 and precision 0.95-1. The figure is
    displayed with pl.show(); nothing is returned.
    """
    # One row per series: (recall, precision, format string, legend label).
    # NOTE(review): if the L*/LSS*/SS* arguments stand for the lemm /
    # lemm_stop / stop corpora, these legend labels may be attached to the
    # wrong series - confirm against the callers.
    series = (
        (BSR, BSP, 'ro', 'No Lemmatizer, No Stop-list'),
        (LSR, LSP, 'bs', 'No Lemmatizer, Top-100 Stop-list'),
        (LSSR, LSSP, 'gD', 'With Lemmatizer, No Stop-list'),
        (SSR, SSP, 'k*', 'With Lemmatizer, Top-100 Stop-list'),
    )

    plot_title = "Precision and Recall at Threshold: " + str(thresh)

    # Axis captions and title
    pl.xlabel('Spam Recall')
    pl.ylabel('Spam Precision')
    pl.title(plot_title)

    # Fixed axis ranges: x = recall, y = precision
    pl.xlim(0.4, 1)
    pl.ylim(0.95, 1)

    for recall, precision, fmt, legend_label in series:
        pl.plot(recall, precision, fmt, label=legend_label)

    pl.legend(loc='upper right')
    pl.grid()
    # Display the finished figure
    pl.show()

In [58]:
# scatterplot.py
import numpy as np
import pylab as pl

def plotTCR(thresh, BTCR, LTCR, LSTCR, STCR):
    """Show TCR against the number of retained attributes for one threshold.

    thresh -- threshold value, only used in the plot title
    BTCR, LTCR, LSTCR, STCR -- TCR values of the four series; each is plotted
        against the attribute counts 50, 100, ..., 700, so each is expected
        to hold one value per count (14 values)

    The axes are fixed to 50-700 attributes and TCR 0.0-6.0. The figure is
    displayed with pl.show(); nothing is returned.
    """
    # One row per series: (TCR values, line colour, marker, legend label).
    # NOTE(review): if the L/LS/S prefixes stand for the lemm / lemm_stop /
    # stop corpora, these legend labels may be attached to the wrong series -
    # confirm against the callers.
    series = (
        (BTCR, 'r', "o", 'No Lemmatizer, No Stop-list'),
        (LTCR, 'b', "s", 'No Lemmatizer, Top-100 Stop-list'),
        (LSTCR, 'g', "D", 'With Lemmatizer, No Stop-list'),
        (STCR, 'k', "*", 'With Lemmatizer, Top-100 Stop-list'),
    )

    plot_title = "TCR at Threshold: " + str(thresh)

    # Axis captions and title
    pl.xlabel('Number of retained Attributes')
    pl.ylabel('TCR')
    pl.title(plot_title)

    # Fixed axis ranges: x = attribute count, y = TCR
    pl.xlim(50, 700)
    pl.ylim(0.0, 6.0)

    # x positions shared by all series: 50, 100, ..., 700
    attribute_counts = np.arange(50, 750, 50)

    for tcr_values, colour, marker_shape, legend_label in series:
        pl.plot(attribute_counts, tcr_values, colour, marker=marker_shape, label=legend_label)

    pl.legend(loc='upper right')
    pl.grid()
    # Display the finished figure
    pl.show()

In [61]:
import pandas as pd

# Column names of the summary table, one per reported quantity.
header = [
    'Filter Configurations',
    'Threshold',
    'No. of Attrib',
    'Spam Recall',
    'Spam Precision',
    'Weighted Accuracy',
    'baseline W.Acc',
    'TCR',
]
# Empty table with those columns; no rows are added here.
df = pd.DataFrame(columns=header)