In [100]:
import os
import io
import math
import numpy as np
import pandas as pd
from decimal import Decimal

from sklearn.feature_extraction.text import CountVectorizer

### Data Preprocessing

Using nltk for cleaning

In [101]:
import nltk
nltk.download("popular")
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import string

stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

# Generate vocabulary of {word: {count: <number>, spam_doc_count: <number>, ham_doc_count: <number>}, total_legit_emails: <number>, total_spam_emails: <number>}
meta = {}
test_dir_index = 10

class Preprocess:
  def pipeline(self, text, classification, dir_index):
    self.classification = classification
    #1. Preprocess Text
    words = self.process_text(text)

    #2. Lemmetize
    # lemmetized_words = self.word_lemmatizer(words)

    #3. Stem words, not considering as already lemmetized
    # stemmed_words = self.word_stemmer(lemmetized_words)
    # if test_dir_index != dir_index:
    #   self.update_counts(words, classification)
    
    return " ".join(words)

  def process_text(self, text):
      # print(text)
      word_tokens = nltk.word_tokenize(text)

      nopunc = [word.lower() for word in word_tokens if word.isalpha() and len(word) > 1]

      clean_words = [word for word in nopunc if word not in stopwords.words('english')]
      return clean_words

  def word_stemmer(self, words):
      stem_words = [stemmer.stem(o) for o in words]
      return stem_words

  def word_lemmatizer(self, words):
      lemma_words = [lemmatizer.lemmatize(o) for o in words]
      return lemma_words

  def update_counts(self, words, classification):
      doc_meta = {}
      for word in words:
        if word not in doc_meta:
          doc_meta[word] = {}
          doc_meta[word]['count'] = 1
          if classification == 'ham':
            doc_meta[word]['ham_doc_count'] = 1
            doc_meta[word]['spam_doc_count'] = 0
            doc_meta[word]['ham_word_count'] = 1
            doc_meta[word]['spam_word_count'] = 0
          else:
            doc_meta[word]['spam_doc_count'] = 1
            doc_meta[word]['ham_doc_count'] = 0
            doc_meta[word]['spam_word_count'] = 1
            doc_meta[word]['ham_word_count'] = 0
        else:
          doc_meta[word]['count'] += 1
          if classification == 'ham':
            doc_meta[word]['ham_word_count'] += 1
          else:
            doc_meta[word]['spam_word_count'] += 1

      for word in doc_meta:
        if word in meta:
          meta[word]['count'] += doc_meta[word]['count']
          meta[word]['spam_doc_count'] += doc_meta[word]['spam_doc_count']
          meta[word]['ham_doc_count'] += doc_meta[word]['ham_doc_count']
          meta[word]['ham_word_count'] += doc_meta[word]['ham_word_count']
          meta[word]['spam_word_count'] += doc_meta[word]['spam_word_count']
        else:
          meta[word] = {}
          meta[word]['count'] = 1
          meta[word]['class'] = {'ham': False, 'spam':False}
          meta[word]['ham_doc_count'] = doc_meta[word]['ham_doc_count']
          meta[word]['spam_doc_count'] = doc_meta[word]['spam_doc_count']
          meta[word]['ham_word_count'] = doc_meta[word]['ham_word_count']
          meta[word]['spam_word_count'] = doc_meta[word]['spam_word_count']
          if classification == "ham":
            meta[word]['class']['ham'] = True
          else:
            meta[word]['class']['spam'] = True


[nltk_data] Downloading collection 'popular'
[nltk_data]    | 
[nltk_data]    | Downloading package cmudict to /root/nltk_data...
[nltk_data]    |   Package cmudict is already up-to-date!
[nltk_data]    | Downloading package gazetteers to /root/nltk_data...
[nltk_data]    |   Package gazetteers is already up-to-date!
[nltk_data]    | Downloading package genesis to /root/nltk_data...
[nltk_data]    |   Package genesis is already up-to-date!
[nltk_data]    | Downloading package gutenberg to /root/nltk_data...
[nltk_data]    |   Package gutenberg is already up-to-date!
[nltk_data]    | Downloading package inaugural to /root/nltk_data...
[nltk_data]    |   Package inaugural is already up-to-date!
[nltk_data]    | Downloading package movie_reviews to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Package movie_reviews is already up-to-date!
[nltk_data]    | Downloading package names to /root/nltk_data...
[nltk_data]    |   Package names is already up-to-date!
[nltk_data]    | Do

#### Preprocessing all files

In [102]:
def readFiles(path, dir_index):
  pipeline = Preprocess()
  for root, dirnames, filenames in os.walk(path):
    for filename in filenames:
      path = os.path.join(root, filename)
      if 'spm' in filename:
        classification = 'spam'
      else:
        classification = 'ham'
      inBody = False
      lines = []
      f = io.open(path, 'r')
      text = f.read()
      message = pipeline.pipeline(text, classification, dir_index)
      if dir_index!=test_dir_index:
        pipeline.update_counts(message.split(), classification)
      f.close()
      yield path, message, classification

def dataFrameFromDirectory(path, dir_index):
  rows = []
  index = []
  for filename, message, classification in readFiles(path, dir_index):
    rows.append({'message': message, 'class': classification})
    index.append(filename)
  print("Directory " + str(path.split("/")[-1]) + " preprocessed!")
  return pd.DataFrame(rows, index=index)


train_data = pd.DataFrame({'message': [], 'class': []})
test_data = pd.DataFrame({ 'message': [] })

dir = './drive/My Drive/Colab Notebooks/MLCyberSec/lingspam_data/lemm_stop/part'
for dir_index in range(1,10):
  train_data = train_data.append(dataFrameFromDirectory(dir+str(dir_index), dir_index))

test_data = test_data.append(dataFrameFromDirectory(dir+str(test_dir_index), test_dir_index))

Directory part1 preprocessed!
Directory part2 preprocessed!
Directory part3 preprocessed!
Directory part4 preprocessed!
Directory part5 preprocessed!
Directory part6 preprocessed!
Directory part7 preprocessed!
Directory part8 preprocessed!
Directory part9 preprocessed!
Directory part10 preprocessed!


##### Meta information for getting the IG values

In [103]:
def custom_log(a,b):
  if a == b or a == 0 or b == 0:
    return 0
  if (a/b) < 0:
    return 0
  term = a/b
  return math.log2(term)

def custom_division(a,b):
  return (a + 1)/(b + 2)


total_legit_emails = (train_data['class'] == 'ham').sum()
total_spam_emails = (train_data['class'] == 'spam').sum()

total_emails = total_legit_emails + total_spam_emails

p = total_legit_emails / total_emails

H_c = (-1 * p * math.log2(p)) - ((1-p) * math.log2(1-p))

def calculate_word_H_c(word):
  H_legit_word_not_appearing = custom_division(total_legit_emails - meta[word]['ham_doc_count'], total_emails) * custom_log(total_legit_emails - meta[word]['ham_doc_count'],(total_spam_emails - meta[word]['spam_doc_count']) + (total_legit_emails - meta[word]['ham_doc_count']))

  H_spam_word_not_appearing = custom_division(total_spam_emails - meta[word]['spam_doc_count'], total_emails) * custom_log(total_spam_emails - meta[word]['spam_doc_count'],(total_spam_emails - meta[word]['spam_doc_count']) + (total_legit_emails - meta[word]['ham_doc_count']))

  H_legit_word_appearing = custom_division(meta[word]['ham_doc_count'], total_emails) * custom_log(meta[word]['ham_doc_count'],meta[word]['spam_doc_count'] + meta[word]['ham_doc_count'])

  H_spam_word_appearing = custom_division(meta[word]['spam_doc_count'], total_emails) * custom_log(meta[word]['spam_doc_count'],meta[word]['spam_doc_count'] + meta[word]['ham_doc_count'])

  H_c_word = H_legit_word_not_appearing + H_spam_word_not_appearing + H_legit_word_appearing + H_spam_word_appearing
  
  return -1 * H_c_word


#### Calculating the IG values

In [104]:
from collections import OrderedDict 
from operator import getitem 

def IG():
  for word in meta:
    H_c_word = calculate_word_H_c(word)
    IG_word = H_c - H_c_word
    meta[word]['ig_value'] = IG_word

def top_n_features(n):
  IG()
  top_features = {}
  res = dict( sorted(meta.items(),
                           key=lambda item: item[1]['ig_value'],
                           reverse=True))
  i = 0
  for word in res:
    if i >= n:
      break
    else:
      top_features[word] = meta[word]['ig_value']

    i+=1

  return top_features



print("Print top 10 words")
ig_features_10 = top_n_features(10)
print(top_n_features(10))

print("Print top 100 words")
ig_features_100 = top_n_features(100)
print(top_n_features(100))

print("Print top 1000 words")
ig_features_1000 = top_n_features(1000)
print(top_n_features(1000))



Print top 10 words
{'language': 0.2010193994504887, 'remove': 0.1666583128516556, 'free': 0.16383672074046435, 'linguistic': 0.14930190818557804, 'university': 0.1421386466427209, 'money': 0.11688064490039662, 'click': 0.09904894902615902, 'market': 0.09075597352704434, 'business': 0.08517143618354617, 'today': 0.0797664112767188}
Print top 100 words
{'language': 0.2010193994504887, 'remove': 0.1666583128516556, 'free': 0.16383672074046435, 'linguistic': 0.14930190818557804, 'university': 0.1421386466427209, 'money': 0.11688064490039662, 'click': 0.09904894902615902, 'market': 0.09075597352704434, 'business': 0.08517143618354617, 'today': 0.0797664112767188, 'advertise': 0.07712160251861178, 'product': 0.07694204715338027, 'company': 0.0753315896488711, 'sell': 0.07494575438175077, 'linguistics': 0.07427573807005083, 'million': 0.0733308149162607, 'internet': 0.07208235800324492, 'english': 0.07159783786350504, 'income': 0.07102058425689017, 'day': 0.07014175232708164, 'save': 0.067520

In [105]:
test_meta = {}
for index, row in test_data.iterrows():
  test_meta[index] = {}
  for word in row['message'].split():
    if word not in test_meta:
      test_meta[index][word] = {}

### 1. Bernoulli Naive Bayes

In [106]:
def cal_nb_accuracy(ig_features):
  correct = 0
  tp = 0
  tn = 0
  fp = 0
  fn = 0
  for index, row in test_data.iterrows():
    p_x_spam = 1.0
    p_spam_x = 1.0
    p_x_legit = 1.0
    p_legit_x = 1.0
    for word in ig_features:
        if word in test_meta[index]:
          p_x_spam = (1 + meta[word]['spam_doc_count']) / (2 + total_spam_emails)
          p_x_legit = (1 + meta[word]['ham_doc_count']) / (2 + total_legit_emails)
        else:
          p_x_spam = 1 - ((1 + meta[word]['spam_doc_count']) / (2 + total_spam_emails))
          p_x_legit = 1 - ((1 + meta[word]['ham_doc_count']) / (2 + total_legit_emails))
        p_spam_x *= p_x_spam
        p_legit_x *= p_x_legit

    p_spam = custom_division(total_spam_emails, total_emails)
    p_legit = custom_division(total_legit_emails, total_emails)
    if (p_spam_x * p_spam) > (p_legit_x * p_legit):
      prediction = "spam"
    else:
      prediction = "ham"
    if prediction == row['class']:
      correct += 1
      if prediction == "spam":
        tp += 1
      else:
        tn += 1
    else:
      if prediction == "spam":
        fp += 1
      else:
        fn += 1
    # print("Prediction is " + str(prediction) +" and value is " + str(p_x_spam) + " actual is " + row['class'])
  print("Accuracy is %.2f" % (correct/len(test_data)*100))
  print("Spam Precision is %.2f and Spam recall is %.2f" % ((tp/(tp+fp))*100, (tp/(tp+fn))*100))

  

In [107]:
print("With 10 Features")
cal_nb_accuracy(ig_features_10)

print("With 100 Features")
cal_nb_accuracy(ig_features_100)

print("With 1000 Features")
cal_nb_accuracy(ig_features_1000)

With 10 Features
Accuracy is 94.50
Spam Precision is 85.11 and Spam recall is 81.63
With 100 Features
Accuracy is 94.85
Spam Precision is 100.00 and Spam recall is 69.39
With 1000 Features
Accuracy is 94.16
Spam Precision is 100.00 and Spam recall is 65.31


### 2. Multinomial Naive Bayes with Binary Features

In [108]:
def cal_features_occurences(features):
  total_occurrences_legit = 0
  total_occurrences_spam = 0
  for word in features:
      total_occurrences_spam += meta[word]['spam_word_count']
      total_occurrences_legit += meta[word]['ham_word_count']

  return total_occurrences_legit, total_occurrences_spam

def cal_nb_binary_accuracy(ig_features):
  correct = 0
  tp = 0
  tn = 0
  fp = 0
  fn = 0
  for index, row in test_data.iterrows():
    p_x_spam = 1.0
    p_spam_x = 1.0
    p_x_legit = 1.0
    p_legit_x = 1.0
    total_occurrences_legit, total_occurrences_spam = cal_features_occurences(ig_features)     
    for word in ig_features:
        if word in test_meta[index]:
          p_x_spam = (1 + meta[word]['spam_word_count']) / (len(ig_features.keys()) + total_occurrences_spam)
          p_x_legit = (1 + meta[word]['ham_word_count']) / (len(ig_features.keys()) + total_occurrences_legit)
          p_spam_x *= p_x_spam
          p_legit_x *= p_x_legit
        
    p_spam = total_spam_emails/total_emails
    p_legit = total_legit_emails/ total_emails

    if (p_spam_x * p_spam) > (p_legit_x * p_legit):
      prediction = "spam"
    else:
      prediction = "ham"
    
    if prediction == row['class']:
      correct += 1
      if prediction == "spam":
        tp += 1
      else:
        tn += 1
    else:
      if prediction == "spam":
        fp += 1
      else:
        fn += 1
    # print("Prediction is " + str(prediction) +" and value is " + str(p_x_spam) + " actual is " + row['class'])

  print("Accuracy is %.2f" % (correct/len(test_data)*100))
  print("Spam Precision is %.2f and Spam recall is %.2f" % (tp/(tp+fp)*100, (tp/(tp+fn))*100))


print("With 10 Features")
cal_nb_binary_accuracy(ig_features_10)

print("With 100 Features")
cal_nb_binary_accuracy(ig_features_100)

print("With 1000 Features")
cal_nb_binary_accuracy(ig_features_1000)


With 10 Features
Accuracy is 94.50
Spam Precision is 85.11 and Spam recall is 81.63
With 100 Features
Accuracy is 98.28
Spam Precision is 95.83 and Spam recall is 93.88
With 1000 Features
Accuracy is 98.63
Spam Precision is 100.00 and Spam recall is 91.84


### 3. Multinomial Naive Bayes with Term Frequencies

In [109]:
from collections import Counter
def cal_features_occurences(features):
  total_occurrences_legit = 0
  total_occurrences_spam = 0
  for word in features:
    total_occurrences_spam += meta[word]['spam_word_count']
    total_occurrences_legit += meta[word]['ham_word_count']

  return total_occurrences_legit, total_occurrences_spam


def cal_nb_accuracy(ig_features):
  correct = 0
  tp = 0
  tn = 0
  fp = 0
  fn = 0
  for index, row in test_data.iterrows():
    p_x_spam = Decimal(1.0)
    p_spam_x = Decimal(1.0)
    p_x_legit = Decimal(1.0)
    p_legit_x = Decimal(1.0)
    total_occurrences_legit, total_occurrences_spam = cal_features_occurences(ig_features)

    word_count_email = dict(Counter(row['message'].split()))
    
    for word in ig_features:
        if word in test_meta[index]:
          n_spam = Decimal(pow(1 + meta[word]['spam_word_count'] / (len(ig_features.keys()) + total_occurrences_spam), word_count_email[word]))
          p_x_spam =  (n_spam/Decimal(math.factorial(word_count_email[word])))
          n_legit = Decimal(pow(1 + meta[word]['ham_word_count'] / (len(ig_features.keys()) + total_occurrences_legit), word_count_email[word]))
          p_x_legit = (n_legit/Decimal(math.factorial(word_count_email[word])))
          p_spam_x *= Decimal(p_x_spam) if Decimal(p_x_spam) > 0 else 0
          p_legit_x *= Decimal(p_x_legit) if Decimal(p_x_legit) > 0 else 0
    p_spam = Decimal(total_spam_emails/total_emails)
    p_legit = Decimal(total_spam_emails/total_emails)
    if p_spam_x * p_spam > p_legit_x * p_legit:
      prediction = "spam"
    else:
      prediction = "ham"
    if prediction == row['class']:
      correct += 1
      if prediction == "spam":
        tp += 1
      else:
        tn += 1
    else:
      if prediction == "spam":
        fp += 1
      else:
        fn += 1
    # print("Prediction is " + str(prediction) +" and value is " + str(p_x_spam) + " actual is " + row['class'])

  print("Accuracy is %s" % (str(correct/len(test_data)*100)))
  print("Spam Precision is %.2f and Spam recall is %.2f" % ((tp/(tp+fp))*100, (tp/(tp+fn))*100))

print("With 10 Features")
cal_nb_accuracy(ig_features_10)
print("With 100 Features")
cal_nb_accuracy(ig_features_100)
print("With 1000 Features")
cal_nb_accuracy(ig_features_1000)

With 10 Features
Accuracy is 95.1890034364261
Spam Precision is 88.89 and Spam recall is 81.63
With 100 Features
Accuracy is 99.3127147766323
Spam Precision is 96.08 and Spam recall is 100.00
With 1000 Features
Accuracy is 98.96907216494846
Spam Precision is 97.92 and Spam recall is 95.92


### 4. SVM Based Spam Filtering

In [110]:
from sklearn.svm import SVC, NuSVC, LinearSVC
from sklearn.metrics import confusion_matrix
from sklearn.feature_selection import chi2
from sklearn.feature_selection import SelectKBest
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import mutual_info_classif
from sklearn.model_selection import train_test_split

#### SVM, using BF Features, as it will scale well for larger calcalations in SVM

##### Feature Selection Methods
1. Count Vectorizer
2. Chi2

#### Count Vectorizer Feature Selection (Selecting Top 1000, to account for more features)

In [117]:
# Create features and target

X, Y = train_data['message'].values, train_data['class'].values
cv = CountVectorizer(binary=True,max_features=1000, stop_words='english')
X_vec = cv.fit_transform(X)

cv_features = dict(zip(cv.get_feature_names(),
               mutual_info_classif(X_vec, Y, discrete_features=True)
               ))
cv_features = dict(sorted(cv_features.items(),
                           key=lambda item: item[1],
                           reverse=True))
Y = list(map(lambda x: 0 if x == 'ham' else 1, Y))

print(cv_features)

{'language': 0.1417098418802516, 'remove': 0.11708162028112695, 'free': 0.11487571246222325, 'linguistic': 0.10383070241095627, 'university': 0.10051100506124952, 'money': 0.08226087470903995, 'click': 0.07012878848281247, 'market': 0.06412179203771973, 'business': 0.06018465607204379, 'today': 0.056414812078056584, 'advertise': 0.054907317133096226, 'product': 0.054504462000284255, 'company': 0.05336577652055174, 'sell': 0.053262440137394146, 'million': 0.05209372138642536, 'linguistics': 0.05185551435786284, 'english': 0.05170422981001736, 'internet': 0.0510628382209886, 'income': 0.0506777339455786, 'day': 0.049736463040131795, 'save': 0.048034686448994016, 'guarantee': 0.047645546495954025, 'thousand': 0.047458865643250074, 'best': 0.046912957127818455, 'easy': 0.04664520141083861, 'check': 0.045110726711149574, 'purchase': 0.04492828775423398, 'buy': 0.0444310740322575, 'cash': 0.044254645866456575, 'win': 0.04419238947316345, 'bulk': 0.0434038409055623, 'want': 0.0431539681188576

#### Chi2 Features

In [118]:
chi2_selector = SelectKBest(chi2, k=1000)
X_kbest = chi2_selector.fit_transform(X_vec.toarray(), Y)

ch_features = dict(zip(cv.get_feature_names(),
               mutual_info_classif(X_kbest, Y, discrete_features=True)
               ))

ch_features = dict(sorted(ch_features.items(),
                           key=lambda item: item[1],
                           reverse=True))
print(ch_features)

{'language': 0.1417098418802516, 'remove': 0.11708162028112695, 'free': 0.11487571246222325, 'linguistic': 0.10383070241095627, 'university': 0.10051100506124952, 'money': 0.08226087470903995, 'click': 0.07012878848281247, 'market': 0.06412179203771973, 'business': 0.06018465607204379, 'today': 0.056414812078056584, 'advertise': 0.054907317133096226, 'product': 0.054504462000284255, 'company': 0.05336577652055174, 'sell': 0.053262440137394146, 'million': 0.05209372138642536, 'linguistics': 0.05185551435786284, 'english': 0.05170422981001736, 'internet': 0.0510628382209886, 'income': 0.0506777339455786, 'day': 0.049736463040131795, 'save': 0.048034686448994016, 'guarantee': 0.047645546495954025, 'thousand': 0.047458865643250074, 'best': 0.046912957127818455, 'easy': 0.04664520141083861, 'check': 0.045110726711149574, 'purchase': 0.04492828775423398, 'buy': 0.0444310740322575, 'cash': 0.044254645866456575, 'win': 0.04419238947316345, 'bulk': 0.0434038409055623, 'want': 0.0431539681188576

#### Features are mostly the same as IG calcaultions done above, proceeding to calculate scores on the cross validation sets

 #### Cross Validation on the training data set (Count Vectorizer Features)

 ##### Using the following parameters for SVM

 1. Train Test Split - 90/10
 2. C=1, Regularization is not needed here as we are selecting the top features so no chance of overfitting 
 3. Kernel is Linear as we need to classify between two classes

In [121]:
from sklearn import svm
y = np.asarray(Y)

## Train/Test Split 9 parts : 1 part on the train data set
X_train, X_test, y_train, y_test = train_test_split(
    X_vec, y, test_size=0.1, random_state=0)

## Last part10 test data
X_last_part, y_last_part = test_data['message'].values, test_data['class'].values
X_vec_last_part_test = cv.fit_transform(X_last_part)
y_last_part = list(map(lambda x: 0 if x == 'ham' else 1, y_last_part))
y = np.asanyarray(y_last_part)

clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)


print(f"Accuracy on the test split: {clf.score(X_test, y_test)*100}")

print(f"Accuracy on the part10 test set: {clf.score(X_vec_last_part_test, y_last_part)*100}")


Accuracy on the test split: 99.23371647509579
Accuracy on the part10 test set: 79.72508591065292


In [120]:
from sklearn.metrics import average_precision_score

y_score = clf.decision_function(X_test)
average_precision = average_precision_score(y_test, y_score)
print('Precision-recall score: {0:0.2f}'.format(
      average_precision))

Precision-recall score: 1.00


### 5. Adversarial Classification

Adversarial Strategies for an attacker to have spam classified emails to get it predicted as ham. We calculate the `log_odds` of all such spam predicted emails first using the adversary strategy.

The function `cal_log_odds_features` are common to all test emails, where we calculate the `log_odds` for every word in the feature.

The function `cal_log_odds_features_for_defense` is taking into account the words which the attacker added into that test email and finally calculating the `log_odds` for those words which were newly added in that email only, i.e. x'

`log_odds_document_before` - dict which keeps track of all the log_odds of documents before attackers strategy.

`log_odds_document_after` - dict which keeps track of all the log_odds of documents after the attackers strategy.

`log_odds_document_defense` - dict which keeps track of all the log_odds of documents to prepare for the defense strategy 

In [115]:
from collections import defaultdict
import operator

def cal_log_odds_features(ig_features):
  log_odds = defaultdict()
  log_odds_negative = defaultdict()

  total_occurrences_legit, total_occurrences_spam = cal_features_occurences(ig_features)
  p_x_spam = 1.0
  p_spam_x = 1.0
  p_x_legit = 1.0
  p_legit_x = 1.0     
  for word in ig_features:
    p_x_spam = (1 + meta[word]['spam_word_count']) / (len(ig_features.keys()) + total_occurrences_spam)
    p_x_legit = (1 + meta[word]['ham_word_count']) / (len(ig_features.keys()) + total_occurrences_legit)
    log_odds[word] = custom_log(p_x_spam, p_x_legit)
    log_odds_negative[word] = custom_log(1-p_x_spam, 1 - p_x_legit)

  return log_odds, log_odds_negative

def cal_log_odds_features_for_defense(ig_features, words_used_by_attacker):
  log_odds = defaultdict()
  log_odds_negative = defaultdict()

  total_occurrences_legit, total_occurrences_spam = cal_features_occurences(ig_features)
  p_x_spam = 1.0
  p_spam_x = 1.0
  p_x_legit = 1.0
  p_legit_x = 1.0     
  
  p_x_prime_word = 1.0
  for word in words_used_by_attacker:
      p_x_prime_word += (1 + meta[word]['spam_word_count']) / (len(ig_features.keys()) + total_occurrences_spam)
  for word in ig_features:
      p_x_spam = (1 + meta[word]['spam_word_count']) / (len(ig_features.keys()) + total_occurrences_spam)
      p_x_legit = (1 + meta[word]['ham_word_count']) / (len(ig_features.keys()) + total_occurrences_legit)
      if word in words_used_by_attacker:
        p_x_spam = p_x_prime_word
      log_odds[word] = custom_log(p_x_spam, p_x_legit)
      log_odds_negative[word] = custom_log(1-p_x_spam, 1 - p_x_legit)
  

  return log_odds, log_odds_negative

  
def cal_log_odds(ig_features):
  log_odds_document_defense = defaultdict()
  log_odds_document_after = defaultdict()
  log_odds_document_before = defaultdict()
  delta_i = defaultdict()
  words_to_remove_or_include = defaultdict()
  sorted_features_words = defaultdict()
  log_odds, log_odds_negative = cal_log_odds_features(ig_features)
  words_added = defaultdict()
  
  total_cost_of_attacker = 0
  total_spam_emails_which_attacker_changed = 0

  correct = 0
  tp = 0
  tn = 0
  fp = 0
  fn = 0

  correct_after = 0
  tp_after = 0
  tn_after = 0
  fp_after = 0
  fn_after = 0

  correct_defense = 0
  tp_defense = 0
  tn_defense = 0
  fp_defense = 0
  fn_defense = 0

  # iterating over the test docs
  for index, row in test_data.iterrows():
    p_spam = total_spam_emails/total_emails
    p_legit = total_legit_emails/total_emails
    words_added[index] = []
    log_odds_document_before[index] = 0

    # summing log odds of all the words in doc
    for word in log_odds:
      if word in test_meta[index]:
        log_odds_document_before[index] += log_odds[word]

    log_odds_document_before[index] += custom_log(p_spam, p_legit)
    
    # comparing with threshold with current log odds 
    if log_odds_document_before[index] > 0:
      prediction = "spam"
    else:
      prediction = "ham"

    if prediction == row['class']:
      correct += 1
      if prediction == "spam":
        tp += 1
      else:
        tn += 1
    else:
      if prediction == "spam":
        fp += 1
      else:
        fn += 1
    
    words_to_remove_or_include[index] = defaultdict()

    delta_i[index] = defaultdict()
    sorted_features_words[index] = defaultdict()
    words_can_be_added_from_features = defaultdict()

    ## copying all the log odds before making the modifications
    log_odds_document_after = log_odds_document_before
    
    # Only considering strategies for spam emails
    if prediction == "spam":
      total_spam_emails_which_attacker_changed += 1
      print("These are the terms which can be flipped/removed for document - %s" % (index[75:]))
      for word in ig_features:
          # calculating deltas (cost), to include or not include a word from the features
          delta_i[index][word] = 0
          if word in test_meta[index]:
            delta_i[index][word] = max(log_odds[word] - log_odds_negative[word], 0)
            if delta_i[index][word] > 0:
              words_to_remove_or_include[index][word] = delta_i[index][word]
          else:
            delta_i[index][word] = min(log_odds[word] - log_odds_negative[word], 0)
            if delta_i[index][word] < 0:
              words_can_be_added_from_features[word] = log_odds[word]
      print(list(words_to_remove_or_include[index].keys()))
      
      words_can_be_added_from_features = OrderedDict(sorted(words_can_be_added_from_features.items(), key=lambda x: x[1]))
      words_added[index] = []
    
      print("Words which the spammer can add with min cost")
      
      # Add words Strategy (Greedy)
      i = 0
      items = list(words_can_be_added_from_features.keys())
      
      # modifying log odds for all words in the spam predicted emails
      while log_odds_document_after[index] > 0 and i < len(items):
          log_odds_document_after[index] += words_can_be_added_from_features[items[i]]
          words_added[index].append(items[i])
          i+=1
          
      print(words_added[index])
      cost = 0
      for word in words_added[index]:
        cost += log_odds[word]

      print("====================Cost Incurred=================")
      print(abs(cost))
      total_cost_of_attacker += abs(cost)

    ## "================Changes by Defense============")
      
    # calculating new log odds before making the modifications
    log_odds_document_defense[index] = 0

    # Computing new log odds document for accounting for the changed words
    
    log_odds_new, log_odds_negative_new = cal_log_odds_features_for_defense(ig_features, words_added[index])
      
    
    for word in log_odds_new:
      if word in test_meta[index]:
        log_odds_document_defense[index] += log_odds_new[word]

    
    ## Comparing the new log odds value now since we added words
    if log_odds_document_after[index] > 0:
      prediction = "spam"
    else:
      prediction = "ham"

    if prediction == row['class']:
      correct_after += 1
      if prediction == "spam":
        tp_after += 1
      else:
        tn_after += 1
    else:
      if prediction == "spam":
        fp_after += 1
      else:
        fn_after += 1

 
    # Comparing the new log odds value now for the defense strategies since we changed probabilites
    if log_odds_document_defense[index] > 0:
      prediction = "spam"
    else:
      prediction = "ham"

    if prediction == row['class']:
      correct_defense += 1
      if prediction == "spam":
        tp_defense += 1
      else:
        tn_defense += 1
    else:
      if prediction == "spam":
        fp_defense += 1
      else:
        fn_defense += 1

  average_cost = total_cost_of_attacker/total_spam_emails_which_attacker_changed
  print("\n")
  print("\n")
  print("\n")
  print("Accuracy for log odds strategy of classification before attacker's strategy was %s" % (str(correct/len(test_data)*100)))
  print("Accuracy for log odds strategy of classification after attacker's strategy was %s" % (str(correct_after/len(test_data)*100)))
  print("Accuracy for log odds strategy of classification after defense's strategy was %s" % (str(correct_defense/len(test_data)*100)))

  print("=============ATTACKER===STATISTICS===========")
  print("Spam Precision before attacker's strategy is %.2f and Spam recall is %.2f" % ((tp/(tp+fp))*100, (tp/(tp+fn))*100))
  print("Spam Precision after attacker's strategy is %.2f and Spam recall is %.2f" % ((tp_after/(tp_after+fp_after))*100, (tp_after/(tp_after+fn_after))*100))
  print("Spam Precision after defense's strategy is %.2f and Spam recall is %.2f" % ((tp_defense/(tp_defense+fp_defense))*100, (tp_defense/(tp_defense+fn_defense))*100))

  print("============================")
  print("False negatives before attacker's strategy (i.e. predicting these as correct hams)", fn)
  print("False negatives after attacker's strategy (i.e. predicting more hams incorrectly now) ", fn_after)
  print("False negative rate", (fn_after/(fn_after + tp_after)))
  print("Average cost of the attacker", average_cost)

  print("\n")
  print("\n")
  print("\n")

  print("===========DEFENSE===STATISTICS=============")
  print("False Positives before defense strategy", fp_after)
  print("False Positives after defense strategy", fp_defense)
  print("False Positive rate", (fp_defense/(fp_defense + tn_defense)))


  print("============================")
  print("False negatives before defense strategy", fn_after)
  print("False negatives after defense strategy (i.e. the incorrect hams which have reduced now, back to what they were", fn_defense)
  print("False negative rate", (fn_defense/(fn_defense + tp_defense)))

In [116]:
cal_log_odds(ig_features_10)

These are the terms which can be flipped/removed for document - 9-625msg1.txt
['free']
Words which the spammer can add with min cost
['linguistic']
9.50847341004848
These are the terms which can be flipped/removed for document - 9-736msg2.txt
['business']
Words which the spammer can add with min cost
['linguistic']
9.50847341004848
These are the terms which can be flipped/removed for document - 9-826msg1.txt
['business']
Words which the spammer can add with min cost
['linguistic']
9.50847341004848
These are the terms which can be flipped/removed for document - 9-884msg1.txt
['remove', 'business']
Words which the spammer can add with min cost
['linguistic']
9.50847341004848
These are the terms which can be flipped/removed for document - 9-902msg1.txt
['free', 'market', 'today']
Words which the spammer can add with min cost
['language']
6.537155970948858
These are the terms which can be flipped/removed for document - 9-887msg1.txt
['market', 'business']
Words which the spammer can add wi