In [None]:
import os

class SpamDetector:
    """Classify an email as spam when it contains any known spam phrase.

    The phrase list is read once, at construction time, from a text file that
    holds one phrase per line.
    """

    def __init__(self, spam_file):
        """Load the spam phrases from ``spam_file``.

        Args:
            spam_file: Path to the phrase file. A falsy or non-existent path
                results in an empty phrase list.
        """
        self.spam_phrases = self.load_spam_phrases(spam_file)

    def load_spam_phrases(self, file):
        """Return the lower-cased, stripped, non-blank lines of ``file``.

        Args:
            file: Path to the phrase file; may be falsy.

        Returns:
            A list of phrases, or ``[]`` when ``file`` is falsy or missing.
        """
        if file and os.path.exists(file):
            # Be explicit: the default encoding depends on the platform locale.
            with open(file, 'r', encoding='utf-8') as f:
                return [line.strip().lower() for line in f if line.strip()]
        return []

    def count_spam_occurrences(self, email_text):
        """Count case-insensitive substring hits of every spam phrase.

        Args:
            email_text: Raw email body.

        Returns:
            The sum, over all phrases, of non-overlapping occurrences of the
            phrase in the lower-cased email text.
        """
        email_text_lower = email_text.lower()
        return sum(
            email_text_lower.count(spam_phrase)
            for spam_phrase in self.spam_phrases
        )

    def classify_email(self, email_text):
        """Return ``"Likely spam"`` if any phrase occurs, else ``"Likely not spam"``."""
        # Simple threshold: a single hit is enough to flag the message.
        if self.count_spam_occurrences(email_text) > 0:
            return "Likely spam"
        return "Likely not spam"

if __name__ == '__main__':
    # Sample phishing-style message used to exercise the detector.
    email = """Urgent! \nPlease verify your bank account by
    clicking the link: ACTION REQUIRED. Please verify your
    Bank of America account information to avoid a hold on
    your account. Click here to confirm: [Link]"""

    # Phrases come from spam_words.txt, resolved against the working directory.
    detector = SpamDetector('spam_words.txt')
    verdict = detector.classify_email(email)
    print(verdict)


In [None]:
import os
import re
import unicodedata

class SpamDetector:
    """Score an email by the share of its words that belong to spam phrases.

    Every word of the email is classified as either:

    * spam  - it lies inside an occurrence of a known spam phrase,
    * ham   - it is neither spam nor a stop word,
    * ignored - it is a stop word outside any spam phrase.
    """

    def __init__(self, spam_file):
        """Load spam phrases from ``spam_file`` and set up the stop words.

        Args:
            spam_file: Path to a text file with one phrase per line. A falsy
                or non-existent path results in an empty phrase list.
        """
        self.spam_phrases = self.load_spam_phrases(spam_file)
        self.stop_words = self.load_stop_words()

    def load_spam_phrases(self, file):
        """Return the lower-cased, stripped, non-blank lines of ``file``.

        Returns ``[]`` when ``file`` is falsy or does not exist.
        """
        if file and os.path.exists(file):
            # Be explicit: the default encoding depends on the platform locale.
            with open(file, 'r', encoding='utf-8') as f:
                return [line.strip().lower() for line in f if line.strip()]
        return []

    def load_stop_words(self):
        """Return the set of common words that never count as ham."""
        return {'a', 'by', 'the', 'of', 'and', 'in', 'on', 'for', 'with', 'is', 'this', 'that', 'to', 'from', 'it'}

    def _tokenize(self, text):
        """Lower-case ``text``, drop non-ASCII marks and punctuation, split on whitespace."""
        ascii_text = (
            unicodedata.normalize('NFKD', text.lower())
            .encode('ascii', 'ignore')
            .decode('ascii')
        )
        return re.sub(r'[^a-z0-9\s]', '', ascii_text).split()

    def calculate_spam_ham_points(self, email_text):
        """Count spam and ham words in ``email_text``.

        Args:
            email_text: Raw email body.

        Returns:
            A ``(spam_points, ham_points)`` tuple. ``spam_points`` is the
            number of words covered by at least one spam-phrase occurrence;
            ``ham_points`` is the number of remaining words that are not stop
            words.
        """
        words = self._tokenize(email_text)
        # is_spam[i] is True when words[i] lies inside a matched phrase.
        is_spam = [False] * len(words)

        for spam_phrase in self.spam_phrases:
            # Tokenise the phrase like the email so punctuation or case in the
            # phrase file cannot prevent a match.
            phrase_words = self._tokenize(spam_phrase)
            width = len(phrase_words)
            if width == 0:
                continue
            for start in range(len(words) - width + 1):
                if words[start:start + width] == phrase_words:
                    is_spam[start:start + width] = [True] * width

        spam_points = sum(is_spam)
        ham_points = sum(
            1
            for word, flagged in zip(words, is_spam)
            if not flagged and word not in self.stop_words
        )
        return spam_points, ham_points

    def classify_email(self, email_text):
        """Print the score summary and return a verdict for ``email_text``.

        Returns:
            ``"Likely not spam"`` for a spam ratio up to 0.2,
            ``"Possible spam"`` up to 0.4, ``"Likely spam"`` above that.
        """
        spam_points, ham_points = self.calculate_spam_ham_points(email_text)
        # max(..., 1) avoids dividing by zero for an email with no scored words.
        spam_ratio = spam_points / max(spam_points + ham_points, 1)
        print(f"Spam Points: {spam_points}, Ham Points: {ham_points}, Spam Ratio: {spam_ratio:.2f}")

        if spam_ratio <= 0.2:
            return "Likely not spam"
        elif spam_ratio <= 0.4:
            return "Possible spam"
        else:
            return "Likely spam"

if __name__ == '__main__':
    # Demo input: a phishing-style message spread over several lines.
    email = """Urgent! \nPlease verify your bank account by
    clicking the link: ACTION REQUIRED. Please verify your
    Bank of America account information to avoid a hold on
    your account. Click here to confirm: [Link]"""

    # Build the detector from the phrase file, then report its verdict.
    detector = SpamDetector('spam_words.txt')
    verdict = detector.classify_email(email)
    print(verdict)

In [2]:
import os
import re
import unicodedata

class SpamDetector:
    """Score an email by comparing spam-phrase hits with its remaining words."""

    def __init__(self, spam_file):
        """Load spam phrases from ``spam_file``.

        Args:
            spam_file: Path to a text file with one phrase per line. A falsy
                or non-existent path results in an empty phrase list.
        """
        self.spam_phrases = self.load_spam_phrases(spam_file)

    def load_spam_phrases(self, file):
        """Return the lower-cased, stripped, non-blank lines of ``file``.

        Returns ``[]`` when ``file`` is falsy or does not exist.
        """
        if file and os.path.exists(file):
            # Be explicit: the default encoding depends on the platform locale.
            with open(file, 'r', encoding='utf-8') as f:
                return [line.strip().lower() for line in f if line.strip()]
        return []

    def clean_text(self, text):
        """Normalise ``text`` for matching.

        Lower-cases the text, drops characters that have no ASCII
        decomposition, removes everything except ``a-z``, ``0-9`` and
        whitespace, and collapses each whitespace run (including newlines)
        into a single space.

        Returns:
            The cleaned string with words separated by single spaces.
        """
        text = unicodedata.normalize('NFKD', text.lower()).encode('ascii', 'ignore').decode('ascii')
        text = re.sub(r'[^a-z0-9\s]', '', text)
        # Line breaks must separate words, not glue them together.
        return ' '.join(text.split())

    def calculate_spam_ham_points(self, email_text):
        """Count spam-phrase occurrences and leftover words.

        Args:
            email_text: Raw email body.

        Returns:
            A ``(spam_points, ham_points)`` tuple. ``spam_points`` is the
            number of whole-word, non-overlapping occurrences of each spam
            phrase; ``ham_points`` is the number of words that are not part
            of any such occurrence.
        """
        words = self.clean_text(email_text).split()
        # covered[i] is True when words[i] lies inside a matched phrase.
        covered = [False] * len(words)
        spam_points = 0

        for spam_phrase in self.spam_phrases:
            # Clean the phrase like the email so both sides compare equal.
            phrase_words = self.clean_text(spam_phrase).split()
            width = len(phrase_words)
            if width == 0:
                continue
            start = 0
            while start + width <= len(words):
                if words[start:start + width] == phrase_words:
                    spam_points += 1
                    covered[start:start + width] = [True] * width
                    start += width  # occurrences of one phrase do not overlap
                else:
                    start += 1

        ham_points = covered.count(False)
        return spam_points, ham_points

    def classify_email(self, email_text):
        """Print the score summary and return a verdict for ``email_text``.

        Returns:
            ``"Likely not spam"`` for a spam ratio up to 0.2,
            ``"Possible spam"`` up to 0.4, ``"Likely spam"`` above that.
        """
        spam_points, ham_points = self.calculate_spam_ham_points(email_text)
        total_points = spam_points + ham_points
        # max(..., 1) avoids dividing by zero for an empty email.
        spam_ratio = spam_points / max(total_points, 1)

        print(f"Spam Points: {spam_points}, Ham Points: {ham_points}, Spam Ratio: {spam_ratio:.2f}")

        if spam_ratio <= 0.2:
            return "Likely not spam"
        elif spam_ratio <= 0.4:
            return "Possible spam"
        else:
            return "Likely spam"

if __name__ == '__main__':
    # Multi-line sample message; the literal contains both an escaped and
    # several real line breaks.
    email = """Urgent! \nPlease verify your bank account by
    clicking the link: ACTION REQUIRED. Please verify your
    Bank of America account information to avoid a hold on
    your account. Click here to confirm: [Link]"""

    # classify_email prints the point summary itself; the verdict is printed here.
    detector = SpamDetector('spam_words.txt')
    verdict = detector.classify_email(email)
    print(verdict)


Spam Points: 0, Ham Points: 32, Spam Ratio: 0.00
Likely not spam


In [5]:
import re

class SpamWordsReader:
    """Read spam keywords from a text file, one keyword per line."""

    def __init__(self, file_path):
        """Remember ``file_path`` and read its keywords into ``spam_words``."""
        self.file_path = file_path
        self.spam_words = self.read_spam_keywords()

    def read_spam_keywords(self):
        """Return the stripped, lower-cased, non-blank lines of the file.

        The file is opened directly, so any error raised by ``open`` for a
        missing or unreadable path propagates to the caller.
        """
        keywords = []
        with open(self.file_path, 'r') as handle:
            for raw_line in handle:
                keyword = raw_line.strip()
                if keyword:
                    keywords.append(keyword.lower())
        return keywords

class Email:
    """Hold an email's raw text together with a cleaned copy of it."""

    # Matches every character that is NOT a lower-case letter, digit,
    # apostrophe or whitespace. The literal 'w' in the class is already
    # covered by a-z, so it has no effect on what is matched.
    _UNWANTED = re.compile(r'[^a-z0-9\'w\s]')

    def __init__(self, content):
        """Store ``content`` and compute ``cleaned_content`` from it."""
        self.content = content
        self.cleaned_content = self.preprocess_content()

    def preprocess_content(self):
        """Return the content lower-cased with unwanted characters removed.

        Removed characters are deleted, not replaced, so whitespace in the
        original text is left exactly as it was.
        """
        lowered = self.content.lower()
        return self._UNWANTED.sub('', lowered)

class SpamDetector:
    """Score already-cleaned email text against a list of spam words or phrases."""

    def __init__(self, spam_words):
        """Store the spam entries.

        Args:
            spam_words: Iterable of strings. Each entry is a single word or a
                multi-word phrase with words separated by whitespace.
        """
        self.spam_words = spam_words

    def calculate_spam_score(self, email):
        """Count the words of ``email`` that belong to a spam entry.

        A single-word entry flags every equal word. A multi-word entry flags
        every word of each place where its words appear consecutively.
        Matching is exact, so the caller is expected to pass text in the same
        case and form as the entries.

        Args:
            email: Email text; it is split on whitespace.

        Returns:
            The number of flagged words (each word counted once).
        """
        words = email.split()
        # Tuples of words allow O(1) lookup for entries of any length.
        phrases = {tuple(entry.split()) for entry in self.spam_words}
        phrases.discard(())  # blank entries can never match
        widths = sorted({len(phrase) for phrase in phrases})

        flagged = [False] * len(words)
        for start in range(len(words)):
            for width in widths:
                end = start + width
                if end > len(words):
                    break  # widths are ascending; longer ones cannot fit either
                if tuple(words[start:end]) in phrases:
                    flagged[start:end] = [True] * width
        return sum(flagged)

    def evaluate_spam_likelihood(self, spam_score, email_length):
        """Translate a score into a human-readable likelihood label.

        Args:
            spam_score: Number of spam words found.
            email_length: Total number of words in the email.

        Returns:
            One of ``"Highly likely to be spam"`` (ratio above 0.4),
            ``"Possibly spam"`` (ratio above 0.2) or
            ``"Unlikely to be spam"``. An empty email is reported as
            unlikely, so the result is always a string.
        """
        if email_length <= 0:
            return "Unlikely to be spam"
        ratio = spam_score / email_length
        if ratio > 0.4:
            return "Highly likely to be spam"
        elif ratio > 0.2:
            return "Possibly spam"
        else:
            return "Unlikely to be spam"

if __name__ == "__main__":
    # Assuming spam_words.txt is in the same directory as this script
    spam_reader = SpamWordsReader("spam_words.txt")
    # The word list does not change between prompts, so build the detector once.
    detector = SpamDetector(spam_reader.spam_words)

    while True:
        try:
            email_content = input("\nEnter the email content (or type 'exit' to quit):\n")
        except EOFError:
            # Input stream closed (e.g. piped input ran out): stop cleanly.
            break
        # Ignore surrounding whitespace so " exit " also ends the session.
        if email_content.strip().lower() == 'exit':
            break

        email = Email(email_content)
        spam_score = detector.calculate_spam_score(email.cleaned_content)
        likelihood = detector.evaluate_spam_likelihood(spam_score, len(email.cleaned_content.split()))

        print(f"\nSpam Score: {spam_score}")
        print(f"Likelihood of Spam: {likelihood}")


Enter the email content (or type 'exit' to quit):
 Urgent! \nPlease verify your bank account by     clicking the link: ACTION REQUIRED. Please verify your     Bank of America account information to avoid a hold on     your account. Click here to confirm: [Link]



Spam Score: 0
Likelihood of Spam: Unlikely to be spam



Enter the email content (or type 'exit' to quit):
 exit
