In [None]:
import re

# Class for reading and storing spam keywords from a file
class SpamWordsReader:
    """Load spam keywords/phrases from a text file holding one entry per line.

    Attributes:
        file_path: Path of the keyword file given to the constructor.
        spam_words: Set of lower-cased, whitespace-trimmed keywords read
            from ``file_path`` when the instance is created.
    """

    def __init__(self, file_path):
        """Store ``file_path`` and immediately load the keywords from it.

        Raises:
            OSError: If the file cannot be opened (e.g. it does not exist).
        """
        self.file_path = file_path
        # Load spam words from the specified file
        self.spam_words = self.read_spam_keywords()

    def read_spam_keywords(self):
        """Read the keyword file and return its entries as a set.

        Each line is trimmed and lower-cased; lines that are empty after
        trimming are skipped. Duplicates collapse because a set is returned.

        Returns:
            set[str]: The distinct keywords found in the file.
        """
        # An explicit encoding keeps decoding independent of the platform
        # locale; "utf-8-sig" additionally drops a leading byte-order mark so
        # it cannot end up glued to the first keyword.
        with open(self.file_path, 'r', encoding='utf-8-sig') as file:
            stripped_lines = (line.strip() for line in file)
            return {entry.lower() for entry in stripped_lines if entry}

# Class representing an email with content processing capabilities
class Email:
    """An email's raw text plus a normalized copy used for spam detection.

    Attributes:
        content: The text exactly as passed to the constructor.
        cleaned_content: ``content`` after ``preprocess_content`` has been
            applied (computed once, at construction time).
    """

    def __init__(self, content):
        """Store the raw email text and compute its cleaned form."""
        self.content = content
        # Preprocess the content for spam detection
        self.cleaned_content = self.preprocess_content()

    def preprocess_content(self):
        """Return the email text lower-cased with punctuation blanked out.

        Every character other than ``a-z``, ``0-9``, the apostrophe and
        whitespace is replaced by a single space (characters are replaced
        one-for-one, so runs of spaces are not collapsed).

        Returns:
            str: The normalized text.
        """
        content = self.content.lower()
        # Typographic apostrophes (U+2019) are folded to the ASCII apostrophe
        # first; otherwise a contraction such as "don’t" would be split into
        # "don" and "t" by the substitution below.
        content = content.replace('\u2019', "'")
        # Remove punctuation and special characters, replace with space
        return re.sub(r"[^a-z0-9'\s]", ' ', content)

# Class for detecting spam in an email
class SpamDetector:
    """Score text against a collection of spam keywords/phrases.

    Attributes:
        spam_words: Iterable of keywords or multi-word phrases supplied to
            the constructor; matching is exact (no case folding is done here).
    """

    # Words that are ignored when counting the "ham" (non-stop) words.
    # Built once for the class instead of on every scoring call.
    STOP_WORDS = frozenset({
        'a', 'by', 'the', 'of', 'and', 'in', 'on', 'for', 'with',
        'about', 'above', 'after', 'again', 'against', 'all', 'am', 'an', 'any', 'are', "aren't",
        'as', 'at', 'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both',
        'but', 'can', "can't", 'cannot', 'could', "couldn't", 'did', "didn't", 'do', 'does',
        "doesn't", 'doing', "don't", 'down', 'during', 'each', 'few', 'from', 'further', 'had',
        "hadn't", 'has', "hasn't", 'have', "haven't", 'having', 'he', "he'd", "he'll", "he's",
        'her', 'here', "here's", 'hers', 'herself', 'him', 'himself', 'his', 'how', "how's",
        'i', "i'd", "i'll", "i'm", "i've", 'if', 'into', 'is', "isn't", 'it', "it's", 'its',
        'itself', 'let', "let's", 'me', 'more', 'most', "mustn't", 'my', 'myself', 'no', 'nor',
        'not', 'off', 'once', 'only', 'or', 'other', 'ought', 'our', 'ours', 'ourselves', 'out',
        'over', 'own', 'same', "shan't", 'she', "she'd", "she'll", "she's", 'should', "shouldn't",
        'so', 'some', 'such', 'than', 'that', "that's", 'their', 'theirs', 'them', 'themselves',
        'then', 'there', "there's", 'these', 'they', "they'd", "they'll", "they're", "they've",
        'this', 'those', 'through', 'to', 'too', 'under', 'until', 'up', 'very', 'was', "wasn't",
        'we', "we'd", "we'll", "we're", "we've", 'were', "weren't", 'what', "what's", 'when',
        "when's", 'where', "where's", 'which', 'while', 'who', "who's", 'whom', 'why', "why's",
        'won', "won't", 'would', "wouldn't", 'you', "you'd", "you'll", "you're", "you've", 'your',
        'yours', 'yourself', 'yourselves', 't', 's',
    })

    def __init__(self, spam_words):
        """Initialize the SpamDetector with a set of spam words."""
        self.spam_words = spam_words

    @staticmethod
    def _count_phrase(tokens, phrase):
        """Count the positions in ``tokens`` where the word list ``phrase`` starts."""
        size = len(phrase)
        # When the phrase is longer than the text the range is empty -> 0.
        return sum(
            1
            for start in range(len(tokens) - size + 1)
            if tokens[start:start + size] == phrase
        )

    def calculate_spam_score(self, email):
        """Count spam-keyword hits and non-stop words in ``email``.

        The text is split on whitespace and each spam entry is matched as a
        sequence of whole words. Working on tokens (rather than counting
        ``' word '`` substrings) means back-to-back repeats such as
        "free free" are both counted and extra spaces left where punctuation
        was removed do not prevent a multi-word phrase from matching.

        Args:
            email: The (already preprocessed) email text as a string.

        Returns:
            tuple[int, int]: ``(spam_count, ham_count)`` where ``spam_count``
            is the total number of spam-entry occurrences and ``ham_count``
            is the number of words not in ``STOP_WORDS``. Words that are
            spam keywords still count towards ``ham_count``.
        """
        tokens = email.split()

        spam_count = 0
        for spam_word in self.spam_words:
            phrase = spam_word.split()
            # Skip blank entries: they contain no word to look for.
            if phrase:
                spam_count += self._count_phrase(tokens, phrase)

        # Count the words that are not stop words as ham
        ham_count = sum(1 for word in tokens if word not in self.STOP_WORDS)
        return spam_count, ham_count

    def evaluate_spam_likelihood(self, spam_score, ham_score):
        """Classify an email from its spam and ham counts.

        Args:
            spam_score: Number of spam-keyword occurrences.
            ham_score: Number of non-stop words.

        Returns:
            str: ``"Unable to determine (no ham words)"`` when ``ham_score``
            is 0; otherwise a label chosen from the ratio
            ``spam_score / ham_score``: above 0.4 is highly likely spam,
            above 0.2 is possibly spam, anything else is unlikely spam.
        """
        # Guard the division below: with no non-stop words there is no ratio.
        if ham_score == 0:
            return "Unable to determine (no ham words)"
        ratio = spam_score / ham_score
        if ratio > 0.4:
            return "Highly likely to be spam"
        if ratio > 0.2:
            return "Possibly spam"
        return "Unlikely to be spam"

# Main execution loop for spam detection
if __name__ == "__main__":
    # Interactive session: load the keyword list once, then score every
    # email the user types until they enter 'exit' or input ends.
    spam_reader = SpamWordsReader("spam_words.txt")

    # The keyword collection does not change between prompts, so a single
    # detector is created up front and reused for every email.
    detector = SpamDetector(spam_reader.spam_words)

    while True:
        # Prompt the user to enter email content or exit the program.
        try:
            email_content = input("\nEnter the email content (or type 'exit' to quit):\n")
        except EOFError:
            # Input stream closed (e.g. piped input ran out): stop cleanly
            # instead of ending with a traceback.
            break

        # Surrounding whitespace is ignored so that " exit" or "exit " also quit.
        if email_content.strip().lower() == 'exit':
            break

        # Create an Email object with the provided content.
        email = Email(email_content)

        # Calculate the spam and ham scores for the provided email content.
        spam_score, ham_score = detector.calculate_spam_score(email.cleaned_content)

        # Evaluate the likelihood of the email being spam based on the scores.
        likelihood = detector.evaluate_spam_likelihood(spam_score, ham_score)

        # Print the results: spam score, ham score, and spam likelihood.
        print(f"\nSpam Score: {spam_score}")
        print(f"Ham Score: {ham_score}")
        print(f"Likelihood of Spam: {likelihood}")


Enter the email content (or type 'exit' to quit):
 Dear Friend,  Are you ready to earn extra cash and achieve financial freedom? This is a once in a lifetime opportunity to be your own boss and join the ranks of those who have already earned big bucks!  🌟 100% Free, No Obligation! 🌟  With our exclusive program, you can double your income, eliminate bad credit, and enjoy risk-free investments! Here's what we offer:  Special Promotion: Sign up today and get 100% more on your first investment. Free Membership: Access our resources and community at no cost. Cash Bonus: Earn a cash bonus just for joining. No Hidden Fees: What you see is what you get. 🚀 Act Now for a Limited Time!  This isn't just any offer - it's your chance to save big money and consolidate debt. Plus, with our satisfaction guaranteed policy, you have nothing to lose.  Click Here to unlock your potential and start earning today! Remember, this incredible deal won’t last forever.  Free Gift awaits the first 100 members! Do


Spam Score: 39
Ham Score: 97
Likelihood of Spam: Highly likely to be spam



Enter the email content (or type 'exit' to quit):
 I'm 100% satisfied with our team's performance this quarter, and as a token of appreciation, the company has decided to offer a special promotion. For those who have consistently exceeded their targets, there will be an extra cash bonus. Also, we are introducing a free trial of our new wellness program to support your health and well-being. Remember, this initiative is about acknowledging your hard work and dedication – it's our way of saying thank you!



Spam Score: 8
Ham Score: 38
Likelihood of Spam: Possibly spam



Enter the email content (or type 'exit' to quit):
 Dear Team,  I'm pleased to report our recent project is advancing well, thanks to everyone's hard work and collaborative spirit. As we move into the next phase, let's focus on refining our strategies to meet our goals efficiently. Your innovative ideas and feedback are always welcome and valued. Please remember to balance your workload to maintain productivity and personal well-being. Looking forward to our continued success.



Spam Score: 0
Ham Score: 41
Likelihood of Spam: Unlikely to be spam


## Spam email
Dear Friend,

Are you ready to earn extra cash and achieve financial freedom? This is a once in a lifetime opportunity to be your own boss and join the ranks of those who have already earned big bucks!

🌟 100% Free, No Obligation! 🌟

With our exclusive program, you can double your income, eliminate bad credit, and enjoy risk-free investments! Here's what we offer:

Special Promotion: Sign up today and get 100% more on your first investment.
Free Membership: Access our resources and community at no cost.
Cash Bonus: Earn a cash bonus just for joining.
No Hidden Fees: What you see is what you get.
🚀 Act Now for a Limited Time!

This isn't just any offer - it's your chance to save big money and consolidate debt. Plus, with our satisfaction guaranteed policy, you have nothing to lose.

Click Here to unlock your potential and start earning today! Remember, this incredible deal won’t last forever.

Free Gift awaits the first 100 members! Don’t miss out - apply now!

## Possibly Spam Email
I'm 100% satisfied with our team's performance this quarter, and as a token of    
appreciation, the company has decided to offer a special promotion. For those who    
have consistently exceeded their targets, there will be an extra cash bonus. Also, we    
are introducing a free trial of our new wellness program to support your health and    
well-being. Remember, this initiative is about acknowledging your hard work and     
dedication – it's our way of saying thank you!    

## Regular Email
Dear Team,

I'm pleased to report our recent project is advancing well, thanks to everyone's hard    
work and collaborative spirit. As we move into the next phase, let's focus on refining    
our strategies to meet our goals efficiently. Your innovative ideas and feedback are    
always welcome and valued. Please remember to balance your workload to maintain    
productivity and personal well-being. Looking forward to our continued success.     