In [1]:
import math
class UnigramModel:
    def __init__(self,corpus_path: str):
        self.corpus_path = corpus_path
    
    def get_tokenized_sentences(self, file_path: str):
        with open(file= file_path, mode= 'r', encoding= 'utf-8') as f:
            return [sentance.strip().lower().split() for sentance in f]        
        
    def get_words_count(self,tokenized_sentences: list[list[str]]):
        words_count = {}
        for sentence in tokenized_sentences:
            for i in range(len(sentence)):
                w1 = sentence[i]
                if w1 not in words_count:
                    words_count[w1] = 0
                words_count[w1] += 1
        return words_count
    
    def compute_probs(self,words_count: dict):
        return {w1: count/ sum(words_count.values()) for w1, count in words_count.items()}
    
    def generate_next_word(self):
        return max(self.probs,key= self.probs.get)
    
    def build(self):
        tokenized_sentences = self.get_tokenized_sentences(file_path= self.corpus_path)
        words_count = self.get_words_count(tokenized_sentences= tokenized_sentences)
        self.probs = self.compute_probs(words_count= words_count)
        print("Probabilities of All Unigrams: : ",self.probs)
        print("Maxium Probability : ", max(self.probs,key= self.probs.get))

    def compute_perplexity(self,test_corpus_path: str):
        total_word_counts = 0
        total_log_prob = 0
        sentences = self.get_tokenized_sentences(file_path= test_corpus_path)
        for sentence in sentences:
            sentence_prob = 0
            for word in sentence:
                prob = self.probs.get(word, 1 / (sum(self.probs.values()) + len(self.probs)))
                sentence_prob += math.log(prob)
            total_word_counts += len(sentence) 
            total_log_prob += sentence_prob
        return math.exp(-total_log_prob/total_word_counts) # PP(w) = exp(-1/N * ∑log P(w_i|w_previous_words))
        
        
model = UnigramModel(corpus_path= 'ex_corpus.txt')
model.build()
print("Predicted Next Word: ", model.generate_next_word())
print("Perplexity: ", model.compute_perplexity(test_corpus_path= 'test_corpus.txt'))

Probabilities of All Unigrams: :  {'the': 0.08771929824561403, 'cat': 0.017543859649122806, 'sat': 0.017543859649122806, 'on': 0.017543859649122806, 'mat.': 0.017543859649122806, 'dogs': 0.017543859649122806, 'are': 0.03508771929824561, 'great': 0.017543859649122806, 'companions.': 0.017543859649122806, 'i': 0.03508771929824561, 'love': 0.017543859649122806, 'to': 0.05263157894736842, 'play': 0.017543859649122806, 'with': 0.017543859649122806, 'my': 0.017543859649122806, 'dog.': 0.017543859649122806, 'cats': 0.017543859649122806, 'independent': 0.017543859649122806, 'animals.': 0.017543859649122806, 'pets': 0.017543859649122806, 'bring': 0.017543859649122806, 'joy': 0.017543859649122806, 'families.': 0.017543859649122806, 'sun': 0.017543859649122806, 'is': 0.03508771929824561, 'shining': 0.017543859649122806, 'brightly': 0.017543859649122806, 'today.': 0.017543859649122806, 'enjoy': 0.017543859649122806, 'reading': 0.017543859649122806, 'books': 0.017543859649122806, 'in': 0.0175438596

In [2]:
class BigramModel:
    def __init__(self, corpus_path: str):
        self.corpus_path = corpus_path

    def tokenization(self, file_path: str):
        with open(file= file_path, mode= 'r', encoding= 'utf-8') as file:
            return [line.lower().strip().split() for line in file if line.strip()]    
    
    def get_words_count(self,sentences_tokens: list[list[str]]):
        from collections import Counter
        self.unique_word_count = Counter(word for sentence in sentences_tokens for word in sentence)
        bigram_count = {}
        for sentence_tokens in sentences_tokens:
            for i in range(len(sentence_tokens) - 1):
                w1,w2 = sentence_tokens[i], sentence_tokens[i+1]
                if w1 not in bigram_count:
                    bigram_count[w1] = {}
                if w2 not in bigram_count[w1]:
                    bigram_count[w1][w2] = 0
                bigram_count[w1][w2] += 1
        return bigram_count
    
    def compute_probs(self,bigram_counts: dict):
        return {w1 : {w2 : count/self.unique_word_count[w1] for w2, count in w2_count.items()} for w1, w2_count in bigram_counts.items()}

    def generate_next_words(self,input_query: str):
        import time
        input_query = input_query.lower().strip().split()
        last_word = input_query[-1]
        if last_word in self.bigram_probs:
            for _ in range(10):
                if last_word in self.bigram_probs:
                    next_word = max(self.bigram_probs[input_query[-1]], key= self.bigram_probs[input_query[-1]].get)
                    input_query.append(next_word)
                    last_word = next_word
            for word in input_query:
                print(word, end=' ', flush= True)
                time.sleep(0.5)
            print()
        else:
            print(f"Sorry, I couldn't find predictions for '{last_word}'. Try another word!") 

    def compute_perplexity(self,test_corpus_path: str):
        total_word_counts = 0
        total_log_prob = 0
        sentences = self.tokenization(file_path= test_corpus_path)
        for sentence in sentences:
            sentence_prob = 0
            for i in range(len(sentence)-1):
                prob = (self.bigram_probs.get(sentence[i], {}).get(sentence[i+1], 0) + 1) / (sum(self.bigram_probs.get(sentence[i], {}).values()) + len(self.unique_word_count))
                sentence_prob += math.log(prob)
            total_word_counts += len(sentence) 
            total_log_prob += sentence_prob
        return math.exp(-total_log_prob/total_word_counts) # PP(w) = exp(-1/N * ∑log P(w_i|w_previous_words))
    
    def build(self):
        sentences = self.tokenization(file_path= self.corpus_path)
        bigram_counts = self.get_words_count(sentences_tokens= sentences)
        self.bigram_probs = self.compute_probs(bigram_counts= bigram_counts)
        print("Bigram Probs: ", self.bigram_probs)

model = BigramModel(corpus_path= 'ex_corpus.txt')
model.build()

while True:
    query = input('You: ')
    if query.lower() != 'exit':
        model.generate_next_words(input_query= query)
    else:
        break

print("Perplexity: ", model.compute_perplexity(test_corpus_path= 'test_corpus.txt'))

Bigram Probs:  {'the': {'cat': 0.2, 'mat.': 0.2, 'sun': 0.2, 'park.': 0.2, 'weather': 0.2}, 'cat': {'sat': 1.0}, 'sat': {'on': 1.0}, 'on': {'the': 1.0}, 'dogs': {'are': 1.0}, 'are': {'great': 0.5, 'independent': 0.5}, 'great': {'companions.': 1.0}, 'i': {'love': 0.5, 'enjoy': 0.5}, 'love': {'to': 1.0}, 'to': {'play': 0.3333333333333333, 'families.': 0.3333333333333333, 'cook': 0.3333333333333333}, 'play': {'with': 1.0}, 'with': {'my': 1.0}, 'my': {'dog.': 1.0}, 'cats': {'are': 1.0}, 'independent': {'animals.': 1.0}, 'pets': {'bring': 1.0}, 'bring': {'joy': 1.0}, 'joy': {'to': 1.0}, 'sun': {'is': 1.0}, 'is': {'shining': 0.5, 'nice': 0.5}, 'shining': {'brightly': 1.0}, 'brightly': {'today.': 1.0}, 'enjoy': {'reading': 1.0}, 'reading': {'books': 1.0}, 'books': {'in': 1.0}, 'in': {'the': 1.0}, 'weather': {'is': 1.0}, 'nice': {'for': 1.0}, 'for': {'a': 1.0}, 'a': {'walk.': 1.0}, 'she': {'loves': 1.0}, 'loves': {'to': 1.0}, 'cook': {'delicious': 1.0}, 'delicious': {'meals.': 1.0}, 'he': {'pl

In [3]:
class TrigramModel:
    def __init__(self, corpus_path: str):
        self.corpus_path = corpus_path

    def tokenization(self):
        with open(file= self.corpus_path, mode= 'r', encoding= 'utf-8') as file:
            return [line.lower().strip().split() for line in file if line.strip()]    
        
    def get_words_count(self, sentences_tokens: list[list[str]]):
        trigram_count = {}
        for sentence_tokens in sentences_tokens:
            for i in range(len(sentence_tokens) - 2):
                w1, w2, w3 = sentence_tokens[i], sentence_tokens[i+1], sentence_tokens[i+2]
                if w1 not in trigram_count:
                    trigram_count[w1] = {}
                if w2 not in trigram_count[w1]:
                    trigram_count[w1][w2] = {}
                if w3 not in trigram_count[w1][w2]:
                    trigram_count[w1][w2][w3] = 0
                trigram_count[w1][w2][w3] += 1
        return trigram_count
    
    def compute_probs(self, trigram_counts: dict):
        return {w1: {w2: {w3: count / sum(trigram_counts[w1][w2].values())  for w3, count in trigram_counts[w1][w2].items()} for w2 in trigram_counts[w1]}for w1 in trigram_counts}

    def generate_next_word(self, input_query: str):
        input_query = input_query.lower().strip().split()
        if input_query.__len__() >= 2:
            last_two_words = input_query[-2:]
            if last_two_words[-2] in self.trigram_probs and  last_two_words[-1] in self.trigram_probs[last_two_words[-2]]:
                for _ in range(10):
                    if last_two_words[-2] in self.trigram_probs and last_two_words[-1] in self.trigram_probs[last_two_words[-2]]:
                        next_word = max(self.trigram_probs[last_two_words[-2]][last_two_words[-1]], key=self.trigram_probs[last_two_words[-2]][last_two_words[-1]].get)
                        input_query.append(next_word)
                        last_two_words = input_query[-2:]
                for word in input_query:
                    import time
                    print(word, end=' ', flush= True)
                    time.sleep(0.5)
                print()
            else:
                print(f"Sorry, I couldn't find predictions for '{last_two_words[0]}' and '{last_two_words[1]}'. Try another word!") 

        else:
            print("Need at least two words!")

    def build(self):
        tokens = self.tokenization()
        words_count = self.get_words_count(sentences_tokens= tokens)
        self.trigram_probs = self.compute_probs(trigram_counts= words_count)

model = TrigramModel(corpus_path= 'ex_corpus.txt')
model.build()

while True:
    query = input('You: ')
    if query.lower() != 'exit':
        model.generate_next_word(input_query= query)
    else:
        break



cat sat on the mat. 


In [None]:
import math
class UnigramModel:
    def __init__(self,corpus_path: str):
        self.corpus_path = corpus_path
    
    def get_tokenized_sentences(self, file_path: str):
        with open(file= file_path, mode= 'r', encoding= 'utf-8') as f:
            return [sentance.strip().lower().split() for sentance in f]        
        
    def get_words_count(self,tokenized_sentences: list[list[str]]):
        words_count = {}
        for sentence in tokenized_sentences:
            for i in range(len(sentence)):
                w1 = sentence[i]
                if w1 not in words_count:
                    words_count[w1] = 0
                words_count[w1] += 1
        return words_count
    
    def compute_probs(self,words_count: dict):
        return {w1: count/ sum(words_count.values()) for w1, count in words_count.items()}
    
    def generate_next_word(self):
        return max(self.probs,key= self.probs.get)
    
    def build(self):
        tokenized_sentences = self.get_tokenized_sentences(file_path= self.corpus_path)
        words_count = self.get_words_count(tokenized_sentences= tokenized_sentences)
        self.probs = self.compute_probs(words_count= words_count)
        print("Probabilities of All Unigrams: : ",self.probs)
        print("Maxium Probability : ", max(self.probs,key= self.probs.get))

    def compute_perplexity(self,test_corpus_path: str):
        sentence_prob = 0
        total_word_counts = 0
        total_log_prob = 0
        sentences = self.get_tokenized_sentences(file_path= test_corpus_path)
        for sentence in sentences:
            for word in sentence:
                prob = self.probs.get(word,1e-6)
                sentence_prob += math.log(prob)
            total_word_counts += len(sentence) 
            total_log_prob += sentence_prob
        return math.exp(-total_log_prob/total_word_counts) # PP(w) = exp(-1/N * ∑log P(w_i|w_previous_words))
        
        
model = UnigramModel(corpus_path= 'ex_corpus.txt')
model.build()
print("Predicted Next Word: ", model.generate_next_word())
print("Perplexity: ", model.compute_perplexity(test_corpus_path= 'test_corpus.txt'))


class BigramModel:
    def __init__(self, corpus_path: str):
        self.corpus_path = corpus_path

    def tokenization(self, file_path: str):
        with open(file= file_path, mode= 'r', encoding= 'utf-8') as file:
            return [line.lower().strip().split() for line in file if line.strip()]    
    
    def get_words_count(self,sentences_tokens: list[list[str]]):
        from collections import Counter
        unique_word_count = Counter(word for sentence in sentences_tokens for word in sentence)
        bigram_count = {}
        for sentence_tokens in sentences_tokens:
            for i in range(len(sentence_tokens) - 1):
                w1,w2 = sentence_tokens[i], sentence_tokens[i+1]
                if w1 not in bigram_count:
                    bigram_count[w1] = {}
                if w2 not in bigram_count[w1]:
                    bigram_count[w1][w2] = 0
                bigram_count[w1][w2] += 1
        return unique_word_count, bigram_count
    
    def compute_probs(self, unique_word_count: dict, bigram_counts: dict):
        return {w1 : {w2 : count/unique_word_count[w1] for w2, count in w2_count.items()} for w1, w2_count in bigram_counts.items()}

    def generate_next_words(self,input_query: str):
        import time
        input_query = input_query.lower().strip().split()
        last_word = input_query[-1]
        if last_word in self.bigram_probs:
            for _ in range(10):
                if last_word in self.bigram_probs:
                    next_word = max(self.bigram_probs[input_query[-1]], key= self.bigram_probs[input_query[-1]].get)
                    input_query.append(next_word)
                    last_word = next_word
            for word in input_query:
                print(word, end=' ', flush= True)
                time.sleep(0.5)
            print()
        else:
            print(f"Sorry, I couldn't find predictions for '{last_word}'. Try another word!") 

    def compute_perplexity(self,test_corpus_path: str):
        sentence_prob = 0
        total_word_counts = 0
        total_log_prob = 0
        sentences = self.tokenization(file_path= test_corpus_path)
        for sentence in sentences:
            for i in range(len(sentence)-1):
                prob = self.bigram_probs.get(sentence[i],{}).get(sentence[i+1],1e-6)
                sentence_prob += math.log(prob)
            total_word_counts += len(sentence) 
            total_log_prob += sentence_prob
        return math.exp(-total_log_prob/total_word_counts) # PP(w) = exp(-1/N * ∑log P(w_i|w_previous_words))
    
    def build(self):
        sentences = self.tokenization(file_path= self.corpus_path)
        unique_word_count, bigram_counts = self.get_words_count(sentences_tokens= sentences)
        self.bigram_probs = self.compute_probs(unique_word_count= unique_word_count, bigram_counts= bigram_counts)
        print("Bigram Probs: ", self.bigram_probs)

model = BigramModel(corpus_path= 'ex_corpus.txt')
model.build()

while True:
    query = input('You: ')
    if query.lower() != 'exit':
        model.generate_next_words(input_query= query)
    else:
        break

print("Perplexity: ", model.compute_perplexity(test_corpus_path= 'test_corpus.txt'))



class TrigramModel:
    def __init__(self, corpus_path: str):
        self.corpus_path = corpus_path

    def tokenization(self,file_path: str):
        with open(file= file_path, mode= 'r', encoding= 'utf-8') as file:
            return [line.lower().strip().split() for line in file if line.strip()]    
        
    def get_words_count(self, sentences_tokens: list[list[str]]):
        trigram_count = {}
        for sentence_tokens in sentences_tokens:
            for i in range(len(sentence_tokens) - 2):
                w1, w2, w3 = sentence_tokens[i], sentence_tokens[i+1], sentence_tokens[i+2]
                if w1 not in trigram_count:
                    trigram_count[w1] = {}
                if w2 not in trigram_count[w1]:
                    trigram_count[w1][w2] = {}
                if w3 not in trigram_count[w1][w2]:
                    trigram_count[w1][w2][w3] = 0
                trigram_count[w1][w2][w3] += 1
        return trigram_count
    
    def compute_probs(self, trigram_counts: dict):
        return {w1: {w2: {w3: count / sum(trigram_counts[w1][w2].values())  for w3, count in trigram_counts[w1][w2].items()} for w2 in trigram_counts[w1]}for w1 in trigram_counts}

    def generate_next_word(self, input_query: str):
        input_query = input_query.lower().strip().split()
        if input_query.__len__() >= 2:
            last_two_words = input_query[-2:]
            if last_two_words[-2] in self.trigram_probs and  last_two_words[-1] in self.trigram_probs[last_two_words[-2]]:
                for _ in range(10):
                    if last_two_words[-2] in self.trigram_probs and last_two_words[-1] in self.trigram_probs[last_two_words[-2]]:
                        next_word = max(self.trigram_probs[last_two_words[-2]][last_two_words[-1]], key=self.trigram_probs[last_two_words[-2]][last_two_words[-1]].get)
                        input_query.append(next_word)
                        last_two_words = input_query[-2:]
                for word in input_query:
                    import time
                    print(word, end=' ', flush= True)
                    time.sleep(0.5)
                print()
            else:
                print(f"Sorry, I couldn't find predictions for '{last_two_words[0]}' and '{last_two_words[1]}'. Try another word!") 

        else:
            print("Need at least two words!")

    def compute_perplexity(self,test_corpus_path: str):
        sentence_prob = 0
        total_word_counts = 0
        total_log_prob = 0
        sentences = self.tokenization(file_path= test_corpus_path)
        for sentence in sentences:
            for i in range(len(sentence)-2):
                prob = self.trigram_probs.get(sentence[i],{}).get(sentence[i+1],{}).get(sentence[i+2],1e-6)
                sentence_prob += math.log(prob)
            total_word_counts += len(sentence) 
            total_log_prob += sentence_prob
        return math.exp(-total_log_prob/total_word_counts) # PP(w) = exp(-1/N * ∑log P(w_i|w_previous_words))
    
    def build(self):
        tokens = self.tokenization(file_path= self.corpus_path)
        words_count = self.get_words_count(sentences_tokens= tokens)
        self.trigram_probs = self.compute_probs(trigram_counts= words_count)

model = TrigramModel(corpus_path= 'ex_corpus.txt')
model.build()

while True:
    query = input('You: ')
    if query.lower() != 'exit':
        model.generate_next_word(input_query= query)
    else:
        break

print("Perplexity: ", model.compute_perplexity(test_corpus_path= 'test_corpus.txt'))

In [None]:
import re
import nltk
from nltk.tokenize import word_tokenize

def preprocess_text(text:str):
    text = text.lower()
    text = re.sub(r'[^a-z\s]','',text)
    tokens = word_tokenize(text)
    print(tokens)

# text = "I love Natural Language Processing! It's amazing."
# tokens = preprocess_text(text)
# print(tokens)




defaultdict(<class 'int'>, {('<s>', 'i'): 2, ('i', 'love'): 2, ('love', 'nlp'): 1, ('nlp', '</s>'): 1, ('<s>', 'nlp'): 1, ('nlp', 'is'): 1, ('is', 'amazing'): 1, ('amazing', '</s>'): 1, ('love', 'python'): 1, ('python', '</s>'): 1})


In [None]:
bigram_model.get_ngram_prob(ngram=)

defaultdict(int,
            {('<s>',): 3,
             ('i',): 2,
             ('love',): 2,
             ('nlp',): 2,
             ('is',): 1,
             ('amazing',): 1,
             ('python',): 1})

In [9]:
bigram_model.ngram_counts

defaultdict(int,
            {('<s>', 'i'): 2,
             ('i', 'love'): 2,
             ('love', 'nlp'): 1,
             ('nlp', '</s>'): 1,
             ('<s>', 'nlp'): 1,
             ('nlp', 'is'): 1,
             ('is', 'amazing'): 1,
             ('amazing', '</s>'): 1,
             ('love', 'python'): 1,
             ('python', '</s>'): 1})

In [10]:
5-2+1

4

In [134]:
from collections import defaultdict
from nltk.tokenize import word_tokenize
import re, math, random, time

class NGramModel:
    def __init__(self,gram: int, corpus_path: str):
        self.n = gram
        self.corpus_path = corpus_path
        self.ngram_count = defaultdict(int)
        self.context_count = defaultdict(int)
        self.vocob = set()
    
    def tokenization(self, corpus_path: str):
        tokens = []
        with open(file= corpus_path, mode= 'r', encoding= 'utf-8') as f:
            for line in f:
                processed_text = re.sub(r'[^a-z\s]','',line.lower())
                tokens.append(word_tokenize(processed_text))
        return tokens
    
    def train(self):
        sentences_tokens = self.tokenization(corpus_path= self.corpus_path)
        for sentence_tokens in sentences_tokens:
            tokens = ['<s>'] * (self.n-1) + sentence_tokens + ['<\s>']
            for i in range(len(tokens) - self.n + 1):
                ngram = tuple(tokens[i:i+self.n])
                context = tuple(tokens[i:i+self.n-1])
                self.ngram_count[ngram] += 1
                self.context_count[context] += 1
            self.vocob.update(tokens)

    def get_prob(self,word):
        context = word[:-1]
        return (self.ngram_count[word] + 1) / (self.context_count[context] + len(self.vocob))

    def compute_perplexity(self,test_corpus_path: str):
        total_log_prob = 0
        total_word_count = 0
        sentences_token = self.tokenization(corpus_path= test_corpus_path)
        for sentence_token in sentences_token:
            token = ['<s>'] * (self.n + 1) + sentence_token + ['</s>']
            sentence_prob = 0
            for i in range(len(token) - self.n + 1):
                ngram = tuple(token[i:i+self.n])
                prob = self.get_prob(word=ngram)
                sentence_prob += math.log(prob)
            total_log_prob += sentence_prob
            total_word_count += len(token)
        return math.exp(-total_log_prob / total_word_count)
    
    def generate_text(self, input_words: str,max_length=10):
        sentence = ["<s>"] * (self.n - 1) + input_words.split()
        
        for _ in range(max_length):
            context = tuple(sentence[-(self.n-1):])  # Get the last (n-1) words
            possible_ngrams = {ngram: prob for ngram, prob in self.ngram_count.items() if ngram[:-1] == context}
            
            if not possible_ngrams:
                break  # No valid n-grams left
            
            next_word = random.choices(list(possible_ngrams.keys()), weights=list(possible_ngrams.values()))[0][-1]
            if next_word == "</s>":
                break
            
            sentence.append(next_word)
        return " ".join(sentence[self.n-1:]) 

while True:
    ngram_input = int(input("Enter the gram or 0 to exit: "))
    input_word = input("Enter the Input Word : ")
    if ngram_input != 0:
        ngram = NGramModel(gram= ngram_input, corpus_path= 'ex_corpus.txt')
        ngram.train()
        print(f"Model : {ngram_input} Gram")
        perplxity_score = ngram.compute_perplexity(test_corpus_path= 'test_corpus.txt')
        print("Perplxity Score : ", perplxity_score)
        genertaed_sentence = ngram.generate_text(input_words= input_word).replace('<\s>','')
        # print("Generated Sentence : ", genertaed_sentence)
        for word in genertaed_sentence.split():  # Split the sentence into words
            print(word, end=' ', flush=True)  # Print the word followed by a space
            time.sleep(0.5)
        print()
        print('------------------')
    else:
        break


Model : 3 Gram
Perplxity Score :  24.38337832186403
i saw one cat 
------------------
Model : 3 Gram
Perplxity Score :  24.38337832186403
i saw one cat sat on the mat 
------------------
Model : 3 Gram
Perplxity Score :  24.38337832186403
i know she loves to cook delicious meals 
------------------
Model : 6 Gram
Perplxity Score :  12.412640606314818
i know she loves 
------------------


ValueError: invalid literal for int() with base 10: 'exit'

In [None]:
while True:
    ngram_input = int(input("Enter the gram or 0 to exit: "))
    if ngram_input != 0:
        ngram = NGramModel(gram= ngram_input, corpus_path= 'ex_corpus.txt')
        ngram.train()
        print(f"Model : {ngram_input} Gram")
        perplxity_score = ngram.compute_perplexity(test_corpus_path= 'test_corpus.txt')
        print("Perplxity Score : ", perplxity_score)
        genertaed_sentence = ngram.generate_text(input_words= input_word).replace('<\s>','')
        print("Generated Sentence : ", genertaed_sentence)
        print('------------------')
    else:
        break

Model : 2 Gram
Perplxity Score :  32.91145145356951


TypeError: generate_text() missing 1 required positional argument: 'input_words'

In [147]:
with open('ex_corpus.txt','r') as file:
    for line in file:
        print(line.strip().rsplit(r'\t', 1))
        break

['Win a free lottery now!    spam']
