In [179]:
import nltk
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
import itertools
import random 

In [119]:
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\itsme\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\itsme\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\itsme\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [103]:
def get_synonyms(word):
    synonyms = []
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.append(lemma.name())
    return set(synonyms)

In [104]:
def find_relevant_words(text):
    tokens = word_tokenize(text)
    pos_tags = nltk.pos_tag(tokens)
    relevant_words = []
    for word, pos in pos_tags:
        if(pos.startswith(('VB', 'JJ', 'RB'))):
            relevant_words.append(word)
    return relevant_words

In [172]:
def word_similarity(word1, word2):
    synsets1 = wordnet.synsets(word1)
    synsets2 = wordnet.synsets(word2)
    max_similarity = 0
    
    for synset1 in synsets1:
        for synset2 in synsets2:
            similarity = synset1.path_similarity(synset2)
            if similarity and similarity > max_similarity:
                max_similarity = similarity
                
    return max_similarity

In [171]:
def replace_with_synonyms(text):
    relevant_words = find_relevant_words(text)
    dict = {}
    
    tokens = word_tokenize(text)
    pos_tags = nltk.pos_tag(tokens)
    
    for word, pos in pos_tags:
        if word in relevant_words:
            synonyms = get_synonyms(word)
            text = []
            count = 0
            for synonym in synonyms:
                if(synonym!=word):
                    similarity = word_similarity(word, synonym)
                    if similarity and similarity >= 0.99 and count<=4:
                        count = count+1
                        text.append(synonym)
            dict[word] = text       
    return dict


In [189]:
def generate_sentences(text, synonym_dict, max_sentences=7):
    tokens = word_tokenize(text)
    pos_tags = nltk.pos_tag(tokens)
    generate_sentences_set = set()
    word_options = []
    for word in tokens:
        if word in synonym_dict:
            word_options.append([word] + synonym_dict[word])
        else:
            word_options.append([word])
    
    all_combinations = list(itertools.product(*word_options))
    
    for combination in all_combinations:
        sentence = ' '.join(combination)
        if(sentence!=text):
            generate_sentences_set.add(sentence)
        if len(generate_sentences_set) >= max_sentences:
            break
    
    return generate_sentences_set

In [190]:
text = "Customer 365 days expiry date starts from the date of sim activation. Sales are expected to drop by half by the end of the year. The future boom that was predicted, now seems uncertain. The expected results will not be met."
sentences = [s.strip() for s in text.split('.') if s.strip()]

selected_sentences = random.sample(sentences, min(5, len(sentences)))
    
for selected_sentence in selected_sentences:
    print(f"Original sentence: {selected_sentence}")
    synonym_dict = replace_with_synonyms(selected_sentence)
    unique_sentences = generate_sentences(selected_sentence, synonym_dict, max_sentences=3)
    for i, sentence in enumerate(unique_sentences, 1):
        print(f"Generated sentence {i}: {sentence}")
    print("\n")

Original sentence: The future boom that was predicted, now seems uncertain
Generated sentence 1: The future boom that was predicted , now seems incertain
Generated sentence 2: The future boom that was predicted , now seems changeable
Generated sentence 3: The future boom that was predicted , now seems uncertain


Original sentence: The expected results will not be met
Generated sentence 1: The expected results will not be fill
Generated sentence 2: The expected results will not be take_on
Generated sentence 3: The expected results will not be fulfil


Original sentence: Sales are expected to drop by half by the end of the year
Generated sentence 1: Sales are expected to sink by half by the end of the year
Generated sentence 2: Sales are expected to drip by half by the end of the year
Generated sentence 3: Sales are expected to drop_off by half by the end of the year


Original sentence: Customer 365 days expiry date starts from the date of sim activation
Generated sentence 1: Customer 