In [1]:
# [1]
import gensim.downloader as api
import random
import nltk
from nltk.tokenize import sent_tokenize

# [2]
# Fetch the NLTK 'punkt' tokenizer data (nltk reports when it is already
# up to date).
# NOTE(review): nothing in the visible code calls sent_tokenize, so this
# download may be unnecessary — confirm before removing.
nltk.download('punkt')

# [3]
# Load the pre-trained embeddings once at module level; get_similar_words()
# reads them through the global name `word_vectors`.
# NOTE(review): presumably the model is fetched over the network on first
# use — confirm against the gensim downloader docs.
print("Loading pre-trained word vectors...")
word_vectors = api.load("glove-wiki-gigaword-100")  # 100D GloVe word embeddings
print("Word vectors loaded successfully!")

# [4]
def get_similar_words(seed_word, top_n=5):
    """Look up the nearest neighbours of a word in the loaded embeddings.

    Args:
        seed_word: Word to query against the module-level ``word_vectors``.
        top_n: Number of neighbours to request (forwarded as ``topn``).

    Returns:
        The neighbouring words in the order ``most_similar`` yields them,
        with their scores dropped. An empty list when the lookup raises
        ``KeyError``.
    """
    try:
        neighbours = word_vectors.most_similar(seed_word, topn=top_n)
    except KeyError:
        # Unknown word: tell the user and hand the caller an empty result.
        print(f"'{seed_word}' not found in vocabulary. Try another word.")
        return []
    return [neighbour for neighbour, _score in neighbours]

# [5]
def generate_sentence(seed_word, similar_words):
    """Build one templated sentence about the seed word and its neighbours.

    Each template refers to fixed positions of ``similar_words``. Only the
    templates whose positions exist are candidates, so a list shorter than
    five words still yields a sentence instead of failing on an index.

    Args:
        seed_word: The word the sentence is about.
        similar_words: Related words. Five make every template available;
            two are the minimum needed to fill any template.

    Returns:
        One sentence chosen at random from the usable templates.

    Raises:
        IndexError: If fewer than two similar words are given, because no
            template can be filled.
    """
    available = len(similar_words)

    # (words required, builder). Builders are lazy so an index is evaluated
    # only for the template that is actually selected.
    templates = [
        (2, lambda: f"The {seed_word} is typically associated with {similar_words[0]} and {similar_words[1]}."),
        (3, lambda: f"Most often, people link the term '{seed_word}' to {similar_words[2]}."),
        (4, lambda: f"The term {seed_word} and {similar_words[3]} is a common pair."),
        (5, lambda: f"In the zone of {seed_word}, {similar_words[4]} can be vital."),
    ]
    usable = [build for needed, build in templates if available >= needed]
    if not usable:
        # Same exception type the eager indexing used to raise, with a
        # message that says what is actually wrong.
        raise IndexError(
            f"Need at least 2 similar words to build a sentence, got {available}."
        )

    # If fewer words retrieved, warn and fall back to the templates that fit.
    if available < 5:
        print(f"Warning: Could not find 5 similar words. Generated with {available}.")
    return random.choice(usable)()

# [6]
def generate_paragraph(seed_word, num_sentences=5):
    """Construct a short paragraph from the seed word and its similar words.

    Args:
        seed_word: Word to look up with ``get_similar_words`` (top 5).
        num_sentences: How many sentences to generate and join; defaults
            to 5.

    Returns:
        ``num_sentences`` sentences from ``generate_sentence`` joined by
        single spaces, or a fixed "could not generate" message when no
        similar words were found. Each sentence is generated by a separate
        call, so the same sentence may appear more than once.
    """
    similar_words = get_similar_words(seed_word, top_n=5)
    if not similar_words:
        return "Could not generate a paragraph. Try another seed word."

    sentences = [
        generate_sentence(seed_word, similar_words) for _ in range(num_sentences)
    ]
    return ' '.join(sentences)

# [7]
# Example usage: build and print a paragraph for one seed word.
seed_word = 'river'  # seed word
# The header is printed first, so any message generate_paragraph() prints
# while running appears below it.
print("Generated Paragraph:\n")
print(generate_paragraph(seed_word))


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Shrey\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Loading pre-trained word vectors...
Word vectors loaded successfully!
Generated Paragraph:

In the zone of river, danube can be vital. The term river and valley is a common pair. The term river and valley is a common pair. The river is typically associated with rivers and creek. In the zone of river, danube can be vital.
