In [None]:
# Install required libraries 
# Install gensim for downloading pre-trained models 
# !pip install gensim 
# Install Hugging Face Transformers for NLP pipelines 
# !pip install transformers 
# Install NLTK for text preprocessing and tokenization 
# !pip install nltk 
 
# Import libraries 
import gensim.downloader as api 
from transformers import pipeline 
import nltk 
import string 
from nltk.tokenize import word_tokenize 
# Download the 'punkt_tab' resource from NLTK 
nltk.download('punkt_tab') 
 
# Load pre-trained word vectors 
print("Loading pre-trained word vectors...") 
word_vectors = api.load("glove-wiki-gigaword-100")  # Load Word2Vec model 
 
# Function to replace words in the prompt with their most similar words 
def replace_keyword_in_prompt(prompt, keyword, word_vectors, topn=1): 
    """ 
                                                          
    Replace only the specified keyword in the prompt with its most similar word. 
 
    Args: 
        prompt (str): The original input prompt. 
        keyword (str): The word to be replaced with a similar word. 
        word_vectors (gensim.models.KeyedVectors): Pre-trained word embeddings. 
        topn (int): Number of top similar words to consider (default: 1). 
 
    Returns: 
        str: The enriched prompt with the keyword replaced. 
    """ 
    words = word_tokenize(prompt)  # Tokenize the prompt into words 
    enriched_words = [] 
 
    for word in words: 
        cleaned_word = word.lower().strip(string.punctuation)  # Normalize word 
         
        if cleaned_word == keyword.lower():  # Replace only if it matches the keyword 
            try: 
                # Retrieve similar word 
                similar_words = word_vectors.most_similar(cleaned_word, topn=topn) 
                if similar_words: 
                    replacement_word = similar_words[0][0]  # Choose the most similar word 
                    print(f"Replacing '{word}' → '{replacement_word}'") 
                    enriched_words.append(replacement_word) 
                    continue  # Skip appending the original word 
            except KeyError: 
                print(f"'{keyword}' not found in the vocabulary. Using original word.") 
 
                                                          
 
        enriched_words.append(word)  # Keep original if no replacement was made 
 
    enriched_prompt = " ".join(enriched_words) 
    print(f"\n🔹 Enriched Prompt: {enriched_prompt}") 
    return enriched_prompt 
 
# Load an open-source Generative AI model (GPT-2) 
print("\nLoading GPT-2 model...") 
generator = pipeline("text-generation", model="gpt2") 
 
# Function to generate responses using the Generative AI model 
def generate_response(prompt, max_length=100): 
    try: 
        response = generator(prompt, max_length=max_length, num_return_sequences=1) 
        return response[0]['generated_text'] 
    except Exception as e: 
        print(f"Error generating response: {e}") 
        return None 
 
# Example original prompt 
original_prompt = "Who is king." 
print(f"\n🔹 Original Prompt: {original_prompt}") 
 
# Retrieve similar words for key terms in the prompt 
key_term = "king" 
 
# Enrich the original prompt 
enriched_prompt = replace_keyword_in_prompt(original_prompt,key_term, 
word_vectors) 
                                                          
 
 


2025-04-25 15:06:08.890771: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745593569.104444   18446 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745593569.156196   18446 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1745593569.574201   18446 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1745593569.574321   18446 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1745593569.574325   18446 computation_placer.cc:177] computation placer alr

Loading pre-trained word vectors...

Loading GPT-2 model...


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cuda:0



🔹 Original Prompt: Who is king.
Replacing 'king' → 'prince'

🔹 Enriched Prompt: Who is prince .


In [4]:
# Generate responses for the original and enriched prompts 
print("\nGenerating response for the original prompt...") 
original_response = generate_response(original_prompt) 
print("\nOriginal Prompt Response:") 
print(original_response) 
 
print("\nGenerating response for the enriched prompt...") 
enriched_response = generate_response(enriched_prompt) 
print("\nEnriched Prompt Response:") 
print(enriched_response) 
 


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Generating response for the original prompt...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Original Prompt Response:
Who is king. And, who is king's father."

2. And it came to pass in their case, when they were on the morrow, the sons of King David were taken up to him and he healed them; and he bore them many children. And the Lord said unto them, Behold, my servant Daniel, my son, and my servant Solomon the son of David, this day I will give you that shall be your king: and he said unto them, Come

Generating response for the enriched prompt...

Enriched Prompt Response:
Who is prince ...."... "He who can bear this name will live forever.... "

[The following line is taken aloud from the Gospel, "For the Lord knows thy Son from the beginning, that is, until this day," and the words in "For."]

This, then, is the word of the Lord, not of Adam, or of the Son, but of Christ, the Eternal, the Most Heaven, and the whole


In [3]:
# Compare the outputs 
print("\nComparison of Responses:") 
print("\nOriginal Prompt Response Length:", len(original_response)) 
print("Enriched Prompt Response Length:", len(enriched_response)) 
print("\nOriginal Prompt Response Detail:", original_response.count(".")) 
print("Enriched Prompt Response Detail:", enriched_response.count(".")) 


Comparison of Responses:

Original Prompt Response Length: 408
Enriched Prompt Response Length: 334

Original Prompt Response Detail: 2
Enriched Prompt Response Detail: 11
