In [None]:
# Execute in Colab only

In [None]:
# Install NumPy and gensim; gensim's downloader is used further down to fetch
# the pre-trained word vectors.
!pip install numpy
!pip install gensim
# NOTE(review): the "<1.23" upper bound looks unusual next to the 0.53 lower
# bound for numba — confirm the intended version pin.
!pip install "numba>=0.53,<1.23"  # Adjust version if needed
!pip install "tensorflow>=2.1,<2.19" # Adjust version if needed
# Install Hugging Face Transformers for NLP pipelines
!pip install transformers

# Install NLTK for text preprocessing and tokenization
!pip install nltk

# [2] - Import libraries
import torch
import gensim.downloader
api = gensim.downloader
from transformers import pipeline
import nltk
import string
from nltk.tokenize import word_tokenize

# Download the 'punkt' and 'punkt_tab' resources from NLTK (tokenizer data;
# word_tokenize is used by the prompt-rewriting function below).
# NOTE(review): presumably 'punkt_tab' is what newer NLTK releases look up — confirm.
nltk.download('punkt')
nltk.download('punkt_tab')

print("Loading pre-trained word vectors...")
word_vectors = api.load("glove-wiki-gigaword-100")  # Load pre-trained GloVe vectors (not Word2Vec, per the model name)

# [6] - Function to replace words in the prompt with their most similar words
def replace_keyword_in_prompt(prompt, keyword, word_vectors, topn=1):
    """
    Replace only the specified keyword in the prompt with its most similar word.

    Matching is case-insensitive and ignores punctuation attached to a token.
    The nearest neighbour is looked up once and reused for every occurrence of
    the keyword. Punctuation attached to a replaced token is kept, and the
    tokens are detokenized so punctuation is not left separated by spaces.

    Args:
        prompt (str): The original input prompt.
        keyword (str): The word to be replaced with a similar word.
        word_vectors (gensim.models.KeyedVectors): Pre-trained word embeddings.
        topn (int): Number of top similar words to consider (default: 1).

    Returns:
        str: The enriched prompt with the keyword replaced. If the keyword is
        empty, has no neighbours, or is missing from the vocabulary, the
        prompt's tokens are returned unchanged (re-joined).
    """
    # Imported locally: only this function needs the detokenizer, and nltk is
    # already a dependency of this file.
    from nltk.tokenize.treebank import TreebankWordDetokenizer

    target = keyword.lower()  # Normalize once instead of on every iteration
    punctuation = string.punctuation
    tokens = word_tokenize(prompt)  # Tokenize the prompt into words

    replacement = None    # Most similar word, or None when unavailable
    lookup_done = False   # Ensures the embedding lookup happens at most once
    enriched_tokens = []

    for token in tokens:
        core = token.lower().strip(punctuation)  # Normalize token

        # An empty target would otherwise match pure-punctuation tokens,
        # whose normalized form is also the empty string.
        if not target or core != target:
            enriched_tokens.append(token)
            continue

        if not lookup_done:
            lookup_done = True
            try:
                similar_words = word_vectors.most_similar(core, topn=topn)
            except KeyError:
                print(f"'{keyword}' not found in the vocabulary. Using original word.")
                similar_words = []
            if similar_words:
                replacement = similar_words[0][0]  # Choose the most similar word

        if replacement is None:
            enriched_tokens.append(token)  # Keep original if no replacement exists
            continue

        # Re-attach any punctuation that surrounded the keyword in this token.
        leading = token[:len(token) - len(token.lstrip(punctuation))]
        trailing = token[len(token.rstrip(punctuation)):]
        new_token = f"{leading}{replacement}{trailing}"
        print(f"Replacing '{token}' → '{new_token}'")
        enriched_tokens.append(new_token)

    # Detokenize rather than ' '.join so "king ." becomes "king." again and the
    # enriched prompt differs from the original only in the replaced keyword.
    enriched_prompt = TreebankWordDetokenizer().detokenize(enriched_tokens)
    print(f"\nEnriched Prompt: {enriched_prompt}")
    return enriched_prompt

# [7] - Install torch and related libraries
# NOTE(review): `import torch` already ran near the top of this notebook, so on
# a runtime without torch that import would fail before this line is reached —
# consider moving this install above the imports.
!pip install torch torchvision torchaudio

# [8] - Load an open-source Generative AI model (GPT-2)
print("\nLoading GPT-2 model...")
# Text-generation pipeline using the "gpt2" model with the "pt" framework;
# stored in the global `generator` that generate_response calls.
generator = pipeline("text-generation", model="gpt2", framework="pt")

# [9] - Function to generate responses using the Generative AI model
def generate_response(prompt, max_length=100):
    """
    Generate text for a prompt with the module-level `generator` pipeline.

    Args:
        prompt (str): Text passed to the generator.
        max_length (int): Forwarded to the generator as `max_length`
            (default: 100).

    Returns:
        str | None: The 'generated_text' field of the first returned sequence,
        or None if anything raised while generating (the error is printed).
    """
    try:
        outputs = generator(
            prompt,
            max_length=max_length,
            num_return_sequences=1,
        )
        first_sequence = outputs[0]
        generated_text = first_sequence['generated_text']
    except Exception as err:
        # Best-effort: report the failure and signal it with None.
        print(f"Error generating response: {err}")
        return None
    return generated_text

# [10] - Example original prompt
original_prompt = "Who is king."
# f-string so the prompt text is interpolated; without the `f` prefix the
# literal "{original_prompt}" placeholder was printed.
print(f"\n Original Prompt: {original_prompt}")

# [11] - Retrieve similar words for key terms in the prompt
key_term = "king"

# Build the enriched prompt by swapping the key term for its nearest neighbour
enriched_prompt = replace_keyword_in_prompt(original_prompt, key_term, word_vectors)

# [13] - Generate responses for the original and enriched prompts
print("\nGenerating response for the original prompt...")
original_response = generate_response(original_prompt)
print("Original Prompt Response:")
print(original_response)

print("\nGenerating response for the enriched prompt...")
enriched_response = generate_response(enriched_prompt)
print("Enriched Prompt Response:")
print(enriched_response)

# [14] - Compare the outputs of responses
print("\nComparison of Responses:")
# generate_response returns None when generation fails; len() and .count() on
# None would raise, so only compare when both responses exist.
if original_response is None or enriched_response is None:
    print("Comparison skipped: at least one response could not be generated.")
else:
    print("Original Prompt Response Length:", len(original_response))
    print("Enriched Prompt Response Length:", len(enriched_response))
    # "Details" is the number of '.' characters in each response.
    print("Original Prompt Response Details:", original_response.count('.'))
    print("Enriched Prompt Response Details:", enriched_response.count('.'))