### For a single word

##### 1. Finding the 5 nearest semantically similar words, with their vector similarity scores.

In [14]:
# Import necessary libraries
import gensim.downloader as api
from gensim.models import Word2Vec

# Load the pre-trained Word2Vec model.
# `model` is a module-level name: the helper functions defined below read it
# directly rather than taking it as a parameter.
model = api.load('word2vec-google-news-300')

# Function to find top 5 related words
def find_related_words(word, topn=5):
    """Look up the `topn` nearest neighbours of `word` in the loaded vectors.

    Args:
        word: The query word.
        topn: How many neighbours to request (default 5).

    Returns:
        The result of `model.most_similar(word, topn=topn)` when `word` is a
        key of `model.key_to_index`; otherwise a string explaining that the
        word is not in the vocabulary.
    """
    if word not in model.key_to_index:
        return f"Word '{word}' not in vocabulary"
    return model.most_similar(word, topn=topn)

# Input word
input_word = 'simple'
related_words = find_related_words(input_word)

# Display results.
# find_related_words() returns a plain string when the word is not in the
# vocabulary; iterating over that string would try to unpack single characters
# into (word, similarity) and raise, so print the message as-is in that case.
if isinstance(related_words, str):
    print(related_words)
else:
    for word, similarity in related_words:
        print(f"{word}: {similarity}")


straightforward: 0.7460168600082397
Simple: 0.7108175754547119
uncomplicated: 0.6297484636306763
simplest: 0.6171397566795349
easy: 0.5990299582481384


##### 2. Finding the 5 nearest semantically similar words and defining the semantic-distance helper used in section 3.

In [16]:
# Import necessary libraries
import gensim.downloader as api
from gensim.models import Word2Vec

# Load the pre-trained Word2Vec model.
# NOTE(review): this is the same model name the first cell loads; repeating the
# load keeps this cell runnable on its own but redoes the work on every run.
model = api.load('word2vec-google-news-300')

# Function to find top 5 related words
def find_related_words(word, topn=5):
    """Return the `topn` most similar entries for `word`, or an error string.

    A word that is a key of `model.key_to_index` yields the result of
    `model.most_similar`; any other word yields a "not in vocabulary" message.
    """
    is_known = word in model.key_to_index
    return model.most_similar(word, topn=topn) if is_known else f"Word '{word}' not in vocabulary"

# Function to find semantic similarity distance
def similarity_distance(word1, word2):
    """Return `1 - model.similarity(word1, word2)` for two known words.

    If either word is missing from `model.key_to_index`, an explanatory string
    is returned instead of a number.
    """
    vocabulary = model.key_to_index
    if word1 not in vocabulary or word2 not in vocabulary:
        return f"One or both words '{word1}', '{word2}' not in vocabulary"
    return 1 - model.similarity(word1, word2)

# Input word
input_word = 'simple'
related_words = find_related_words(input_word)

# Display results.
# find_related_words() returns a plain string for an out-of-vocabulary word;
# looping over that string would fail when unpacking (word, similarity), so
# print the message directly in that case.
if isinstance(related_words, str):
    print(related_words)
else:
    print(f"Top 5 related words to '{input_word}':")
    for word, similarity in related_words:
        print(f"{word}: {similarity}")

# The distance between `input_word` and one specific word is computed in the
# next cell (section 3), which calls similarity_distance() defined above.


Top 5 related words to 'simple':
straightforward: 0.7460168600082397
Simple: 0.7108175754547119
uncomplicated: 0.6297484636306763
simplest: 0.6171397566795349
easy: 0.5990299582481384

Semantic similarity distance between 'simple' and 'easy': 0.4009700417518616


##### 3. Finding the semantic distance between two given words.

In [None]:
# Compute and show the distance between two words.
# Relies on `input_word` and `similarity_distance` already being defined by an
# earlier cell in this notebook.
specific_word = 'easy'
distance = similarity_distance(word1=input_word, word2=specific_word)
print(
    f"\nSemantic similarity distance between '{input_word}' and '{specific_word}': {distance}"
)

##### 4. Finding the semantic similarity and semantic distance of the 5 nearest words.

In [17]:
# Import necessary libraries
import gensim.downloader as api
from gensim.models import KeyedVectors

# Load the pre-trained Word2Vec model.
# NOTE(review): same model name as the earlier cells; the reload makes this cell
# self-contained but repeats the load each time the cell runs.
model = api.load('word2vec-google-news-300')

# Function to find top 5 related words based on semantic distance
def find_related_words(word, topn=5):
    """Fetch the `topn` nearest neighbours of `word` with their similarities.

    Returns the list produced by `model.most_similar` for a word present in
    `model.key_to_index`, or a "not in vocabulary" string for any other word.
    """
    vocabulary = model.key_to_index
    if word not in vocabulary:
        return f"Word '{word}' not in vocabulary"
    # most_similar supplies the neighbours together with their similarity scores
    return model.most_similar(word, topn=topn)

# Function returning the cosine similarity of two words.
# Despite its name it does NOT return a distance: callers derive the distance
# themselves as `1 - semantic_distance(...)`.
def semantic_distance(word1, word2):
    """Return `model.similarity(word1, word2)` for two known words.

    If either word is missing from `model.key_to_index`, an explanatory string
    is returned instead of a number.
    """
    vocabulary = model.key_to_index
    if not (word1 in vocabulary and word2 in vocabulary):
        return f"One or both words '{word1}' and '{word2}' not in vocabulary"
    return model.similarity(word1, word2)

# Input word
input_word = 'simple'
related_words = find_related_words(input_word)

# Display results.
# find_related_words() returns a plain string for an out-of-vocabulary word;
# looping over that string would fail when unpacking (word, similarity), so
# print the message directly in that case.
if isinstance(related_words, str):
    print(related_words)
else:
    for word, similarity in related_words:
        # semantic_distance() returns the cosine similarity of the pair (not a
        # distance), so the distance is derived here as 1 - similarity.
        pair_similarity = semantic_distance(input_word, word)
        print(f"{word}: Cosine Similarity = {similarity}, Semantic Distance = {1 - pair_similarity}")



straightforward: Cosine Similarity = 0.7460168600082397, Semantic Distance = 0.25398313999176025
Simple: Cosine Similarity = 0.7108175754547119, Semantic Distance = 0.28918248414993286
uncomplicated: Cosine Similarity = 0.6297484636306763, Semantic Distance = 0.37025147676467896
simplest: Cosine Similarity = 0.6171397566795349, Semantic Distance = 0.3828602433204651
easy: Cosine Similarity = 0.5990299582481384, Semantic Distance = 0.4009700417518616


##### 5. Finding 5 nearest words to the given word (using word2vec).

In [11]:
import gensim.downloader as api

# Load the pretrained Word2Vec model.
# `model` is module-level; find_similar_words() below reads it directly.
model = api.load("word2vec-google-news-300")

def find_similar_words(word, topn=5):
    """Return only the words (no scores) of the `topn` nearest neighbours.

    Args:
        word: The query word.
        topn: How many neighbours to request (default 5).

    Returns:
        A list of neighbour words, or an explanatory string if the lookup
        raises `KeyError`.
    """
    try:
        neighbours = model.most_similar(word, topn=topn)
    except KeyError:
        return f"The word '{word}' not found in the vocabulary."
    else:
        # Each entry is a (word, score) pair; keep the word only.
        return [pair[0] for pair in neighbours]

# Example usage: list the five nearest neighbours of one word.
# find_similar_words() returns either a list of words or an error string; both
# render fine inside the f-string below.
input_word = "happy"
similar_words = find_similar_words(input_word, topn=5)
print(
    f"Words similar to '{input_word}': {similar_words}"
)



Words similar to 'happy': ['glad', 'pleased', 'ecstatic', 'overjoyed', 'thrilled']


##### 6. Finding the Nth nearest word to the given word, with its similarity and semantic distance.

In [19]:
# Import necessary libraries
import gensim.downloader as api
from gensim.models import KeyedVectors

# Load the pre-trained Word2Vec model.
# `model` is module-level; find_nth_similar_word() below reads it directly.
model = api.load('word2vec-google-news-300')

# Function to find the N-th related word
def find_nth_similar_word(word, rank):
    """Return the `rank`-th most similar entry for `word` (1-based).

    Args:
        word: The query word.
        rank: 1-based position in the similarity ranking (1 = nearest).

    Returns:
        The `(word, similarity)` entry at position `rank` of
        `model.most_similar`, or an explanatory string when `word` is not in
        `model.key_to_index` or `rank` is out of range.
    """
    if word not in model.key_to_index:
        return f"Word '{word}' not in vocabulary"

    # Reject invalid ranks before running the similarity query, which is the
    # expensive step.
    if rank < 1:
        return f"Rank {rank} is out of range"

    # Only the first `rank` neighbours are needed to read position `rank`.
    similar_words = model.most_similar(word, topn=rank)
    if rank > len(similar_words):
        # Fewer neighbours came back than were requested.
        return f"Rank {rank} is out of range"

    return similar_words[rank - 1]

# Input word and rank
input_word = 'simple'
rank = 12  # Change this to the desired rank (e.g., 2nd similar, 18th similar, etc.)

# Find the N-th similar word
nth_similar_word = find_nth_similar_word(input_word, rank)

# Display result.
# find_nth_similar_word() returns a string for error cases and a
# (word, similarity) pair otherwise.
if isinstance(nth_similar_word, str):
    print(nth_similar_word)
else:
    word, similarity = nth_similar_word
    # Pick the English ordinal suffix so ranks such as 1, 2, 3 print as
    # "1st", "2nd", "3rd" instead of "1th", "2th", "3th". The teens (11-13)
    # always take "th".
    if 10 <= rank % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(rank % 10, 'th')
    print(f"{rank}{suffix} similar word to '{input_word}': {word} with Cosine Similarity = {similarity}, Semantic Distance = {1 - similarity}")


12th similar word to 'simple': intuitive with Cosine Similarity = 0.5120431780815125, Semantic Distance = 0.48795682191848755


##### 7. Finding 5 nearest words to the given word (using GloVe).

In [13]:
import gensim.downloader as api

# Load the pretrained GloVe model.
# This rebinds the module-level `model`, so any helper that reads `model` will
# use these GloVe vectors until another cell assigns `model` again.
model = api.load("glove-twitter-25")

def find_similar_words(word, topn=5):
    """List the `topn` nearest neighbour words of `word`, without scores.

    Returns a list of words taken from `model.most_similar`, or an explanatory
    string if the lookup raises `KeyError`.
    """
    try:
        scored_neighbours = model.most_similar(word, topn=topn)
    except KeyError:
        return f"The word '{word}' not found in the vocabulary."

    words_only = []
    for neighbour, _score in scored_neighbours:
        words_only.append(neighbour)
    return words_only

# Example usage: list the five nearest neighbours of one word.
# find_similar_words() returns either a list of words or an error string; both
# render fine inside the f-string below.
input_word = "cold"
similar_words = find_similar_words(input_word, topn=5)
print(
    f"Words similar to '{input_word}': {similar_words}"
)


Words similar to 'cold': ['rough', 'snow', 'rain', 'dry', 'bit']


In [20]:
# Ensure you have the necessary libraries installed
!pip install gensim nltk

# Import the required libraries
import gensim.downloader as api
from gensim.models import KeyedVectors
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')

# Load the pre-trained Word2Vec model.
# This rebinds the module-level `model` that the functions below read, replacing
# whatever an earlier cell assigned to it.
model = api.load('word2vec-google-news-300')

# Load the English stopword list from NLTK as a set; transform_sentence() below
# tests membership against it.
stop_words = set(stopwords.words('english'))

# Function to find the most similar word
def find_most_similar_word(word):
    """Return the single nearest neighbour of `word`, or `word` itself.

    Words that are not keys of `model.key_to_index` are passed through
    unchanged so callers can substitute word-by-word without special-casing.
    """
    if word not in model.key_to_index:
        return word  # unknown word: leave it untouched
    # most_similar returns (word, score) pairs; take the word of the top hit
    top_hit = model.most_similar(word, topn=1)[0]
    nearest, _score = top_hit
    return nearest

# Function to transform the sentence
def transform_sentence(sentence):
    """Replace every non-stopword of `sentence` with its nearest neighbour.

    The sentence is split on whitespace. A token whose lowercase form is in
    `stop_words` is kept as-is; every other token is passed (in its original
    case) to `find_most_similar_word`. The results are joined with single
    spaces.
    """
    return ' '.join(
        token if token.lower() in stop_words else find_most_similar_word(token)
        for token in sentence.split()
    )

# Sentence to rewrite, and its word-by-word nearest-neighbour substitution
input_sentence = "president greets the press in chicago"
transformed_sentence = transform_sentence(input_sentence)

# Show the original and the rewritten sentence on consecutive lines
print(
    f"Original sentence: {input_sentence}",
    f"Transformed sentence: {transformed_sentence}",
    sep="\n",
)




[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Original sentence: president greets the press in chicago
Transformed sentence: President greet the media in baltimore


In [25]:
# Ensure you have the necessary libraries installed
!pip install gensim nltk

# Import the required libraries
import gensim.downloader as api
from gensim.models import KeyedVectors
import nltk
from nltk.corpus import stopwords
import re

# Download necessary NLTK data.
# 'stopwords' backs the stopwords.words('english') call below.
# NOTE(review): 'punkt' / 'punkt_tab' are presumably what nltk.word_tokenize
# needs, and nothing visible in this cell references 'wordnet' — confirm before
# trimming any of these downloads.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('punkt_tab')

# Load the pre-trained Word2Vec model.
# This rebinds the module-level `model` that the functions below read.
model = api.load('word2vec-google-news-300')

# Load the English stopword list from NLTK as a set; transform_sentence() below
# tests membership against it.
stop_words = set(stopwords.words('english'))

# Function for text cleaning and preprocessing
def preprocess_text(text):
    """Lowercase `text`, strip non-letter characters, and tokenize it.

    Every character that is neither an ASCII letter nor whitespace is removed,
    so digits are dropped along with punctuation. The cleaned string is then
    split into tokens with `nltk.word_tokenize`.

    Returns:
        The list of tokens produced by `nltk.word_tokenize`.
    """
    lowered = text.lower()
    # Keep letters and whitespace only (digits and punctuation are removed)
    letters_only = re.sub(r'[^a-zA-Z\s]', '', lowered)
    return nltk.word_tokenize(letters_only)

# Function to find the most similar word
def find_most_similar_word(word):
    """Map `word` to its top-ranked neighbour in the loaded vectors.

    A word that is not a key of `model.key_to_index` is returned unchanged.
    """
    in_vocabulary = word in model.key_to_index
    # [0] selects the top (word, score) pair, the second [0] selects its word
    return model.most_similar(word, topn=1)[0][0] if in_vocabulary else word

# Function to transform the sentence
def transform_sentence(sentence):
    """Clean `sentence`, then swap each non-stopword for its nearest neighbour.

    Tokens come from `preprocess_text`. A token found in `stop_words` is kept;
    every other token is replaced by `find_most_similar_word(token)`. The
    resulting tokens are joined with single spaces.
    """
    tokens = preprocess_text(sentence)
    rewritten = [
        token if token in stop_words else find_most_similar_word(token)
        for token in tokens
    ]
    return ' '.join(rewritten)

# Sentence to rewrite, and its cleaned, word-by-word substituted version
input_sentence = "president greets the press in chicago"
transformed_sentence = transform_sentence(input_sentence)

# Show the original and the rewritten sentence on consecutive lines
print(
    f"Original sentence: {input_sentence}",
    f"Transformed sentence: {transformed_sentence}",
    sep="\n",
)




[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Original sentence: president greets the press in chicago
Transformed sentence: President greet the media in baltimore
