<a href="https://colab.research.google.com/github/markuryy/Semantic-Similarity-Colab/blob/main/Semantic_Similarity_Visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install plotly transformers torch pandas

In [None]:
import torch
from transformers import CLIPTokenizer, CLIPModel
import plotly.express as px
from sklearn.manifold import TSNE
import pandas as pd

# Load the pretrained CLIP checkpoint and its matching tokenizer. Only the
# text side of the model is used below (via get_text_features).
model_name = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(model_name)
tokenizer = CLIPTokenizer.from_pretrained(model_name)

# Define the target word and related words
target_word = 'virile'
related_words = ['handsome', 'dapper', 'suave', 'debonair', 'masculine', 'gallant', 'vigorous', 'robust']

# The target word goes first, so it is always row 0 of the embedding matrix.
words = [target_word] + related_words

# The t-SNE perplexity must be positive and strictly below the sample count,
# so no valid perplexity exists for fewer than two words. Fail early with a
# clear message instead of an error from inside scikit-learn.
if len(words) < 2:
    raise ValueError(
        "Need the target word plus at least one related word to run t-SNE."
    )

# padding=True lets words that tokenize to different lengths share one batch.
inputs = tokenizer(words, return_tensors="pt", padding=True)

# Get the embeddings for the words; no gradients are needed for inference.
with torch.no_grad():
    embeddings = model.get_text_features(**inputs)

# scikit-learn's TSNE works on NumPy arrays, not torch tensors.
embeddings = embeddings.numpy()

# Reduce dimensions to 3D. scikit-learn requires perplexity < n_samples, so
# derive it from the word list instead of hard-coding it: the value stays 5
# for the default nine words and shrinks automatically for shorter lists.
perplexity = min(5, len(words) - 1)
tsne = TSNE(n_components=3, random_state=0, perplexity=perplexity)
embeddings_3d = tsne.fit_transform(embeddings)

# Prepare data for Plotly: one row per word with its 3D coordinates.
df = pd.DataFrame(embeddings_3d, columns=['x', 'y', 'z'])
df['word'] = words

# Plot the words, labelling each point with its word.
fig = px.scatter_3d(df, x='x', y='y', z='z', text='word')

# The selector restricts the marker resize to traces drawn as markers+text.
fig.update_traces(marker=dict(size=5), selector=dict(mode='markers+text'))
# aspectmode='cube' renders the three axes at equal length.
fig.update_layout(title='3D Semantic Similarity Visualization', scene=dict(aspectmode='cube'))

fig.show()
