In [None]:
# prompt: sentiment analysis model using gensim Word2Vec to calculate the cosine similarity between three sentences

!pip install gensim

import re

import numpy as np
from gensim.models import Word2Vec
from sklearn.metrics.pairwise import cosine_similarity

# Example corpus: two positive movie reviews and one negative one.
sentences = [
    "This is a very good movie.",
    "The movie is excellent.",
    "The film is quite bad."
]


def preprocess_text(text):
    """Lowercase *text* and split it into word tokens.

    Punctuation is dropped so that a word maps to the same token whether or
    not a punctuation mark follows it (``"movie."`` and ``"movie"`` both
    yield ``"movie"``); a plain whitespace split would treat them as two
    different vocabulary entries. Apostrophes inside a word are kept, so a
    contraction such as ``"don't"`` stays a single token.

    Args:
        text: The sentence to tokenize.

    Returns:
        A list of lowercase tokens in their original order; empty when
        *text* contains no word characters.
    """
    return re.findall(r"\w+(?:'\w+)*", text.lower())


# Tokenize every example sentence; the result is the training corpus below.
tokenized_sentences = [preprocess_text(sentence) for sentence in sentences]

# Train a small Word2Vec model on the example sentences (a pre-trained model
# could be loaded instead). min_count=1 keeps every word, because each word
# occurs only a handful of times in this tiny corpus.
# workers=1: gensim's documentation notes that a reproducible run requires a
# single worker thread (plus a fixed PYTHONHASHSEED across interpreter
# launches); with multi-threaded training the similarities printed at the end
# of the script can change from run to run.
model = Word2Vec(
    sentences=tokenized_sentences,
    min_count=1,
    vector_size=100,
    window=5,
    workers=1,
)

def sentence_embedding(sentence, model):
    """Return the mean word vector of *sentence* as a single-row 2-D array.

    Args:
        sentence: Raw sentence text; it is tokenized with ``preprocess_text``.
        model: Object exposing word vectors through ``model.wv`` (membership
            test, lookup by word, and ``vector_size``), such as the trained
            ``Word2Vec`` model in this file.

    Returns:
        An array of shape ``(1, vector_size)``: the element-wise mean of the
        vectors of the tokens found in the vocabulary, or all zeros when no
        token of the sentence is in the vocabulary.
    """
    word_vectors = model.wv
    vectors = [
        word_vectors[token]
        for token in preprocess_text(sentence)
        if token in word_vectors
    ]
    if not vectors:
        # Take the width from the model instead of a literal 100 so the
        # fallback has the same shape as a real embedding for any model.
        return np.zeros((1, word_vectors.vector_size))
    # Reshape to a single row because cosine_similarity expects 2-D input.
    return np.mean(vectors, axis=0).reshape(1, -1)


def _pairwise_similarity(first, second):
    """Return the scalar cosine similarity of two single-row embeddings."""
    # cosine_similarity returns a 1x1 matrix for single-row inputs.
    return cosine_similarity(first, second)[0][0]


# One embedding per example sentence, in corpus order.
sentence1_embedding, sentence2_embedding, sentence3_embedding = (
    sentence_embedding(sentences[idx], model) for idx in range(3)
)

# Compare every pair of sentences.
similarity1_2 = _pairwise_similarity(sentence1_embedding, sentence2_embedding)
similarity1_3 = _pairwise_similarity(sentence1_embedding, sentence3_embedding)
similarity2_3 = _pairwise_similarity(sentence2_embedding, sentence3_embedding)

print(f"Cosine similarity between sentence 1 and 2: {similarity1_2}")
print(f"Cosine similarity between sentence 1 and 3: {similarity1_3}")
print(f"Cosine similarity between sentence 2 and 3: {similarity2_3}")