# Setup

Run the cells in this section first: they install and import the packages used by the rest of the notebook.

In [None]:
!pip install vaderSentiment

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from scipy import spatial
import numpy as np
import pandas as pd

# Sentiment Analysis

In [None]:
# Create the VADER analyzer and try it out on one sample phrase

analyzer = SentimentIntensityAnalyzer()
word = 'search find'
vs = analyzer.polarity_scores(word)
# Phrase is left-aligned and dash-padded to 65 columns, followed by its scores.
print("{:-<65} {}".format(word, vs))

In [None]:
# Text -> sentiment-analysis (SA) vector via VADER

def text_2_sent_vec(text):
  """Return the analyzer's polarity scores for `text` as a 1-D float array.

  Relies on the notebook-level `analyzer` object. The result has one entry
  per key of the dict returned by `analyzer.polarity_scores(text)`, in that
  dict's iteration order (presumably neg, neu, pos, compound for VADER --
  confirm against the installed version).
  """
  scores = analyzer.polarity_scores(text)
  return np.array([scores[name] for name in scores], dtype=float)

In [None]:
# Example: sentiment vector for a two-word phrase (shown as the cell's output).
text_2_sent_vec("funeral somber")

In [None]:
# Testing analogy pairs with VADER
#
# Each question is laid out as
#   [stem pair, candidate 1, candidate 2, candidate 3, candidate 4, answer]
# where every pair is written "a:b" and `answer` is the 1-based index of the
# correct candidate. A pair is scored by running VADER on the text "a b"; the
# candidate whose sentiment vector has the highest cosine similarity to the
# stem's vector is taken as the prediction.

analyzer = SentimentIntensityAnalyzer()

questions = [["diamond:baseball", "court:poker", "court:jury", "court:grass", "court:squash", 4],
             ["bench:judge", "throne:king", "queen:king", "court:king", "knight:king", 3],
             ["funeral:somber", "tension:festive", "soiree:festive", "eulogy:festive", "sari:festive", 2],
             ["defeat:vanquish", "search:peer", "search:ransack", "search:destroy", "search:find", 4],
             ["slug:land", "shark:seaweed", "shark:ocean", "shark:sky", "shark:slide", 2]]

analogy_predictions = []

for p, q in enumerate(questions):
  print("Q"+ str(p+1) +":")

  # Stem pair "a:b" -> text "a b" -> sentiment vector.
  a = q[0].split(':')
  a_vec = text_2_sent_vec(a[0] + ' ' + a[1])
  print(a_vec)

  # Cosine similarity between the stem and each of the four candidate pairs.
  sim_scores = []
  for candidate in q[1:5]:
    b = candidate.split(':')
    b_vec = text_2_sent_vec(b[0] + ' ' + b[1])
    print(b_vec)

    sim_scores.append(1 - spatial.distance.cosine(a_vec, b_vec))

  print("Similarities:")
  print(sim_scores)
  # np.argmax returns the first maximum, so ties go to the earliest candidate.
  # +1 converts the 0-based position to the 1-based answer index.
  analogy_predictions.append(np.argmax(sim_scores)+1)
  print()

print("-----")

# Compare predictions with the answer key, counting the hits so the cell
# reports an overall score in the same format as the synonym/antonym test.
correct_anal = 0
for i in range(0, len(questions)):
  if questions[i][5] == analogy_predictions[i]:
    correct_anal += 1
    print("Correct answer for Question #" + str(i+1))
  else:
    print("Incorrect answer for Question #" + str(i+1))

print("Correct answers: " + str(correct_anal) + "/" + str(len(questions)))

In [None]:
# Testing with synonyms/antonyms
#
# Columns read from each CSV row: "Question", "Answer1".."Answer4", "Type"
# and "Correct". Rows whose Type is 'synonym' are answered with the most
# similar answer; every other row with the least similar one.

# Need CSV from Project 3
syntest = pd.read_csv("syntest.csv")
analyzer = SentimentIntensityAnalyzer()

prediction_list = []

for row_idx in range(len(syntest)):
  row = syntest.loc[row_idx]
  question_vec = text_2_sent_vec(row.loc["Question"])

  # Cosine similarity between the question and each of the four answers.
  similarities = [
    1 - spatial.distance.cosine(question_vec, text_2_sent_vec(row.loc["Answer" + str(n)]))
    for n in range(1, 5)
  ]

  # Use print to see all computed similarity scores
  #print(row_idx+1, similarities)

  # Closest answer for synonyms, farthest answer otherwise.
  pick = np.argmax if row.loc["Type"] == 'synonym' else np.argmin
  chosen = row.loc["Answer" + str(pick(similarities) + 1)]

  # Record 1 for a correct prediction and 0 for an incorrect one.
  prediction_list.append(1 if chosen == row.loc["Correct"] else 0)

print("Correct answers: " + str(np.count_nonzero(prediction_list)) +"/"+ str(len(prediction_list)) )