In [None]:
!pip install -U sentence-transformers

In [None]:
from sentence_transformers import SentenceTransformer
from numpy import dot
from numpy.linalg import norm
from google.colab import drive
import pandas as pd
import numpy as np
import ast
import pickle
from random import shuffle, choice
import tqdm

In [None]:
# Toy inputs for a quick sanity check: the helpers below treat the first
# entry as the query and the remaining entries as candidates.
sentences = [
  'This is very similar',
  'SNasodaskd',
  'These are quite similar',
  'Different sauce new gravy',
]

In [None]:
# Load the pretrained sentence-embedding model; every model.encode() call in
# this notebook goes through this single module-level instance.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [None]:
# Embed the toy sentences and print the raw vectors as a sanity check.
test_embeddings = model.encode(sentences)
print(test_embeddings)

In [None]:
def cosine_sim(embeddings):
  """Cosine similarity between the first embedding and each of the others.

  Args:
    embeddings: indexable sequence of vectors. Entry 0 is the query; the
      remaining entries are the candidates it is compared against.

  Returns:
    A list holding one similarity score per candidate, in candidate order.
    The list is empty when only the query is supplied.
  """
  query, candidates = embeddings[0], embeddings[1:]
  return [
    dot(query, candidate) / (norm(query) * norm(candidate))
    for candidate in candidates
  ]


In [None]:
def dot_prod(embeddings):
  """Dot product between the first embedding and each of the others.

  Args:
    embeddings: indexable sequence of vectors. Entry 0 is the query; the
      remaining entries are the candidates.

  Returns:
    A list with one dot product per candidate, in candidate order.
  """
  query = embeddings[0]
  return [dot(query, candidate) for candidate in embeddings[1:]]

In [None]:
def make_prediction(songs, threshold=0.6, no_match=3):
  """Pick the candidate text most similar to the query text.

  The texts are embedded with the module-level `model` and compared with
  `cosine_sim`, so `songs[0]` is the query and `songs[1:]` are the candidates.

  Args:
    songs: list of texts; the first is the query, the rest are candidates.
    threshold: minimum cosine similarity the best candidate must reach to
      count as a match. Defaults to 0.6.
    no_match: value returned when the best candidate scores below
      `threshold`. Defaults to 3; pass a value that cannot collide with a
      valid candidate index when supplying more than three candidates.

  Returns:
    The zero-based position of the best candidate within `songs[1:]`, or
    `no_match` when its similarity is below `threshold`.
  """
  embeddings = model.encode(songs)
  sims = cosine_sim(embeddings)
  best_match = np.argmax(sims)
  if sims[best_match] < threshold:
    return no_match
  return best_match


In [None]:
# Cosine similarity of the first toy sentence to each of the other three.
print(cosine_sim(test_embeddings))

[0.12564191, 0.8093578, 0.14082688]


In [None]:
# Raw dot products for the same pairs, for comparison with the cosine scores.
# NOTE(review): the recorded values match the cosine scores, which suggests the
# model returns unit-length vectors — TODO confirm.
print(dot_prod(test_embeddings))

[0.12564191, 0.80935776, 0.14082688]


In [None]:
# End-to-end check on the toy sentences: prints the position of the best
# candidate (counted after the query), or 3 when nothing reaches the threshold.
print(make_prediction(sentences))

1


In [None]:
# Mount Google Drive (google.colab only) so the files under drive/MyDrive
# read by the cells below are reachable.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the song/lyrics table from Drive and preview its first rows.
data = pd.read_csv("drive/MyDrive/NLU/spotify_millsongdata.csv")
data.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [None]:
# Flatten every lyric to one line by deleting carriage returns and newlines;
# str() also turns non-string cells into text.
# NOTE(review): line breaks are removed, not replaced by a space — confirm the
# source lines end in whitespace so that adjacent words do not fuse.
lyrics = [
  str(song).replace("\r", "").replace("\n", "")
  for song in data.text
]

In [None]:
def generate_emotion_dict(path='drive/MyDrive/NLU/song_labels.txt'):
  """Index the lines of a label file by the emotion labels they carry.

  Every non-blank line is parsed with `ast.literal_eval` and iterated as a
  collection of emotion labels. The zero-based line number is recorded under
  each label found on that line.

  Args:
    path: location of the label file. Defaults to the notebook's Drive path.

  Returns:
    A dict mapping each emotion label to the list of line numbers (in
    ascending order) on which it appears.

  Raises:
    OSError: if the file cannot be opened.
    ValueError, SyntaxError: if a non-blank line is not a valid Python literal.
  """
  emotion_dict = {}
  with open(path) as file:
    for k, line in enumerate(file):
      line = line.strip()
      if not line:
        # A blank line carries no labels. It still consumes a line number,
        # so the indices recorded for later lines are unaffected.
        continue
      for emotion in ast.literal_eval(line):
        emotion_dict.setdefault(emotion, []).append(k)
  return emotion_dict

def save_dict(filename, dictionary):
  """Pickle `dictionary` into the file at `filename`.

  The file is opened in binary write mode, so existing content is replaced.
  """
  with open(filename, mode='wb') as out_file:
    pickle.dump(dictionary, out_file)

In [None]:
def generate_testing_samples(indices, samples_per_emotion=100, seed=None):
  """Draw shuffled query/positive/negative trials for every emotion.

  For each emotion `key`, each trial picks a query song `x` and a positive
  song `y1` from that emotion, and a negative song `y2` from a different,
  randomly chosen emotion. The positive and negative are placed after the
  query in random order.

  Args:
    indices: dict mapping an emotion label to a list of song indices.
    samples_per_emotion: number of trials drawn per emotion. Defaults to 100.
    seed: optional seed. When given, a private random generator is used so
      the result is reproducible; when None, the global `random` state is
      used.

  Returns:
    A shuffled list of `[[x, a, b], pos, [key, target_emotion]]` entries,
    where `pos` (0 or 1) is the position of the positive song within
    `[a, b]`.

  Raises:
    IndexError: if only one emotion is present (no negative emotion can be
      chosen) or an emotion has an empty index list.
  """
  import random  # local import: the file only imports shuffle/choice by name

  rng = random.Random(seed) if seed is not None else random
  samples = []
  all_keys = list(indices.keys())
  for key in indices:
    current_indices = indices[key]
    temp_keys = [other for other in all_keys if other != key]
    # A distinct positive only exists when the emotion has two different songs.
    can_differ = len(set(current_indices)) > 1
    for _ in range(samples_per_emotion):
      target_emotion = rng.choice(temp_keys)
      x = rng.choice(current_indices)
      y1 = rng.choice(current_indices)
      # Redraw while the positive is the query itself: an identical text
      # scores a similarity of 1.0 and would make the trial trivially correct.
      while can_differ and y1 == x:
        y1 = rng.choice(current_indices)
      # NOTE(review): a song listed under both emotions can still be drawn
      # as the negative; confirm whether such overlap should be excluded.
      y2 = rng.choice(indices[target_emotion])
      pos = rng.choice([0, 1])
      targets = [x, y2, y1] if pos == 1 else [x, y1, y2]
      samples.append([targets, pos, [key, target_emotion]])
  rng.shuffle(samples)
  return samples
      


In [None]:
def run_performance_test(samples):
  """Score `make_prediction` on a list of query/positive/negative trials.

  Each sample is `[targets, pos, [input_class, target_class]]`: `targets`
  holds indices into the module-level `lyrics` list (query first) and `pos`
  is the expected prediction. Trials for which `make_prediction` returns 3
  (its below-threshold value) are skipped and do not count towards accuracy.

  Args:
    samples: iterable of samples in the shape described above.

  Returns:
    A tuple `(accuracy, wrong)`. `accuracy` is correct / counted trials,
    rounded to two decimals, or 0.0 when no trial was counted. `wrong` maps
    each input emotion to the list of negative emotions involved in its
    misclassified trials.
  """
  total = 0
  correct = 0
  wrong = {'joy': [], 'love': [], 'sadness': [], 'anger': [], 'fear': [], 'surprise': []}
  for sample in tqdm.tqdm(samples):
    targets, pos, classes = sample[0], sample[1], sample[2]
    input_class, target_class = classes[0], classes[1]
    song_lyrics = [lyrics[target] for target in targets]
    prediction = make_prediction(song_lyrics)
    if prediction == pos:
      correct += 1
      total += 1
    elif prediction == 3:
      # No candidate reached the similarity threshold: leave the trial out.
      continue
    else:
      total += 1
      # setdefault keeps labels outside the six preset ones from raising KeyError.
      wrong.setdefault(input_class, []).append(target_class)
  # Guard against every trial being skipped (or an empty sample list).
  accuracy = round(correct / total, 2) if total else 0.0
  print(f"\nOverall Accuracy: {accuracy}")
  return accuracy, wrong

In [None]:
# Map each emotion label to the label-file line numbers that carry it; these
# numbers are later used as positions into `lyrics`.
emotion_indices = generate_emotion_dict()

In [None]:
# save_dict('drive/MyDrive/NLU/emotion_dict.pkl', emotion_indices)

In [None]:
# Draw one randomized set of evaluation trials across all emotions.
testing_samples = generate_testing_samples(emotion_indices)

In [None]:
# Score the model on that sample set; `wrong` collects, per input emotion,
# the negative emotions involved in misclassified trials.
accuracy, wrong = run_performance_test(testing_samples)

100%|██████████| 1200/1200 [00:15<00:00, 75.46it/s]


Overall Accuracy: 0.92





In [None]:
# Repeat the evaluation on three freshly drawn sample sets to see the
# run-to-run spread. `accuracy` and `wrong` are overwritten on every pass, so
# only the last run's `wrong` survives the loop.
# NOTE(review): the recorded progress bars show 600 samples per run here but
# 1200 for the single run above, so the saved outputs appear to come from
# different kernel states — re-run the notebook top to bottom.
accuracies = []
for i in range(3):
  cur_samples = generate_testing_samples(emotion_indices)
  accuracy, wrong = run_performance_test(cur_samples)
  accuracies.append(accuracy)

100%|██████████| 600/600 [00:07<00:00, 77.88it/s]



Overall Accuracy: 0.57


100%|██████████| 600/600 [00:08<00:00, 71.32it/s]



Overall Accuracy: 0.6


100%|██████████| 600/600 [00:07<00:00, 79.91it/s]


Overall Accuracy: 0.78





In [None]:
# Show the accuracy of each of the repeated runs.
print(accuracies)