In [None]:
!pip install sentence_transformers

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
import re
from sentence_transformers import SentenceTransformer
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
import pickle
import numpy as np

In [5]:
# set up GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [6]:
all_songs = pd.read_csv('/content/drive/MyDrive/all_songs.csv')
all_songs['artist'] = all_songs['artist'].astype('category') # convert to categorical to get numerical classes
some_songs = all_songs.loc[all_songs['lyrics'].str.startswith('[').fillna(False)]
some_songs

Unnamed: 0,song title,lyrics,artist
0,In Da Club,"[Intro]\nGo, go, go, go, go, go\nGo Shorty, it...",50 Cent
1,21 Questions,[Ad-Libs]\nNew York City\nYou are now rockin'\...,50 Cent
2,Many Men (Wish Death),"[Skit]\nMan, we gotta go get somethin' to eat\...",50 Cent
3,My Life,"[Chorus]\nMy, yeah, yeah, mmm\nMy life, my lif...",50 Cent
4,Patiently Waiting,"[Intro]\nHey Em, you know you're my favorite w...",50 Cent
...,...,...,...
575,Cruisin’,"[Intro]\n(Crusin')\n\n[Verse 1]\nBaby, let's c...",Smokey Robinson
576,Really Gonna Miss You,[Verse 1]\nReally gonna miss you\nIt's really ...,Smokey Robinson
579,The Agony and the Ecstasy,[Verse 1]\nWhat's it all about this crazy love...,Smokey Robinson
582,Ooh Baby Baby,[Verse 1]\nI did you wrong\nMy heart went out ...,Smokey Robinson


In [7]:
# create training samples
X = some_songs.lyrics.values

# create labels
y = some_songs.artist.cat.codes.values

In [8]:
def split_by_verse(song, removeN = True):
    verses = song.split('[')
    for ind in range(1, len(verses)):
        verses[ind] = verses[ind].split(']')[1]
        verses[ind] = verses[ind][1:]
        if removeN:
            verses[ind] = re.sub('\n', ' ', verses[ind])
        verses[ind] = re.sub('  ', '', verses[ind])
    return verses[1:]

def split_by_bar(song, retNum = False):
    verses = split_by_verse(song, removeN=False)
    bars = []
    for verse in verses:
        t_bars = verse.split('\n')
        bars += t_bars[:-2]
    if retNum:
        return len(bars)
    return bars

def split_by_word(song, retNum = False):
    bars = split_by_bar(song)
    words = []
    for bar in bars:
        ws = bar.split(' ')
        words += ws
    if retNum:
        return len(words)
    return words

def get_embeddings(msgs):
    encoder = SentenceTransformer('distilbert-base-nli-mean-tokens')
    embeddings = encoder.encode(msgs)
    return torch.unsqueeze(torch.from_numpy(embeddings), 0)

In [9]:
X = X[:488:20]
y = y[:488:20]

fail_idxs = []

for ind in tqdm(range(len(X))):
    try:
        words = split_by_word(X[ind])
        X[ind] = get_embeddings(words)
    except IndexError:
        # remember to delete empty inputs
        fail_idxs.append(ind)
        continue

# delete all songs which failed to convert into embeddings
X = np.delete(X, fail_idxs)
y = np.delete(y, fail_idxs)

x_train, x_test, y_train, y_test = train_test_split(X, y, train_size = 0.9, shuffle = True)

# convert numpy ys into torch tensor
y_train = torch.from_numpy(y_train).type(torch.LongTensor)
y_test = torch.from_numpy(y_test).type(torch.LongTensor)

# Give dim for y labels
y_train = torch.unsqueeze(y_train, 1)
y_test = torch.unsqueeze(y_test, 1)

  0%|          | 0/25 [00:00<?, ?it/s]

HBox(children=(FloatProgress(value=0.0, max=244733649.0), HTML(value='')))




100%|██████████| 25/25 [01:28<00:00,  3.54s/it]


In [21]:
# with open('/content/drive/MyDrive/embeddings.pkl', 'wb') as f:
#     pickle.dump((X,y), f)

# with open('/content/drive/MyDrive/embeddings.pkl', 'rb') as f:
#     X, y = pickle.load(f)

torch.Size([1, 280, 768])

In [11]:
y_train = y_train.to(device)

In [16]:
class BiDir_LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
      super(BiDir_LSTM, self).__init__()

      #define hidden size
      self.hidden_size = hidden_size

      #define rnn layer
      self.lstm = nn.LSTM(input_size, hidden_size, 1, batch_first=True, bidirectional=True)
      #define fully connected layer
      self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, input):

      # # # init hidden state
      h0 = torch.zeros(2, 1, self.hidden_size).to(device)
      c0 = torch.zeros(2, 1, self.hidden_size).to(device)

      # pass input and h0 into rnn
      out, (h_out, c_out) = self.lstm(input, (h0, c0))

      out = out.reshape(-1, self.hidden_size)
      out = self.fc(out[-1])
      out = F.log_softmax(torch.unsqueeze(out, 0), dim=1)
      return out

In [17]:
num_artists = len(np.unique(some_songs.artist.cat.codes.values))

# define hyper params
rnn_model = BiDir_LSTM(input_size=768, hidden_size=128, output_size=num_artists).to(device)

num_epochs = 50
lr = 0.05

criterion = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(rnn_model.parameters(), lr=lr)

In [18]:
for epoch in range(num_epochs):
    for x_val, y_val in zip(x_train, y_train):
        rnn_model.train()
        optimiser.zero_grad()

        x_val = x_val.to(device)
        out = rnn_model(x_val)
        train_loss = criterion(out, y_val)

        train_loss.backward()
        optimiser.step()

In [24]:
def Accuracy(xs, ys):
    correct = 0
    for i in range(len(xs)):
        # run through model
        test_sample = xs[i].to(device)
        pred = rnn_model(test_sample)
        # calc argmax
        pred = torch.argmax(pred).item()
        # sum up correct predictions
        correct += (pred == ys[i].item())
    return correct/len(xs)

test_acc = Accuracy(x_test, y_test)
print('{}% Test Accuracy'.format(test_acc*100))

train_acc = Accuracy(x_train, y_train)
print('{}% Train Accuracy'.format(train_acc*100))

3
3
3
0.0% Test Accuracy
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
9.090909090909092% Train Accuracy
