# Recommending Other Songs by Lyrics

In [1]:
import pandas as pd
import numpy as np
import json
import ast
from sklearn.neighbors import NearestNeighbors

## Use Pretrained GloVe Model to Vectorize Lyrics
Note: Because of its file size, *glove.6B.50d.txt* is not included in this repo. To obtain the latest version of the pretrained word vectors, go to the [Official Page for Stanford's Global Vectors for Word Representation](https://nlp.stanford.edu/projects/glove/), then place the file at `recommendation_resources/glove.6B.50d.txt` before running the next cell.

In [2]:
def loadGloveModel(path='recommendation_resources/glove.6B.50d.txt'):
    """Load pretrained GloVe word vectors from a text file into a dict.

    Each non-empty line is expected to hold a token followed by its
    whitespace-separated vector components.

    Parameters
    ----------
    path : str, optional
        Location of the GloVe text file. Defaults to the path this notebook
        has always read from, so existing ``loadGloveModel()`` calls are
        unaffected.

    Returns
    -------
    dict
        Maps each token (str) to a 1-D ``numpy.ndarray`` of floats.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    ValueError
        If a vector component cannot be parsed as a float.
    """
    print("Loading Glove Model")
    gloveModel = {}
    # `with` closes the handle even if parsing fails part-way through.
    # The encoding is explicit so decoding does not depend on the platform
    # default (the published GloVe files are presumably UTF-8 - confirm if a
    # different embedding file is substituted).
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            splitLines = line.split()
            if not splitLines:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            word = splitLines[0]
            wordEmbedding = np.array([float(value) for value in splitLines[1:]])
            gloveModel[word] = wordEmbedding
    print(len(gloveModel)," words loaded!")
    return gloveModel

# Load the embeddings once; get_vector() below looks words up in this dict.
gloveModel=loadGloveModel()

Loading Glove Model
400000  words loaded!


In [3]:
# Load the preprocessed lyrics. clean_lyrics is stored as the string form of a
# list, so parse every cell back into a real Python list.
rec_df = pd.read_csv('recommendation_resources/preprocessed_lyrics.csv', index_col=0)
rec_df['clean_lyrics'] = rec_df['clean_lyrics'].map(ast.literal_eval)
rec_df.head()

Unnamed: 0,artist,song_name,file_name,lyrics,clean_lyrics
0,Gary Wright,Love Is Alive,love_is_alive.txt,"Well, I think it's time to get ready To realiz...","[well, think, time, get, ready, realize, find,..."
1,LMFAO,Party Rock Anthem,party_rock_anthem.txt,Party rock Let's go! Party rock is in the hou...,"[party, rock, let, us, go, party, rock, house,..."
2,Metallica,2 X 4,2_x_4.txt,"I’m gonna make you, shake you, take you I’m go...","[go, make, shake, take, go, one, break, put, s..."
3,Metallica,Prince Charming,prince_charming.txt,There’s a black cloud overhead That’s me And t...,"[black, cloud, overhead, poison, ivy, choke, t..."
4,Metallica,Now That We'Re Dead,now_that_we're_dead.txt,When darkness falls May it be That we should s...,"[darkness, fall, may, see, light, reaper, call..."


In [4]:
# Create column for GloVe vectors. Use zero arrays for words not in GloVe.
def get_vector(word, model=None):
    """Return the embedding for ``word``, or a zero vector if it is unknown.

    Parameters
    ----------
    word : str
        Token to look up.
    model : dict, optional
        Mapping of token -> 1-D vector. Defaults to the notebook-level
        ``gloveModel``.

    Returns
    -------
    numpy.ndarray
        The stored vector for ``word``, or a new all-zero vector of the same
        length when ``word`` is not in ``model``.

    Raises
    ------
    ValueError
        If ``word`` is missing and ``model`` is empty, so the vector length
        cannot be determined.
    """
    if model is None:
        model = gloveModel
    try:
        return model[word]
    except KeyError:
        # Only a missing key means "unknown word"; any other error should
        # surface instead of silently becoming a zero vector.
        # Size the placeholder from any stored vector rather than relying on
        # the literal token 'word' being part of the vocabulary.
        sample = next(iter(model.values()), None)
        if sample is None:
            raise ValueError("embedding model is empty; cannot size a zero vector")
        return np.zeros(len(sample))

# For simple KNN, take average of vectorized words in cleaned lyrics
def average_lyric_vector(words):
    """Average the word vectors of one song's cleaned lyrics.

    Returns a zero vector when ``words`` is empty: ``np.mean`` of an empty
    list yields a scalar NaN, which could not be stacked with the other
    songs' vectors for the nearest-neighbour fit.
    """
    if len(words) == 0:
        return np.zeros(len(next(iter(gloveModel.values()))))
    return np.mean([get_vector(word) for word in words], axis=0)

rec_df['vectors'] = rec_df.clean_lyrics.apply(average_lyric_vector)
rec_df.head()

Unnamed: 0,artist,song_name,file_name,lyrics,clean_lyrics,vectors
0,Gary Wright,Love Is Alive,love_is_alive.txt,"Well, I think it's time to get ready To realiz...","[well, think, time, get, ready, realize, find,...","[0.19744645945945963, 0.37155174594594587, 0.2..."
1,LMFAO,Party Rock Anthem,party_rock_anthem.txt,Party rock Let's go! Party rock is in the hou...,"[party, rock, let, us, go, party, rock, house,...","[-0.07758194069264084, 0.05038790173160177, 0...."
2,Metallica,2 X 4,2_x_4.txt,"I’m gonna make you, shake you, take you I’m go...","[go, make, shake, take, go, one, break, put, s...","[0.23941783802816885, 0.06187164084507043, 0.0..."
3,Metallica,Prince Charming,prince_charming.txt,There’s a black cloud overhead That’s me And t...,"[black, cloud, overhead, poison, ivy, choke, t...","[0.09181784571428582, 0.24025833714285713, 0.0..."
4,Metallica,Now That We'Re Dead,now_that_we're_dead.txt,When darkness falls May it be That we should s...,"[darkness, fall, may, see, light, reaper, call...","[0.26997853366336627, 0.3395790495049503, 0.02..."


## Generate Recommendations with K-Nearest Neighbors

In [5]:
# Default settings for KNN will suffice for this model
song_vectors = np.vstack(list(rec_df.vectors))  # one row per song
knn = NearestNeighbors()
knn.fit(song_vectors)

# Get the 5 nearest neighbors of every song.
# Calling kneighbors() without a query matrix returns the neighbors of each
# fitted point and does not count a point as its own neighbor. Querying with
# the training data and dropping column 0 is only correct when no two songs
# share an identical vector; otherwise the song itself may sit in a later
# column and a genuine neighbor gets dropped instead.
recos = list(knn.kneighbors(n_neighbors=5, return_distance=False))
recos[:5]

[array([1555, 1721, 1972,  691,   30]),
 array([1491,  438,  737, 1223, 1638]),
 array([1465,  446,  391,  227, 1229]),
 array([1554,  889,  248,  870, 1217]),
 array([2077,  824, 1969, 1484, 1838])]

In [6]:
# Get details of recommendations based on reco id's.
# The ids in `recos` are positional row numbers (the order of the vectors
# passed to knn.fit), not index labels, so look rows up by position. Label
# lookups such as rec_df.song_name[j] only give the right row when the index
# is exactly 0..n-1.
song_names = rec_df.song_name.to_numpy()
artists = rec_df.artist.to_numpy()
file_names = rec_df.file_name.to_numpy()

# One dict per song: {recommended song_name: {artist: file_name}}.
# NOTE(review): keyed by song name, so two recommended songs with the same
# title collapse into a single entry - confirm that is acceptable downstream.
all_recs = [
    {song_names[j]: {artists[j]: file_names[j]} for j in rec_ids}
    for rec_ids in recos
]

rec_df['recommendations'] = all_recs
rec_df.head()

Unnamed: 0,artist,song_name,file_name,lyrics,clean_lyrics,vectors,recommendations
0,Gary Wright,Love Is Alive,love_is_alive.txt,"Well, I think it's time to get ready To realiz...","[well, think, time, get, ready, realize, find,...","[0.19744645945945963, 0.37155174594594587, 0.2...",{'Cause I Love You': {'Smokey Robinson & The M...
1,LMFAO,Party Rock Anthem,party_rock_anthem.txt,Party rock Let's go! Party rock is in the hou...,"[party, rock, let, us, go, party, rock, house,...","[-0.07758194069264084, 0.05038790173160177, 0....",{'A Life In The Day Of Benjamin André (Incompl...
2,Metallica,2 X 4,2_x_4.txt,"I’m gonna make you, shake you, take you I’m go...","[go, make, shake, take, go, one, break, put, s...","[0.23941783802816885, 0.06187164084507043, 0.0...",{'Hold On': {'Wilson Phillips': 'hold_on.txt'}...
3,Metallica,Prince Charming,prince_charming.txt,There’s a black cloud overhead That’s me And t...,"[black, cloud, overhead, poison, ivy, choke, t...","[0.09181784571428582, 0.24025833714285713, 0.0...",{'The Tracks Of My Tears': {'Smokey Robinson &...
4,Metallica,Now That We'Re Dead,now_that_we're_dead.txt,When darkness falls May it be That we should s...,"[darkness, fall, may, see, light, reaper, call...","[0.26997853366336627, 0.3395790495049503, 0.02...",{'Starblind': {'Iron Maiden': 'starblind.txt'}...


### Create JSON File with Recommendations

In [7]:
# Nest each song's recommendations under its artist:
# {artist: {song_name: recommendations}}
recs = {artist: {} for artist in rec_df.artist.unique()}

for row_label in rec_df.index:
    artist = rec_df.at[row_label, 'artist']
    song = rec_df.at[row_label, 'song_name']
    recs[artist][song] = rec_df.at[row_label, 'recommendations']

# Persist the nested mapping next to the notebook.
with open("recommendations.json", "w") as outfile:
    json.dump(recs, outfile)

recs

{'Gary Wright': {'Love Is Alive': {'Cause I Love You': {'Smokey Robinson & The Miracles': 'cause_i_love_you.txt'},
   "Jumpin' In The Morning": {'Ray Charles': "jumpin'_in_the_morning.txt"},
   'Shake You Down': {'Gregory Abbott': 'shake_you_down.txt'},
   "Love'S A Deadly Weapon": {'KISS': "love's_a_deadly_weapon.txt"},
   "Stayin' Alive": {'Bee Gees': "stayin'_alive.txt"}}},
 'LMFAO': {'Party Rock Anthem': {'A Life In The Day Of Benjamin André (Incomplete)': {'Outkast': 'a_life_in_the_day_of_benjamin_andré_(incomplete).txt'},
   'The Beast': {'Blondie': 'the_beast.txt'},
   'What A Day That Was': {'Talking Heads': 'what_a_day_that_was.txt'},
   'Stand Up (Kick Love Into Motion)': {'Def Leppard': 'stand_up_(kick_love_into_motion).txt'},
   'One Dance': {'Drake': 'one_dance.txt'}}},
 'Metallica': {'2 X 4': {'Hold On': {'Wilson Phillips': 'hold_on.txt'},
   'Everything You Want': {'Vertical Horizon': 'everything_you_want.txt'},
   'Heartbreak Hotel': {'Whitney Houston': 'heartbreak_hote