In [12]:
from spacy_preprocessor import SpacyPreprocessor
import sqlalchemy as db
import pandas as pd
import tqdm

# Connect to DB and grab tables

In [13]:
engine = db.create_engine('postgresql://localhost:5432/lyricsdb')
connection = engine.connect()
metadata = db.MetaData()
artists = db.Table('artist_info', metadata, autoload=True, autoload_with=engine)
songs = db.Table('songs', metadata, autoload=True, autoload_with=engine)
songs_info = db.Table('songs', metadata, autoload=True, autoload_with=engine)

In [14]:
query = 'SELECT * FROM songs;'
song_df_db = pd.read_sql(query, connection)

In [15]:
query = 'SELECT * FROM artist_info;'
artist_df_db = pd.read_sql(query, connection)

In [16]:
artist_df_db.head()

Unnamed: 0,artist_id,artist_name,genre,artist_picture_ranker_url,artist_picture_genius_url,ranker_ranking
0,0,Tupac,Rap/Hip-Hop,https://imgix.ranker.com/user_node_img/115/228...,,0
1,1,The Notorious B.I.G.,Rap/Hip-Hop,https://imgix.ranker.com/user_node_img/111/221...,,1
2,2,Eminem,Rap/Hip-Hop,https://imgix.ranker.com/user_node_img/47/9378...,,2
3,3,Kendrick Lamar,Rap/Hip-Hop,https://imgix.ranker.com/user_node_img/3107/62...,,3
4,4,Dr. Dre,Rap/Hip-Hop,https://imgix.ranker.com/user_node_img/45/8916...,,4


In [17]:
song_df_db.head()

Unnamed: 0,artist_id,song_title,lyrics
0,1083,Radio 2,
1,1274,Sound Round,I'm young in my camper van\nThe world feels ol...
2,347,Okie from Muskogee,We don't smoke marijuana in Muskogee\nWe don't...
3,347,Mama Tried,The first thing I remember knowing\nWas a lone...
4,347,One Day at a Time,"I'm only human, I'm just a man\nHelp me to bel..."


# Grab songs

### remove the outliers, songs with no words or "songs" with way too many words

In [18]:
normal_songs = song_df_db[(song_df_db["lyrics"].str.len() <= 3500) & (song_df_db["lyrics"].str.len() >= 400)]

In [19]:
normal_songs

Unnamed: 0,artist_id,song_title,lyrics
2,347,Okie from Muskogee,We don't smoke marijuana in Muskogee\nWe don't...
3,347,Mama Tried,The first thing I remember knowing\nWas a lone...
4,347,One Day at a Time,"I'm only human, I'm just a man\nHelp me to bel..."
5,347,Sing Me Back Home,The warden led a prisoner down the hallway to ...
6,347,That’s the Way Love Goes,I've been throwing horseshoes over my left sho...
...,...,...,...
56463,1329,Heartbeat,(J. Jett/J. Allen)\nAre you my lover\nOr just ...
56464,1329,Let It Bleed,"Well, we all need someone we can lean on\nAnd ..."
56465,1329,MCA,(J. Rotten/S. Jones/G. Matlock/P. Cook)\nIt's ...
56467,1329,Play With Me,(Joan Jett/Kenny Laguna)\nLife is never what y...


In [20]:
sub_df = normal_songs

# Remove the duplicates (live versions, studio versions etc)

In [21]:
def remove_duplicate_songs(song_dataframe):
    song_titles_dict = {}
    index_removal_list = []
    for i, row in song_dataframe.iterrows():
        artist_id = row["artist_id"]
        if artist_id in song_titles_dict.keys():  
            for song in song_titles_dict[artist_id]:
                if row["song_title"] in song or song in row["song_title"]:
                    # print(row["song_title"])
                    index_removal_list.append(i)
            else:
                song_titles_dict[artist_id].append(row["song_title"])
        else: 
            song_titles_dict[artist_id] = [row["song_title"]]
    print("removed {} songs: ".format(len(index_removal_list)))
    return(song_dataframe.drop(index_removal_list))

In [22]:
sub_df = remove_duplicate_songs(sub_df)

removed 7644 songs: 


In [23]:
sub_df.shape

(44161, 3)

# Remove Wrong artists

In [24]:
# These are artists that we grabbed the songs for incorrectly, they grabbed songs for other artists
wrong_artists = ["Asia", "Argent", "Camel", "Santana", "Free", "Prince", "Sweet", "Traffic", "War", "Dio", "UFO"]

In [25]:
wrong_artists_id = []
for artist in wrong_artists:
    artist_id = artist_df_db[artist_df_db["artist_name"] == artist]["artist_id"].values[0]
    
    wrong_artists_id.append(artist_id)
    
wrong_artists_id

[1443, 1470, 1482, 1296, 1425, 1314, 1434, 1388, 1445, 1387, 1458]

In [26]:
sub_df = sub_df[~sub_df['artist_id'].isin(wrong_artists_id)] 
sub_df.shape

(42492, 3)

# Clean lyrics with Spacy Model

In [27]:
spacy_model = SpacyPreprocessor.load_model()

In [28]:
extra_stop_words = ["ooh", "oh", "ah", "la", "ha", "yeah", "to", "get", "let" , 
                    "gonna", "too", "gon", "na", "gotta", "got", "ta", "ya", "hey", "pron",
                   "come", "like", "look", "come", "tell", "away", "right", "way", "try"]

In [29]:
preprocessor = SpacyPreprocessor(spacy_model=spacy_model, lemmatize=True, remove_numbers=True, remove_stopwords=True, extra_stop_words=extra_stop_words)

adding stop words
adding ooh
adding oh
adding ah
adding la
adding ha
adding yeah
adding to
adding get
adding let
adding gonna
adding too
adding gon
adding na
adding gotta
adding got
adding ta
adding ya
adding hey
adding pron
adding come
adding like
adding look
adding come
adding tell
adding away
adding right
adding way
adding try


In [30]:
clean_lyrics_list = preprocessor.preprocess_text_list(list(sub_df.lyrics))

42492it [05:15, 134.60it/s]


In [31]:
clean_lyrics_list[0]

'smoke marijuana muskogee trip lsd burn draft card main street love live free party loving hold hand pitch woo let hair grow long shaggy hippie san francisco proud okie muskogee place square ball wave old glory courthouse white lightning big thrill leather boot style manly footwear beads roman sandal will see football rough thing campus kid respect college dean proud okie muskogee place square ball wave old glory courthouse white lightning big thrill white lightning big thrill muskogee oklahoma usa'

In [32]:
for i in range(len(clean_lyrics_list)):
    if ' way ' in clean_lyrics_list[i]:
        print(i)

13
36
58
90
123
158
251
343
370
376
382
451
452
496
529
545
621
679
730
794
863
865
875
893
910
980
1024
1111
1131
1144
1233
1240
1242
1364
1365
1387
1672
1724
1751
1780
1782
1802
1806
1822
1925
2023
2060
2103
2140
2142
2152
2181
2216
2281
2307
2319
2432
2607
2708
2776
2796
2822
2826
2831
2847
2906
2963
3140
3189
3229
3384
3395
3419
3437
3447
3450
3605
3658
3669
3737
3878
3899
3920
4023
4071
4090
4102
4121
4128
4259
4287
4391
4407
4444
4524
4541
4578
4644
4696
4716
4727
4745
4768
4805
4835
4852
4942
4958
4963
4980
5007
5022
5072
5075
5086
5312
5337
5345
5355
5403
5475
5528
5694
5819
5943
5967
5976
5984
6091
6108
6110
6142
6167
6182
6250
6264
6269
6303
6309
6323
6402
6435
6476
6538
6543
6552
6594
6605
6829
6956
6981
6994
7006
7032
7069
7100
7101
7128
7130
7241
7242
7276
7327
7339
7355
7362
7369
7379
7391
7394
7408
7415
7426
7505
7591
7688
7745
7748
7815
7871
7990
8059
8072
8073
8104
8128
8135
8191
8216
8255
8270
8279
8461
8707
8737
8741
8743
8745
8752
8857
8926
9066
9103
9150
9151
9175


# Vectorie Lrics with TFIDF

### I learned that LDA will work better with a normal count vectorizer, so I dodnt end up using TFIDF

In [33]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS



In [None]:
tfidf_vectorizer = TfidfVectorizer(max_df = 0.5, min_df = .01, stop_words=ENGLISH_STOP_WORDS.union(extra_stop_words))

In [None]:
dtm_tfidf = tfidf_vectorizer.fit_transform(clean_lyrics_list)

In [None]:
terms = tfidf_vectorizer.get_feature_names()

In [None]:
# tfidf_data_frame = pd.DataFrame(data=dtm_tfidf, columns=terms, index=clean_lyrics_list)
# tfidf_data_frame.head(1)

I realized TFIDF doesnt really work for LDA

# Count Vectorize

In [34]:
count_vectorizer = CountVectorizer(max_df = 0.5, min_df = .02, stop_words=ENGLISH_STOP_WORDS.union(extra_stop_words))

In [35]:
dtm_count = count_vectorizer.fit_transform(clean_lyrics_list)

In [36]:
terms = count_vectorizer.get_feature_names()

# LDA Dimension Reduction Topic modelling

In [37]:
lda_count = LatentDirichletAllocation(n_components=6, random_state=12)
doc_topic = lda_count.fit_transform(dtm_count)

In [38]:
multiple_topic_count = 0
for row in doc_topic:
    if (sum(row > .1)) > 1:
        multiple_topic_count += 1
        
multiple_topic_count

38126

In [39]:
import pyLDAvis
import pyLDAvis.sklearn
pyLDAvis.enable_notebook()

html = pyLDAvis.sklearn.prepare(lda_count, dtm_count, count_vectorizer, sort_topics=False 
                         #mds='tsne'
                    )

  from collections import Iterable
  from collections import Mapping
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  return pd.concat([default_term_info] + list(topic_dfs))


In [40]:
html.topic_order

[1, 2, 3, 4, 5, 6]

In [68]:
pyLDAvis.save_html(html, "lda_viz_1.html")

In [42]:
html

Topic 1: Deep thinker (Time, think, mind, change)

Topic 2: Family (Man, Boy, woman, town, life)

Topic 3: Country Family (man, boy, woman, car, town, work)

Topic 4: Deep Thinker (time, change, mind, life, tomorrow)

Topic 5: Rock N Roll Love (Baby, Girl, Wanna/Want, Love, Honey, Babe, Shake)

Topic 6: Rock'n and Roll'n (Rock, Roll, Music, Sing)


In [43]:
Vt = pd.DataFrame(doc_topic.round(5),
             index = clean_lyrics_list,
             columns = html.topic_order).add_prefix("topic_")
Vt

Unnamed: 0,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6
smoke marijuana muskogee trip lsd burn draft card main street love live free party loving hold hand pitch woo let hair grow long shaggy hippie san francisco proud okie muskogee place square ball wave old glory courthouse white lightning big thrill leather boot style manly footwear beads roman sandal will see football rough thing campus kid respect college dean proud okie muskogee place square ball wave old glory courthouse white lightning big thrill white lightning big thrill muskogee oklahoma usa,0.19023,0.00674,0.46270,0.26680,0.00672,0.06682
thing remember know lonesome whistle blow young'un dream grow ride freight train leave town know bind change mind mama try rebel child family meek mild mama know lie store despite sunday learning bad keep turn til mama hold anymore turn prison life parole steer mama try mama try mama try raise well plead deny leave blame because mama try dear old daddy rest soul leave mom heavy load try hard fill shoe work hour rest want good try raise refuse turn prison life parole steer mama try mama try mama try raise well plead deny leave blame because mama try,0.53470,0.00325,0.22912,0.14808,0.08161,0.00325
human man help believe stairway climb lord sake teach day time day time sweet jesus ask strength day yesterday go sweet jesus tomorrow sake teach day time remember walk man jesus know look bad push shove crowd mind sake teach day time day time sweet jesus ask strength day yesterday go sweet jesus tomorrow sake teach day time yes sake teach day time,0.28344,0.00442,0.00444,0.69883,0.00444,0.00443
warden lead prisoner hallway doom stand goodbye rest hear warden reach cell let guitar play friend request let sing home song hear old memory alive turn year sing home die recall sunday morning choir street come sing old gospel song hear singer song mama sing hear will sing home song hear old memory alive turn year sing home die sing home die,0.03046,0.00358,0.00359,0.42477,0.53404,0.00356
throw horseshoe left shoulder spend life search leaf clover run chase rainbow honey love love go love go babe music god world sing old grow losing make sorry honey worry know love love go love go babe music god world sing old grow losing make sorry honey worry know love love go,0.00481,0.00483,0.00482,0.17635,0.20254,0.60665
...,...,...,...,...,...,...
let about baby know shy get make tremble get sexy eye say talkin bout baby wait make sh say talkin bout baby wow let about baby touch kiss sweeter sugar make heart beat fast fast say talkin bout baby wait make sh sh sh sh say talkin bout baby wow cold night warm close eye go crazy wow whoo say talkin bout baby wait make sh say talkin bout baby wow talkin bout baby wait make sh say talkin bout baby stop,0.00402,0.00399,0.24285,0.00400,0.00403,0.74111
j jett j allen lover fancy cover hold night wish star eye shut tight hope dream recognize cause fun fun know want know need feel heartbeat heartbeat know want know need feel heartbeat feel heartbeat wonder thunder day watch eye burn love will return let slip cause fun fun know want know need feel heartbeat heartbeat know want know need feel heartbeat feel heartbeat feel heartbeat feel heartbeat heartbeat heartbeat heartbeat heartbeat heartbeat heartbeat heartbeat heartbeat feel heartbeat feel heartbeat heartbeat feel heartbeat heartbeat feel heartbeat,0.00389,0.39018,0.00393,0.00393,0.00390,0.59418
need lean want lean need lean want lean say breast open baby rest weary head space parking lot need little coke sympathy need dream want dream need dream want dream dream steel guitar engagement drunk health scented jasmine tea knife filthy dirty basement jade fade junky nurse pleasant company need lean want feed arm leg baby head lean need lean want baby lean live live,0.00495,0.53563,0.00497,0.16375,0.00492,0.28579
joan jett kenny laguna life think sentimental wink know instore feel warmin inside worry cause get tie guy girl thing thing world play play baby feel crazy borderline get hazy rule forgot blaze leave reel day play play lot fun let emotion run tonite go to lot thunder let emotion run round round round round play play,0.00509,0.16092,0.77222,0.00509,0.05153,0.00515


# sentiment Analysis

In [44]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

  regargs, varargs, varkwargs, defaults, formatvalue=lambda value: ""
  from collections import Sequence, defaultdict


In [45]:
import nltk
# nltk.download('vader_lexicon')

In [46]:
analyzer = SentimentIntensityAnalyzer()

In [47]:
full_lyrics = list(sub_df.lyrics)

In [48]:
sentiment_list = [analyzer.polarity_scores(l) for l in full_lyrics]

In [49]:
positive_list = [x['pos'] for x in sentiment_list]
negative_list = [x['neg'] for x in sentiment_list]
neutral_list = [x['neu'] for x in sentiment_list]
compound_list = [x['compound'] for x in sentiment_list]

In [50]:
max(compound_list), min(compound_list), sum(compound_list)/len(compound_list)

(0.9999, -0.9999, 0.38736862938906474)

So the average song is kind of positive

In [51]:
pos_song_index = compound_list.index(0.9999)
neg_song_index = compound_list.index(-0.9999)

In [52]:
full_lyrics[pos_song_index]

"Catch a star if you can\nWish for somethin' special\nLet it be me, my love is free\nSing a song to yourself\nThink of someone listenin'\nOne melody, you're all for me\n\nI'll write a symphony just for you and me\nIf you let me love you\nI'll paint a masterpiece just for you to see\nIf you let me love you, let me love you\n\nAre you ready, are you ready for love?\nYes I am, are you\nAre you ready, are you ready for love?\nYes I am, are you\nAre you ready, are you ready for love?\n(Ooh, ooh, baby)\n\nYou're the one like the sun\nShine your love around me\nYou'll always be the one for me\nSay the word, I'll be there\nLovin' you forever\nDon't let me go, just say it's so\n\nWe'll hear the music ring from the mountaintops\nTo the valley below us\nWe'll serenade the world with a lullaby\nSo the angels will know us, angels will know us\n\nAre you ready, are you ready for love?\nYes I am, are you\nAre you ready, are you ready for love?\nYes I am, are you\nAre you ready, are you ready for love

In [53]:
full_lyrics[neg_song_index]

'Scumbag, scumbag\nScumbag, scumbag\nScumbag, yeah! Scumbag\nScumbag, scumbag, ho!\nScumbag, scumbag\nScumbag, scumbag\nScumbag, scumbag\nScumbag, scumbag\nScumbag, scumbag\nScumbag, scumbag\nScumbag, scumbag\nScumbag, scumbag\nScumbag, scumbag\nScumbag\nScumbag, scumbag, scumbag\nScumbag\nScumbag, scumbag\nScumbag, scumbag\nScum, scum, scumbag\nScumbag, scumbag, scumbag, scumbag\nScumbag, scumbag, scumbag, scumbag\n-"Hey, listen! I don\'t know whether you can tell what the\nWords are to this song but there\'s only two of them\nAnd I\'d like to have you sing along \'cause it\'s real easy\nAnybody who comes to the Fillmore East can sing this song\nThe name of the song is Scumbag\nOk? And all you gotta do is sing \'scumbag\'\nRight on, brothers and sisters let\'s hear it for the \'scumbag\'!"\nScumbag! Scumbag! Scumbag! Scumbag! Scumbag!\nScumbag! Scumbag! Scumbag! Scumbag!\nScumbag, scumbag, scumbag, scumbag, scumbag\nScumbag, scumbag, scumbag, scumbag, scumbag\nScumbag, scumbag, scumba

# combine Dataframes

In [54]:
sub_df.shape

(42492, 3)

In [55]:
Vt.shape

(42492, 6)

In [56]:
len(compound_list)

42492

In [57]:
sub_df.reset_index(inplace=True, drop=True)
Vt.reset_index(inplace=True)

In [58]:
Vt = Vt.rename(columns={"index": "clean_lyrics"})

In [59]:
sub_df.index

RangeIndex(start=0, stop=42492, step=1)

In [60]:
info_and_topics = sub_df.join(Vt,how='outer')

In [61]:
info_and_topics["song_id"] = info_and_topics.index

In [62]:
info_and_topics["sentiment"] = compound_list

In [63]:
info_and_topics

Unnamed: 0,artist_id,song_title,lyrics,clean_lyrics,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,song_id,sentiment
0,347,Okie from Muskogee,We don't smoke marijuana in Muskogee\nWe don't...,smoke marijuana muskogee trip lsd burn draft c...,0.19023,0.00674,0.46270,0.26680,0.00672,0.06682,0,0.9917
1,347,Mama Tried,The first thing I remember knowing\nWas a lone...,thing remember know lonesome whistle blow youn...,0.53470,0.00325,0.22912,0.14808,0.08161,0.00325,1,-0.9192
2,347,One Day at a Time,"I'm only human, I'm just a man\nHelp me to bel...",human man help believe stairway climb lord sak...,0.28344,0.00442,0.00444,0.69883,0.00444,0.00443,2,0.9674
3,347,Sing Me Back Home,The warden led a prisoner down the hallway to ...,warden lead prisoner hallway doom stand goodby...,0.03046,0.00358,0.00359,0.42477,0.53404,0.00356,3,-0.8385
4,347,That’s the Way Love Goes,I've been throwing horseshoes over my left sho...,throw horseshoe left shoulder spend life searc...,0.00481,0.00483,0.00482,0.17635,0.20254,0.60665,4,0.9900
...,...,...,...,...,...,...,...,...,...,...,...,...
42487,1329,Talkin’ Bout My Baby,Let me tell you 'bout my baby\nYou know he's n...,let about baby know shy get make tremble get s...,0.00402,0.00399,0.24285,0.00400,0.00403,0.74111,42487,0.9574
42488,1329,Heartbeat,(J. Jett/J. Allen)\nAre you my lover\nOr just ...,j jett j allen lover fancy cover hold night ...,0.00389,0.39018,0.00393,0.00393,0.00390,0.59418,42488,0.9884
42489,1329,Let It Bleed,"Well, we all need someone we can lean on\nAnd ...",need lean want lean need lean want lean say br...,0.00495,0.53563,0.00497,0.16375,0.00492,0.28579,42489,0.9801
42490,1329,Play With Me,(Joan Jett/Kenny Laguna)\nLife is never what y...,joan jett kenny laguna life think sentimental ...,0.00509,0.16092,0.77222,0.00509,0.05153,0.00515,42490,0.9524


# Add new song dataframe to our database
I commented it out because i dont want to overwrite by accident

In [65]:
# info_and_topics.to_sql('song_topics', engine, index=False, if_exists='replace')

In [66]:
query = 'SELECT * FROM song_topics;'
info_and_topics_from_db = pd.read_sql(query, connection)

In [67]:
info_and_topics_from_db

Unnamed: 0,artist_id,song_title,lyrics,clean_lyrics,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,song_id,sentiment
0,347,Okie from Muskogee,We don't smoke marijuana in Muskogee\nWe don't...,smoke marijuana muskogee trip lsd burn draft c...,0.19023,0.00674,0.46270,0.26680,0.00672,0.06682,0,0.9917
1,347,Mama Tried,The first thing I remember knowing\nWas a lone...,thing remember know lonesome whistle blow youn...,0.53470,0.00325,0.22912,0.14808,0.08161,0.00325,1,-0.9192
2,347,One Day at a Time,"I'm only human, I'm just a man\nHelp me to bel...",human man help believe stairway climb lord sak...,0.28344,0.00442,0.00444,0.69883,0.00444,0.00443,2,0.9674
3,347,Sing Me Back Home,The warden led a prisoner down the hallway to ...,warden lead prisoner hallway doom stand goodby...,0.03046,0.00358,0.00359,0.42477,0.53404,0.00356,3,-0.8385
4,347,That’s the Way Love Goes,I've been throwing horseshoes over my left sho...,throw horseshoe left shoulder spend life searc...,0.00481,0.00483,0.00482,0.17635,0.20254,0.60665,4,0.9900
...,...,...,...,...,...,...,...,...,...,...,...,...
42487,1329,Talkin’ Bout My Baby,Let me tell you 'bout my baby\nYou know he's n...,let about baby know shy get make tremble get s...,0.00402,0.00399,0.24285,0.00400,0.00403,0.74111,42487,0.9574
42488,1329,Heartbeat,(J. Jett/J. Allen)\nAre you my lover\nOr just ...,j jett j allen lover fancy cover hold night ...,0.00389,0.39018,0.00393,0.00393,0.00390,0.59418,42488,0.9884
42489,1329,Let It Bleed,"Well, we all need someone we can lean on\nAnd ...",need lean want lean need lean want lean say br...,0.00495,0.53563,0.00497,0.16375,0.00492,0.28579,42489,0.9801
42490,1329,Play With Me,(Joan Jett/Kenny Laguna)\nLife is never what y...,joan jett kenny laguna life think sentimental ...,0.00509,0.16092,0.77222,0.00509,0.05153,0.00515,42490,0.9524


# Get Pairwise distance

In [48]:
from sklearn.metrics import pairwise_distances

In [49]:
doc_topic_matrix = doc_topic.round(5)

In [50]:
# Try metric='euclidean' and metric='cosine', and see what changes!
dists = pairwise_distances(doc_topic_matrix, metric='cosine')
dists.shape

(43735, 43735)

In [51]:
# Numpy outputs an array here, so we want to get the names of the beers back
dists = pd.DataFrame(data=dists, index=info_and_topics.index, columns=info_and_topics.index)

In [52]:
dists.iloc[0:5, 0:5]

Unnamed: 0,0,1,2,3,4
0,0.0,0.910168,0.907234,0.917888,0.886059
1,0.910168,0.0,0.902968,0.914128,0.880955
2,0.907234,0.902968,0.0,0.911372,0.877063
3,0.917888,0.914128,0.911372,0.0,0.412242
4,0.886059,0.880955,0.877063,0.412242,0.0


In [None]:
# dists.to_pickle('distance_matrix.pkl')

# mock reccomendation

In [None]:
songs_in_playlist = [0, 200, 500]
dists[songs_in_playlist].head()

In [None]:
songs_summed = dists[songs_in_playlist].sum(axis=1)
songs_summed = songs_summed.sort_values(ascending=True)
songs_summed.head()

In [None]:
# Filter out the beers used as input using `.isin()`
mask = ~songs_summed.index.isin(songs_in_playlist)
ranked_songs = songs_summed.index[mask]
ranked_songs = ranked_songs.tolist()

top_songs = ranked_songs[:5]
top_songs

In [None]:
info_and_topics[info_and_topics["song_id"].isin(top_songs)]

# Put the Recc in a function

In [None]:
def get_recc(song_list, distance_matrix, song_matrix, artist_matrix):
    song_id_list = []
    for artist, song in song_list:
        artist_id = artist_matrix[artist_matrix["artist_name"]==artist]["artist_id"].values[0]
    
        try:
            song_id = song_matrix[(song_matrix["artist_id"]==artist_id) & (song_matrix["song_title"]==song)]["artist_id"].values[0]
            song_id_list.append(song_id)
        except IndexError:
            print("Song not Found: {}".format(song))
     
    # get the distance matrix for these songs
    songs_summed = distance_matrix[song_id_list].sum(axis=1)
    songs_summed = songs_summed.sort_values(ascending=True)
    
    mask = ~songs_summed.index.isin(song_id_list)
    ranked_songs = songs_summed.index[mask]
    ranked_songs = ranked_songs.tolist()
    top_recc_songs = ranked_songs[:5]
    print(top_recc_songs)
    
    recc_matrix = song_matrix[song_matrix["song_id"].isin(top_recc_songs)]
    
    return(recc_matrix)

In [None]:
song_list = [("Tom Petty", "It’s Good to Be King"), ("Merle Haggard", "I Never Go Home Anymore"), ("Bruce Springsteen", "Glory Days")]

In [None]:
get_recc(song_list, dists, info_and_topics, artist_df_db)