MUSIC RECOMMENDATION SYSTEM
===========================
- **Predict genres**
- **Predict moods**
- **Recommend based on cosine similarity and Jaccard similarity**

In [1]:
import pickle
import pandas as pd
import numpy as np

In [2]:
# Pretty-printer with 4-space indentation, used to display nested results such as dicts.
import pprint
pp = pprint.PrettyPrinter(indent=4)

In [3]:
from collections import namedtuple
# Immutable record identifying one song by its artist and title.
Song = namedtuple("Song", "artist title")
# A mood description paired with its probability, also kept as a namedtuple.
Mood = namedtuple("Mood", "description probability")

## Load models and song database

In [57]:
# Load my resources: the genre model, the two mood models (lyrics / audio) and the song database.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only load files
# that come from a trusted source.
def _load_pickle(path):
    """Return the object unpickled from `path`, closing the file handle afterwards."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


predict_genres = _load_pickle('predict_genres.pickle')
predict_moods_lyrics = _load_pickle('lyrics_predict_moods.chain.pickle')
predict_moods_audio = _load_pickle('audio_predict_moods.chain.pickle')
my_database = _load_pickle('my_database_new.pickle')

In [58]:
# Tools for cleaning lyrics
from string import punctuation
from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS
from nltk.stem.snowball import SnowballStemmer

# Translation table mapping every character of `string.punctuation` to None (i.e. deletion).
translator = str.maketrans(dict.fromkeys(punctuation))
# English Snowball stemmer shared by the lyrics-cleaning code.
stemmer = SnowballStemmer("english")

def clean_text(raw_text):
    """Normalise lyrics into one string of stemmed, non-stop-word tokens.

    The pieces of `raw_text` are joined with single spaces, lower-cased and
    stripped of punctuation via `translator`. Each remaining
    whitespace-separated token that is not in `ENGLISH_STOP_WORDS` is stemmed
    with `stemmer`, and the stems are returned joined by single spaces.

    NOTE(review): `" ".join(raw_text)` presumably expects a sequence of
    strings (e.g. lyric lines); a plain string would be split into single
    characters -- confirm against the callers.
    """
    normalised = " ".join(raw_text).lower().translate(translator)

    stems = [
        stemmer.stem(token)
        for token in normalised.split()
        if token not in ENGLISH_STOP_WORDS
    ]

    return " ".join(stems)

In [59]:
from sklearn.metrics import jaccard_similarity_score

# Indicator (0/1) columns per mood: each song's moods are comma-joined, then expanded by get_dummies.
df_moods = my_database["moods"].str.join(",").str.get_dummies(sep=",")
# Genre column followed by the mood indicator columns, side by side.
df_genres_moods = pd.concat([my_database["genres"], df_moods], axis=1)

In [103]:
from sklearn.metrics.pairwise import cosine_similarity

In [81]:
def recommend_similar_songs(audio_features, lyrics_features):
    """Recommend songs by Jaccard similarity over predicted genre + moods.

    NOTE: a later cell defines `recommend_similar_songs` again (cosine
    similarity on moods only); once that cell runs it replaces this version.

    Parameters
    ----------
    audio_features : array-like with ``reshape``
        Audio feature vector of the query song; reshaped to a single row
        before being passed to the genre and mood models.
    lyrics_features : object or None
        Lyrics of the query song, cleaned with `clean_text`. When None, only
        the audio mood model is used.

    Returns
    -------
    dict
        ``{'playlist': [Song, ...]}`` with up to 10 songs drawn at random
        (unseeded, so results vary between calls) from the 50 most similar
        database rows.
    """
    audio_row = audio_features.reshape((1, -1))
    genre = predict_genres.predict(audio_row)

    if lyrics_features is not None:
        lyrics_features_clean = clean_text(lyrics_features)

        moods_1 = predict_moods_audio.predict_proba(audio_row)
        moods_2 = predict_moods_lyrics.predict_proba([lyrics_features_clean])
        # Weighted average: the audio model counts twice as much as the lyrics model.
        moods = (moods_1*2 + moods_2*1)*1/3
    else:
        moods = predict_moods_audio.predict_proba(audio_row)

    # Query row laid out like df_genres_moods: genre first, then the mood values.
    predict_result = np.concatenate((genre.reshape(1, 1), moods), axis=1)

    similarity = df_genres_moods.apply(
        lambda x: jaccard_similarity_score(predict_result, np.array(x).reshape(1, -1)),
        axis=1)
    similarity = similarity.sort_values(ascending=False)[0:50]

    # Cap the sample size so a database with fewer than 10 candidates does not raise.
    picked = similarity.sample(min(10, len(similarity)))
    # `picked.index` holds index *labels*, so select with .loc rather than the positional .iloc.
    top_10 = my_database.loc[picked.index]

    result = [Song(artist=row["artist"], title=row['name']) for idx, row in top_10.iterrows()]
    final_result_dictionary = dict(playlist=result)

    return final_result_dictionary

In [108]:
def recommend_similar_songs(audio_features, lyrics_features):
    """Recommend songs whose mood indicators best match the predicted moods.

    Mood probabilities are predicted from the audio features and, when lyrics
    are given, blended with the lyrics-based prediction. Every row of
    `df_moods` is then scored by cosine similarity against that mood vector.

    Parameters
    ----------
    audio_features : array-like with ``reshape``
        Audio feature vector of the query song; reshaped to a single row
        before being passed to the mood model.
    lyrics_features : object or None
        Lyrics of the query song, cleaned with `clean_text`. When None, only
        the audio mood model is used.

    Returns
    -------
    dict
        ``{'playlist': [Song, ...]}`` with up to 10 songs drawn at random
        (unseeded, so results vary between calls) from the 50 most similar
        database rows.
    """
    audio_row = audio_features.reshape((1, -1))

    if lyrics_features is not None:
        lyrics_features_clean = clean_text(lyrics_features)

        moods_1 = predict_moods_audio.predict_proba(audio_row)
        moods_2 = predict_moods_lyrics.predict_proba([lyrics_features_clean])
        # Weighted average: the audio model counts twice as much as the lyrics model.
        moods = (moods_1*2 + moods_2*1)*1/3
    else:
        moods = predict_moods_audio.predict_proba(audio_row)

    # The genre prediction and the genre+mood query row were computed but never
    # used by this cosine variant, so they are no longer built here.

    # One vectorised call scores all rows at once: the result has shape (n_rows, 1).
    scores = cosine_similarity(df_moods.values, moods).ravel()
    similarity = pd.Series(scores, index=df_moods.index)
    similarity = similarity.sort_values(ascending=False)[0:50]

    # Cap the sample size so a database with fewer than 10 candidates does not raise.
    picked = similarity.sample(min(10, len(similarity)))
    # `picked.index` holds index *labels*, so select with .loc rather than the positional .iloc.
    top_10 = my_database.loc[picked.index]

    result = [Song(artist=row["artist"], title=row['name']) for idx, row in top_10.iterrows()]
    final_result_dictionary = dict(playlist=result)

    return final_result_dictionary

In [42]:
# Use the first row of the database as a sample query song.
# NOTE(review): positional columns 2 and 3 are presumably the audio feature vector and
# the lyrics -- confirm against the database schema.
audio = np.array(my_database.iloc[0, 2])
lyrics = my_database.iloc[0, 3]

In [43]:
# Predict the genre of the sample song; the features are passed as a single-row 2-D array.
genre = predict_genres.predict(audio.reshape((1, -1)))
genre

array(['country'], dtype=object)

In [98]:
# Mood probabilities for the sample song from the audio model and from the lyrics model.
moods_1 = predict_moods_audio.predict_proba(audio.reshape(1, -1))
moods_2 = predict_moods_lyrics.predict_proba([clean_text(lyrics)])
# Weighted average: the audio model counts twice as much as the lyrics model.
moods = (moods_1*2 + moods_2*1)*1/3

# Display the shape of the blended probabilities as a sanity check.
moods.shape

(1, 7)

In [77]:
# Shape of the query row once the predicted genre is prepended to the mood probabilities.
print(np.concatenate((genre.reshape(1,1), moods), axis = 1).shape)

(1, 8)


In [107]:
# Cosine similarity between the blended mood probabilities and every song's mood indicator
# row, computed in one vectorised call (result shape (n_rows, 1)) instead of one call per row.
simi = pd.Series(cosine_similarity(df_moods.values, moods).ravel(), index=df_moods.index)
simi

0        0.968712
1        0.188961
2        0.062632
3        0.968712
4        0.188961
5        0.188961
6        0.204599
7        0.062632
8        0.204599
9        0.204599
10       0.200771
11       0.741081
12       0.200771
13       0.020877
14       0.204599
15       0.968712
16       0.000000
17       0.062632
18       0.829656
19       0.083510
20       0.204599
21       0.968712
22       0.968712
23       0.079334
24       0.188961
25       0.968712
26       0.703929
27       0.045930
28       0.968712
29       0.968712
           ...   
25947    0.204599
25948    0.829656
25949    0.741081
25950    0.968712
25951    0.079334
25952    0.204599
25953    0.968712
25954    0.200771
25955    0.020877
25956    0.079334
25957    0.204599
25958    0.047240
25959    0.717461
25960    0.045930
25961    0.829656
25962    0.204599
25963    0.079334
25964    0.062632
25965    0.091528
25966    0.204599
25967    0.083510
25968    0.829656
25969    0.204599
25970    0.020877
25971    0

In [109]:
# Run the recommender on the sample song and pretty-print the returned playlist dict.
results = recommend_similar_songs(audio, lyrics)
pp.pprint(results)

{   'playlist': [   Song(artist='Steve Earle', title='Christmas In Washington'),
                    Song(artist='The Pains Of Being Pure At Heart', title='Come Saturday'),
                    Song(artist='Pavement', title='Summer Babe (Winter Version)'),
                    Song(artist='Chuck Berry', title='Memphis Tennessee'),
                    Song(artist='Ibrahim Ferrer', title='Ay, Candela'),
                    Song(artist='Rocola Bacalao', title='Mis Amigas Las Plantas'),
                    Song(artist='Ray Price', title='Crazy Arms'),
                    Song(artist='Peppermint Harris', title='Raining In My Heart'),
                    Song(artist='Merle Haggard', title="Workin' Man Blues"),
                    Song(artist='The Clash', title='Complete Control')]}
