In [48]:
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

In [49]:
df = pd.read_json("MasterSongList.json")

In [99]:
df.shape

(36733, 16)

In [50]:
audio_f = df['audio_features']
audio_f = audio_f.values.tolist()
header = ['key','energy','liveliness','tempo','speechiness','acousticness','instrumentalness','time_signature','duration','loudness','valence','danceability','mode','time_signature_confidence','tempo_confidence','key_confidence','mode_confidence']
audio_f_df = pd.DataFrame(audio_f, columns= header)
audio_f_df = pd.concat([audio_f_df, df['genres']],axis = 1)
audio_f_df.loc[:,'genres'] = audio_f_df['genres'].apply(lambda x: " ".join(x))
audio_f_df.loc[audio_f_df['genres']=="",'genres'] = np.nan
audio_f_df.dropna(inplace=True)
audio_f_df.loc[:,'genres'] = audio_f_df['genres'].apply(lambda x: x.split(':')[0] if len(x)>0 else x)
popular_genres = ['rock','rap','r&b','dance','jazz','indie','electronica','country','singer-songwriter','latin','pop','funk']

In [51]:
audio_f_df = audio_f_df[audio_f_df['genres'].apply(lambda x: x in popular_genres)]

In [52]:
audio_f_df['genres'].value_counts()

rock                 6435
rap                  2452
r&b                  2344
dance                2000
jazz                 1889
indie                1834
electronica          1249
country              1075
singer-songwriter    1034
latin                1032
pop                   689
funk                  470
Name: genres, dtype: int64

In [262]:
X = scaler.fit_transform(audio_f_df.drop(columns=['genres']))
y = audio_f_df['genres']

In [263]:
# rfc
rfc_test = RandomForestClassifier()

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.3,random_state = 101)
rfc_test.fit(X_train, y_train)

RandomForestClassifier()

In [268]:
rfc_test.feature_importances_

array([0.0337756 , 0.07604501, 0.05072729, 0.06526776, 0.09774239,
       0.09697707, 0.0703572 , 0.01033314, 0.00612063, 0.06792888,
       0.06052538, 0.06462322, 0.1010864 , 0.05107563, 0.05428734,
       0.05739562, 0.03573143])

In [264]:
predictions = rfc_test.predict(X_test)
print(accuracy_score(y_test,predictions))

0.5085172567027108


In [266]:
#rfe

from sklearn.feature_selection import RFE
rfe_model = RFE(rfc, n_features_to_select=9)
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.3,random_state = 101)
rfe_model.fit(X_train, y_train)

RFE(estimator=RandomForestClassifier(), n_features_to_select=9)

In [267]:
print(accuracy_score(y_test,rfe_model.predict(X_test)))

0.5039253443934232


In [276]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.3,random_state = 101)

In [62]:
from sklearn.feature_selection import SelectKBest
sK = SelectKBest(k = 9)
X_train_kbest = sK.fit_transform(X,y)
X_train_kbest_1, X_test_kbest_1, y_train, y_test = train_test_split(X_train_kbest,y, test_size = 0.2, random_state = 101)
rfc_Kbest.fit(X_train_kbest_1, y_train)

In [67]:
X_train_kbest = sK.fit_transform(X,y)

In [68]:
X_train_kbest_1, X_test_kbest_1, y_train, y_test = train_test_split(X_train_kbest,y, test_size = 0.2, random_state = 101)

In [69]:
rfe_model.fit(X_train_kbest_1, y_train)

RFE(estimator=RandomForestClassifier(max_features='sqrt', min_samples_split=10),
    n_features_to_select=9)

In [70]:
print(accuracy_score(y_test, rfe_model.predict(X_test_kbest_1)))

0.3717821782178218


In [236]:
param_grid = {
    'n_estimators': [5, 10, 100],
    'min_samples_split': [4, 5, 10],
    'max_features': ['sqrt', 'log2', 'auto']
}

In [237]:
from sklearn.model_selection import GridSearchCV
gcv = GridSearchCV(rfc, param_grid, verbose = 2)

In [238]:
gcv.fit(X_train, y_train)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
[CV] END max_features=sqrt, min_samples_split=4, n_estimators=5; total time=   0.4s
[CV] END max_features=sqrt, min_samples_split=4, n_estimators=5; total time=   0.3s
[CV] END max_features=sqrt, min_samples_split=4, n_estimators=5; total time=   0.4s
[CV] END max_features=sqrt, min_samples_split=4, n_estimators=5; total time=   0.2s
[CV] END max_features=sqrt, min_samples_split=4, n_estimators=5; total time=   0.3s
[CV] END max_features=sqrt, min_samples_split=4, n_estimators=10; total time=   0.8s
[CV] END max_features=sqrt, min_samples_split=4, n_estimators=10; total time=   0.7s
[CV] END max_features=sqrt, min_samples_split=4, n_estimators=10; total time=   0.8s
[CV] END max_features=sqrt, min_samples_split=4, n_estimators=10; total time=   0.8s
[CV] END max_features=sqrt, min_samples_split=4, n_estimators=10; total time=   1.2s
[CV] END max_features=sqrt, min_samples_split=4, n_estimators=100; total time=  10.4s
[CV] EN

[CV] END max_features=auto, min_samples_split=4, n_estimators=10; total time=   0.9s
[CV] END max_features=auto, min_samples_split=4, n_estimators=10; total time=   1.0s
[CV] END max_features=auto, min_samples_split=4, n_estimators=10; total time=   0.9s
[CV] END max_features=auto, min_samples_split=4, n_estimators=10; total time=   1.0s
[CV] END max_features=auto, min_samples_split=4, n_estimators=100; total time=   8.4s
[CV] END max_features=auto, min_samples_split=4, n_estimators=100; total time=   9.8s
[CV] END max_features=auto, min_samples_split=4, n_estimators=100; total time=   9.1s
[CV] END max_features=auto, min_samples_split=4, n_estimators=100; total time=   9.5s
[CV] END max_features=auto, min_samples_split=4, n_estimators=100; total time=   9.8s
[CV] END max_features=auto, min_samples_split=5, n_estimators=5; total time=   0.5s
[CV] END max_features=auto, min_samples_split=5, n_estimators=5; total time=   0.4s
[CV] END max_features=auto, min_samples_split=5, n_estimators=

GridSearchCV(estimator=RandomForestClassifier(),
             param_grid={'max_features': ['sqrt', 'log2', 'auto'],
                         'min_samples_split': [4, 5, 10],
                         'n_estimators': [5, 10, 100]},
             verbose=2)

In [240]:
print(accuracy_score(y_test, gcv.predict(X_test)))

0.46508774082709536


In [241]:
gcv.best_params_

{'max_features': 'log2', 'min_samples_split': 5, 'n_estimators': 100}

In [274]:
rfc_best = gcv.best_estimator_

In [277]:
rfc_best.fit(X_train, y_train)

RandomForestClassifier(max_features='log2', min_samples_split=5)

In [278]:
print(accuracy_score(y_test, rfc_best.predict(X_test)))

0.5123685379943712


In [272]:
from sklearn.feature_selection import SelectKBest
sK = SelectKBest(k = 9)
X_train_kbest = sK.fit_transform(X,y)
X_train_kbest_1, X_test_kbest_1, y_train, y_test = train_test_split(X_train_kbest,y, test_size = 0.2, random_state = 101)
rfc_best.fit(X_train_kbest_1, y_train)

RandomForestClassifier(max_features='log2', min_samples_split=5)

In [273]:
print(accuracy_score(y_test, rfc_best.predict(X_test_kbest_1)))

0.49166851810708734


In [54]:
rfc_best = RandomForestClassifier()

In [55]:
import pickle
pickle.dump(rfc_best, open('model.pkl', 'wb'))

# Lyric

In [106]:
test_df = pd.read_json("MasterSongList.json")

In [120]:
len(test_df['lyrics_features'])

36733

In [116]:
test_df['lyrics_features'].apply(lambda x: len(x)>0).sum()

20931

In [87]:
df = pd.read_json("MasterSongList.json")

In [88]:
df['lyrics_features'] = df['lyrics_features'].apply(lambda x: " ".join(x))

In [89]:
df.loc[df['lyrics_features']=="",'lyrics_features'] = np.nan

In [93]:
df = df[['moods','lyrics_features']]

In [94]:
lyric_df = df.dropna()

In [95]:
lyric_df

Unnamed: 0,moods,lyrics_features
0,"[energetic, motivational]",oppa gangnam style gangnam style najeneun ttas...
1,[happy],lately i ve been i ve been losing sleep dreami...
2,"[happy, celebratory, rowdy]",party rock yeah woo let s go party rock is in ...
3,"[happy, energetic, celebratory]",alagamun lan weh wakun heya hanun gon alagamun...
4,[energetic],j lo the other side out my mine it s a new gen...
...,...,...
36702,[mellow],go away from my window leave at your own chose...
36716,"[sexual, seductive, nocturnal, lush]",thinking of you that s all i seem to do when y...
36720,"[energetic, visceral]",hey hey ladies in the place i m callin out to ...
36723,[cocky],yes yeah kayne philadelphia freeway y all know...


In [96]:
from string import punctuation
from sklearn.feature_extraction._stop_words import ENGLISH_STOP_WORDS
from nltk.stem.snowball import SnowballStemmer

stemmer = SnowballStemmer('english')
translator = str.maketrans('','',punctuation)
def clean_lyrics (string):
    string = string.lower()
    string = string.translate(translator)
    split_review = string.split()
    clean_words = []
    for x in split_review:
        if x not in ENGLISH_STOP_WORDS:
            clean_words.append(stemmer.stem(x))
    final_string = " ".join(clean_words)
    return final_string

In [97]:
df['moods'].value_counts()

[sprightly]                         1941
[funky]                             1419
[mellow]                            1098
[soothing]                           920
[aggressive]                         780
                                    ... 
[gloomy, atmospheric]                  1
[funky, classy, nocturnal, lush]       1
[celebratory, sexual]                  1
[hypnotic, nocturnal]                  1
[cold, visceral]                       1
Name: moods, Length: 334, dtype: int64

In [98]:
lyric_df = lyric_df[lyric_df['moods'].apply(lambda x: 'energetic' in x)]

In [99]:
lyric_df['moods'].value_counts()

[energetic]                                      418
[energetic, celebratory]                         208
[energetic, motivational]                        207
[energetic, angsty]                              172
[energetic, visceral]                            109
[happy, energetic, celebratory]                  109
[energetic, cold]                                109
[energetic, angsty, rowdy]                       104
[energetic, rowdy]                                77
[earthy, energetic, sprightly]                    70
[energetic, aggressive, visceral]                 64
[energetic, raw]                                  54
[funky, energetic]                                48
[energetic, angsty, aggressive, trashy]           42
[energetic, sprightly, seductive]                 36
[energetic, rowdy, motivational]                  32
[happy, energetic, celebratory, motivational]     32
[happy, energetic, celebratory, raw]              32
[happy, energetic]                            

In [100]:
def fix(list_moods):
    temp = []
    for i in list_moods:
        if i in final_moods.keys():
            temp.append(i)
    if len(temp)>0:
        return temp
    return np.nan

In [68]:
lyric_df['moods'] = lyric_df['moods'].apply(fix)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lyric_df['moods'] = lyric_df['moods'].apply(fix)


In [101]:
lyric_df['lyrics_features'] = lyric_df['lyrics_features'].apply(clean_lyrics)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lyric_df['lyrics_features'] = lyric_df['lyrics_features'].apply(clean_lyrics)


In [103]:
moods = []
for x in lyric_df['moods']:
    for y in x:
        moods.append(y)

In [104]:
count_moods = {}
for i in moods:
    if i not in count_moods:
        count_moods[i] = 1
    else:
        count_moods[i]+=1

In [105]:
count_moods

{'energetic': 2305,
 'motivational': 319,
 'happy': 319,
 'celebratory': 412,
 'earthy': 80,
 'sprightly': 106,
 'visceral': 276,
 'angsty': 318,
 'rowdy': 297,
 'cold': 164,
 'funky': 81,
 'cocky': 72,
 'sexual': 23,
 'raw': 165,
 'seductive': 36,
 'aggressive': 137,
 'campy': 23,
 'nocturnal': 59,
 'hypnotic': 16,
 'atmospheric': 2,
 'warm': 10,
 'trashy': 42}

In [123]:
final_moods = {}
for x in count_moods:
    if count_moods[x]>200:
        final_moods[x] = count_moods[x]

In [124]:
final_moods

{'energetic': 2305,
 'motivational': 319,
 'happy': 319,
 'celebratory': 412,
 'visceral': 276,
 'angsty': 318,
 'rowdy': 297}

In [125]:
main_moods_df = lyric_df.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  main_moods_df = lyric_df.dropna(inplace=True)


In [131]:
main_moods_df = lyric_df

In [128]:
lyric_df

Unnamed: 0,moods,lyrics_features
0,"[energetic, motivational]",oppa gangnam style gangnam style najeneun ttas...
3,"[happy, energetic, celebratory]",alagamun lan weh wakun heya hanun gon alagamun...
4,[energetic],j lo s new generat mr worldwid parti peopl flo...
7,"[energetic, motivational]",threw wish don t ask ll tell look fell way tra...
11,"[energetic, celebratory]",shine bright like diamond shine bright like di...
...,...,...
36246,[energetic],littl tin soldier want jump look eye dream pas...
36312,"[energetic, cold, visceral]",day pray pray die just don t fuck care hope to...
36318,"[energetic, angsty, aggressive, trashy]",makeup counter non stick pad cat bed hardwar c...
36419,[energetic],dont want love factori aint machin babi im hum...


In [129]:
from sklearn.ensemble import RandomForestClassifier
rfc_multi = RandomForestClassifier()

In [130]:
from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()

In [132]:
lyrics_data = main_moods_df['lyrics_features']

In [133]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf_vec = TfidfVectorizer()

In [134]:
tf_idf = tfidf_vec.fit_transform(lyrics_data)

In [135]:
label = main_moods_df['moods']

In [136]:
y = mlb.fit_transform(label)

In [137]:
mlb.classes_

array(['aggressive', 'angsty', 'atmospheric', 'campy', 'celebratory',
       'cocky', 'cold', 'earthy', 'energetic', 'funky', 'happy',
       'hypnotic', 'motivational', 'nocturnal', 'raw', 'rowdy',
       'seductive', 'sexual', 'sprightly', 'trashy', 'visceral', 'warm'],
      dtype=object)

In [138]:
from sklearn.model_selection import train_test_split

In [139]:
X_train, X_test, y_train, y_test = train_test_split(tf_idf, y, test_size = 0.3, random_state = 101)

In [140]:
rfc_multi.fit(X_train, y_train)

RandomForestClassifier()

In [141]:
predictions = rfc_multi.predict(X_test)

In [142]:
print(accuracy_score(y_test, predictions))

0.20520231213872833


In [143]:
import pandas as pd
import pickle
from sklearn.datasets import load_diabetes

In [144]:
pickle.dump(rfc_multi, open('model_lyrics.pkl', 'wb'))

In [145]:
string = 'saturday mornin jumped outta bed and put on my best suit got in my car and raced like a jet all the way to you knocked on your door with heart in my hand to ask you a question cause i know that youre an oldfashioned man yeah yeah  pre  can i have your daughter for the rest of my life say yes say yes cause i need to know you say ill never get your blessin till the day i die tough luck my friend but the answer is no why you gotta be so rude dont you know im human too why you gotta be so rude im gonna marry her anyway marry that girl marry her anyway marry that girl yeah no matter what you say marry that girl and well be a family why you gotta be so rude i hate to do this you leave no choice cant live without her love me or hate me we will be both standin at that altar or we will run away to another galaxy you know you know shes in love with me she will go anywhere i go you might also likepre  can i have your daughter for the rest of my life say yes say yes cause i need to know you say ill never get your blessin till the day i die tough luck my friend cause the answers still no why you gotta be so rude dont you know im human too why you gotta be so rude im gonna marry her anyway marry that girl marry her anyway marry that girl no matter what you say marry that girl and well be a family why you gotta be so rude rude why you gotta be so–  bridge can i have your daughter for the rest of my life say yes say yes cause i need to know you say ill never get your blessin till the day i die tough luck my friend but no still means no why you gotta be so rude dont you know im human too why you gotta be so rude im gonna marry her anyway marry that girl marry her anyway marry that girl no matter what you say marry that girl and well be a family why you gotta be so rude why why why why say say why you gotta be so'

In [146]:
values = tfidf_vec.transform([string])

In [147]:
pickle.dump(tfidf_vec, open("tfidf_vec.pkl","wb"))
pickle.dump(tf_idf, open("tfidf.pkl","wb"))

In [149]:
print(tfidf_vec)

TfidfVectorizer()


In [150]:
pickle.dump(mlb, open("mlb.pkl","wb"))

In [151]:
import numpy as np

In [152]:
def is_in(li,list_moods):
    for x in li:
        if x in list_moods:
            return li
    return np.nan

In [153]:
def get_similar_song(list_moods, genres):
    temp_df = pd.read_json('MasterSongList.json')
    temp_df['genres'] = temp_df['genres'].apply(lambda x: "".join(x))
    new_df = temp_df[temp_df['genres']==genres]
    new_df['moods'] = new_df['moods'].apply(lambda x: is_in(x,list_moods))
    return new_df

In [154]:
final_df = get_similar_song(['energetic'],'r&b')
final_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['moods'] = new_df['moods'].apply(lambda x: is_in(x,list_moods))


Unnamed: 0,album,picture,genres,name,artist,yt_id,yt_views,audio_features,sub_context,moods,context,recording_id,lyrics_features,_id,decades,new_context
30,If I Were A Boy/Single Ladies,http://images.musicnet.com/albums/019/117/707/...,r&b,Single Ladies (Put A Ring On It),Beyoncé,4m1EFMoRFvY,434484715,"[1, 0.613807, 0.226733, 193.358, 0.288888, 0.0...",[girls night out],,[party],3726.0,"[all, the, single, ladies, all, the, single, l...",{'$oid': '52fdfb3e0b9398049f3ccb45'},[],party
50,Halo/Diva,http://images.musicnet.com/albums/026/924/959/...,r&b,Halo,Beyoncé,bnVUHWCynig,325958576,"[11, 0.7200329999999999, 0.056323, 79.983, 0.0...",[getting lucky],,[sexy],73440.0,"[remember, those, walls, i, built, well, baby,...",{'$oid': '52fdfb430b9398049f3d4c41'},[2010s],sexy
61,B.o.B Presents: The Adventures Of Bobby Ray,http://images.musicnet.com/albums/039/519/219/...,r&b,Airplanes (Feat. Hayley Williams Of Paramore),B.o.B,kn6-c223DUU,295253751,"[6, 0.8552639999999999, 0.038142999999999996, ...",,,[funky],78186.0,[],{'$oid': '52fdfb470b9398049f3db341'},[],
64,Run The World (Girls),http://images.musicnet.com/albums/073/441/943/...,r&b,Run The World (Girls),Beyoncé,VBmMU_iwe6U,282306211,"[0, 0.8994169999999999, 0.37197199999999997, 1...","[grinding at a nightclub, getting lucky]",,"[party, party]",9765.0,"[girls, we, run, this, mutha, yeah, girls, we,...",{'$oid': '52fdfb3e0b9398049f3ccc3a'},[],sexy
78,A Girl Like Me (CD Full Length),http://images.musicnet.com/albums/007/082/405/...,r&b,Unfaithful,Rihanna,rp4UwPZfRis,238170105,"[7, 0.8178500000000001, 0.157003, 133.025, 0.0...","[girls night out, getting lucky, housework, da...",,"[party, party, party, party, party]",39469.0,"[story, of, my, life, searching, for, the, rig...",{'$oid': '52fdfb450b9398049f3d89f9'},[2000s],housework
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35439,Concrete Law (Parental Advisory),http://images.musicnet.com/albums/000/689/771/...,r&b,Jump Back,BackBone,0k4JD8-HGwE,429,"[6, 0.580453, 0.214618, 96.419, 0.374316, 0.03...","[lying low on a sunday afternoon, city cruising]",,"[relax, relax]",20980.0,"[now, i, m, give, it, to, ya, straight, gram, ...",{'$oid': '52fdfb400b9398049f3d0fc3'},[1990s],driving
36031,Two,http://images.musicnet.com/albums/048/171/165/...,r&b,Work It Out,Kotchy,WXVcC2mxlU0,213,"[11, 0.275062, 0.068745, 85.022, 0.10993399999...",[sexual],,[sexy],69919.0,[],{'$oid': '52fdfb440b9398049f3d6c07'},[],sexy
36666,Nona Hendryx,http://images.musicnet.com/albums/071/193/297/...,r&b,Problem,Nona Hendryx,Ed1-6muTgIU,24,[],[city cruising],,[rush hour],,[],{'$oid': '52fdfb420b9398049f3d3adb'},[],driving
36688,I Do Not Want What I Haven't Got (Special Edit...,http://images.musicnet.com/albums/028/025/657/...,r&b,Nothing Compares 2 U (2009 Digital Remaster),Sinéad O'Connor,T2_5rhtprKk,18,[],,,"[funky, lush]",70342.0,"[it, s, been, seven, hours, and, fifteen, days...",{'$oid': '52fdfb440b9398049f3d6f46'},[1990s],


In [155]:
def final_result(list_moods, genres):
    final_df = get_similar_song(list_moods, genres)
    final_df['genres'] = final_df['genres'].apply(lambda x: ''.join(x))
    result = final_df[~final_df['moods'].isna()][['artist','name']]
    dic = []
    for i in range(0,result.shape[0]):
        dic.append(result.iloc[i].to_list())
    res_dic = []
    for i in dic:
        res_dic.append(Song(artist=i[0], title=i[1]))
    
    final_result_dictionary = dict(playlist=res_dic)       
    final_result_dictionary['genre'] = genres
    final_result_dictionary['moods'] = list_moods

    return final_result_dictionary

In [161]:
final_result_dictionary = final_result(['energetic'], 'rock')
final_result_dictionary['playlist'] = final_result_dictionary['playlist'][0:10]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['moods'] = new_df['moods'].apply(lambda x: is_in(x,list_moods))


In [176]:
final_result_dictionary['playlist']

[Song(artist='The Verve', title='Bitter Sweet Symphony'),
 Song(artist='blink-182', title='All The Small Things'),
 Song(artist='The Offspring', title="The Kids Aren't Alright"),
 Song(artist='Coldplay', title='Clocks'),
 Song(artist='Panic! At The Disco', title='The Ballad Of Mona Lisa'),
 Song(artist='Metro Station', title='Shake It'),
 Song(artist='Fall Out Boy', title='Dance, Dance'),
 Song(artist='blink-182', title="Adam's Song"),
 Song(artist='X', title='White Girl (Single Mix)'),
 Song(artist='The All-American Rejects', title='Dirty Little Secret')]

In [183]:
for i in range (0,10):
    print(final_result_dictionary['playlist'][i][0],": ", final_result_dictionary['playlist'][i][1])

The Verve :  Bitter Sweet Symphony
blink-182 :  All The Small Things
The Offspring :  The Kids Aren't Alright
Coldplay :  Clocks
Panic! At The Disco :  The Ballad Of Mona Lisa
Metro Station :  Shake It
Fall Out Boy :  Dance, Dance
blink-182 :  Adam's Song
X :  White Girl (Single Mix)
The All-American Rejects :  Dirty Little Secret


In [206]:
final_result_dictionary['playlist'][i][1]

'Bitter Sweet Symphony'

In [None]:
final_result_dictionary['playlist'][i][1]

In [283]:
tracksUri = []
for i in range(len(final_result_dictionary['playlist'])-1):
    artist = final_result_dictionary['playlist'][i][0]
    track = final_result_dictionary['playlist'][i][1]
    
    print(i, artist, track)
    results = spotify.search(q="artist:" + artist + " track:" + track, type="track")
    items = results['tracks']["items"]
    if len(items)>0:
        tracksUri.append(items[0]['uri'])

0 The Verve Bitter Sweet Symphony
1 blink-182 All The Small Things
2 The Offspring The Kids Aren't Alright
3 Coldplay Clocks
4 Panic! At The Disco The Ballad Of Mona Lisa
5 Metro Station Shake It
6 Fall Out Boy Dance, Dance
7 blink-182 Adam's Song
8 X White Girl (Single Mix)


In [284]:
tracksUri

['spotify:track:57iDDD9N9tTWe75x6qhStw',
 'spotify:track:2m1hi0nfMR9vdGC8UcrnwU',
 'spotify:track:4EchqUKQ3qAQuRNKmeIpnf',
 'spotify:track:0BCPKOYdS2jbQ8iyB56Zns',
 'spotify:track:5rLi8B8qgk6qThwRnKHW2P',
 'spotify:track:5619Ojc6t9evEEs3B7Drhe',
 'spotify:track:0a7BloCiNzLDD9qSQHh5m7',
 'spotify:track:6xpDh0dXrkVp0Po1qrHUd8']

In [21]:
final_df['genres'] = final_df['genres'].apply(lambda x: ''.join(x))

In [22]:
final_df = get_similar_song(['energetic', 'happy'], 'r&b')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df['moods'] = temp_df['moods'].apply(lambda x: is_in(x,list_moods))


In [53]:
result = final_df[~final_df['moods'].isna()][['artist','name']]

In [54]:
result.iloc[1].to_list()

['Beyoncé', 'Love On Top']

In [63]:
dic = []
for i in range(0,result.shape[0]):
    dic.append(result.iloc[i].to_list())

In [68]:
res_dic = []
for i in dic:
    res_dic.append(Song(artist=i[0], title=i[1]))

In [76]:
final_result_dictionary = dict(playlist=res_dic)
final_result_dictionary['genre'] = 'r&b'
final_result_dictionary['moods'] = ['energetic','happy']

In [196]:
pp.pprint(final_result_dictionary)

{'genre': 'rock', 'moods': ['energetic'], 'playlist': []}


In [157]:
# makes printing a dict look nice
from collections import namedtuple
import pprint
pp = pprint.PrettyPrinter(indent=4)
Song = namedtuple("Song", ["artist", "title"])

In [159]:
from collections import namedtuple

In [160]:
Song = namedtuple("Song", ["artist", "title"])

song_1 = Song(artist="kanye west", title='i am a god')
song_2 = Song(artist="linkin park", title="crawling")
# etc.

# Return your results as a dict containing a key called 'playlist' , which contains a list of the song tuples.
final_result_dictionary = dict(playlist=[song_1, song_2]) # this example uses only 2 songs but you need to return 10 :)

pp.pprint(final_result_dictionary)

{   'playlist': [   Song(artist='kanye west', title='i am a god'),
                    Song(artist='linkin park', title='crawling')]}


In [31]:
Mood = namedtuple("Mood", ["description", "probability"])

top_mood_1 = Mood(description='Happy', probability=0.75)
top_mood_2 = Mood(description='Energetic', probability=0.71)
top_mood_3 = Mood(description='Celebratory', probability=0.68)

final_result_dictionary['mood'] = [top_mood_1, top_mood_2, top_mood_3]
final_result_dictionary['genre'] = 'Jazz'

pp.pprint(final_result_dictionary)

{   'genre': 'Jazz',
    'mood': [   Mood(description='Happy', probability=0.75),
                Mood(description='Energetic', probability=0.71),
                Mood(description='Celebratory', probability=0.68)],
    'playlist': [   Song(artist='kanye west', title='i am a god'),
                    Song(artist='linkin park', title='crawling')]}


In [197]:
def recommend_similar_songs(audio_features, lyrics_features):
    lyric_model = pickle.load(open('model_lyrics.pkl', 'rb'))
    tfidf_vec = pickle.load(open('tfidf_vec.pkl','rb'))
    tfidf = pickle.load(open('tfidf.pkl','rb'))
    mlb = pickle.load(open('mlb.pkl','rb'))
    pickled_model = pickle.load(open('model.pkl', 'rb'))

    data = np.array(audio_features).reshape(1,-1)
    genres = pickled_model.predict(data)[0]
    list_moods = [mlb.inverse_transform(lyric_model.predict(tfidf_vec.transform([lyrics_features])))[0][0]]

    final_df = get_similar_song(list_moods, genres)
    
    final_df['genres'] = final_df['genres'].apply(lambda x: ''.join(x))
    result = final_df[~final_df['moods'].isna()][['artist','name']]
    print(result)
    dic = []
    for i in range(0,result.shape[0]):
        dic.append(result.iloc[i].to_list())
    res_dic = []
    for i in dic:
        res_dic.append(Song(artist=i[0], title=i[1]))

    final_result_dictionary = dict(playlist=res_dic)       
    final_result_dictionary['genre'] = genres
    final_result_dictionary['moods'] = list_moods

    pp.pprint(final_result_dictionary)
    return final_result_dictionary

In [198]:
data = np.array([[ 5.0000000e+00,  6.1600000e-01,  9.2700000e-02,  1.4808800e+02,
         3.2400000e-02,  1.8200000e-01,  0.0000000e+00,  4.0000000e+00,
         2.4209642e+02, -7.9640000e+00,  7.1900000e-01,  5.8600000e-01,
         1.0000000e+00,  9.1900000e-01,  2.4700000e-01,  1.6200000e-01,
         2.5600000e-01]])

In [199]:
lyric = 'say baby say baby say baby anderson paak  bruno mars what you doin what you doin where you at where you at oh you got plans you got plans dont say that shut your trap im sippin wine sip sip in a robe drip drip i look too good look too good to be alone woo woo my house clean house clean my pool warm pool warm just shaved smooth like a newborn we should be dancin romancin in the east wing and the west wing of this mansion whats happenin  pre  bruno mars i aint playin no games every word that i say is coming straight from the heart so if you tryna lay in these arms bruno mars ima leave the door open ima leave the door open ima leave the door open girl ima leave the door open hopin that you feel the way i feel and you want me like i want you tonight baby tell me that youre comin through you might also like  anderson paak  bruno mars ooh youre so sweet so sweet so tight so tight i wont bite uhuh unless you like unless you like if you smoke what you smoke i got the haze purple haze and if youre hungry girl i got filets woo ooh baby dont keep me waitin theres so much love we could be makin shamone im talkin kissin cuddlin rose petals in the bathtub girl lets jump in its bubblin  pre  bruno mars i aint playin no games every word that i say is coming straight from the heart so if you tryna lay in these arms if you tryna lay in these arms bruno mars ima leave the door open ima leave the door open ima leave the door open girl ima leave the door open hopin that you feel the way i feel and you want me like i want you tonight baby tell me that youre comin through come on girl  bridge bruno mars  anderson paak lalalalalalala i need you baby lalalalalalala i got to see you baby lalalalalalala girl im tryna give you this ah bruno mars hey hey ima leave my door open baby ima leave the door open ima leave ima leave my door open girl ima leave the door open hopin and im hopin hopin that you feel the way i feel and you want me like i want you tonight baby tell me that youre comin through woo bruno mars  anderson paak lalalalalalala tell me tell me that youre comin through woowoowoowoo woowoowoo woowoowoo woowoowoowoo woowoowoo woowoo lalalalalalala lalalalala tell me that youre comin through girl im here just waitin for you oh come on over ill adore you i gotta know lalalalalalala im waitin waitin waitin tell me that youre comin through for you girl im here just waitin for you come on over ill adore you lalalalalalala'

In [200]:
recommend_similar_songs(data, lyric)

Empty DataFrame
Columns: [artist, name]
Index: []
{'genre': 'r&b', 'moods': ['energetic'], 'playlist': []}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['moods'] = new_df['moods'].apply(lambda x: is_in(x,list_moods))


{'playlist': [], 'genre': 'r&b', 'moods': ['energetic']}

In [255]:
token = 'BQBgPykQynLqalAztS3hf2911N8_HhONfdy2NEhQcI7moPnaZSOHO77LoSpUIzY2gn5u8itjKyWLLhi55Bt998ogwBb3b57rT2LF5r9JV7PalYa3RckgzHyA30DrMItHAlO5b0k9hBKP_2IF99hFyYaxcnABKifl14Ou9sLWtH3F9yTkVcvhtiPxyehOeXD3rdJ-PoGqEnBV_CvBuYHa-0jC15yQlNUc0TdO0An-hq0SEF8o_DSh4l5kWdd6ruU-5mLphVXELSkX1_W30KdzEZMMnxf0';
import spotipy
import sys
from spotipy.oauth2 import SpotifyClientCredentials

CLIENT_ID='5f9548cc54f242bb97c58c5dfeb884c5'
CLIENT_SECRET='03ebdb028ed6454ba33102e925f1fcc1'
USER_ID = '317u5f5phywzysmnov6x6tppkrxq'

In [240]:
token = 'BQBgPykQynLqalAztS3hf2911N8_HhONfdy2NEhQcI7moPnaZSOHO77LoSpUIzY2gn5u8itjKyWLLhi55Bt998ogwBb3b57rT2LF5r9JV7PalYa3RckgzHyA30DrMItHAlO5b0k9hBKP_2IF99hFyYaxcnABKifl14Ou9sLWtH3F9yTkVcvhtiPxyehOeXD3rdJ-PoGqEnBV_CvBuYHa-0jC15yQlNUc0TdO0An-hq0SEF8o_DSh4l5kWdd6ruU-5mLphVXELSkX1_W30KdzEZMMnxf0';
import spotipy
import sys
from spotipy.oauth2 import SpotifyClientCredentials

CLIENT_ID='209bafc9b6db4ac2926f963e3f625059'
CLIENT_SECRET='bae043586f964fb19795682b2c501553'
USER_ID = '31qf6kxithhejdlybagvsvukiosu'

spotify = spotipy.Spotify(auth_manager=SpotifyClientCredentials(CLIENT_ID, CLIENT_SECRET))

artist = 'The Verve'
track = 'Bitter Sweet Symphony'

results = spotify.search(q="artist:" + artist + " track:" + track, type="track")
items = results['tracks']["items"]

In [253]:
token = 'BQBgPykQynLqalAztS3hf2911N8_HhONfdy2NEhQcI7moPnaZSOHO77LoSpUIzY2gn5u8itjKyWLLhi55Bt998ogwBb3b57rT2LF5r9JV7PalYa3RckgzHyA30DrMItHAlO5b0k9hBKP_2IF99hFyYaxcnABKifl14Ou9sLWtH3F9yTkVcvhtiPxyehOeXD3rdJ-PoGqEnBV_CvBuYHa-0jC15yQlNUc0TdO0An-hq0SEF8o_DSh4l5kWdd6ruU-5mLphVXELSkX1_W30KdzEZMMnxf0';

In [266]:
import json
import requests
user_id = "YOUR_USER_ID"
endpoint_url = "https://api.spotify.com/v1/users/{}/playlists".format(USER_ID)
request_body = json.dumps({
          "name": "Test tao playlist",
          "description": "None",
          "public": True
        })
response = requests.post(url = endpoint_url, data = request_body, headers={"Content-Type":"application/json", 
                        "Authorization":"Bearer {}".format(token)})

playlist_id = response.json()['id']
endpoint_url = "https://api.spotify.com/v1/playlists/{}/tracks?uris={}".format(playlist_id, ','.join(tracksUri))
response_final = requests.post(url = endpoint_url,headers={"Content-Type":"application/json", 
                        "Authorization":"Bearer {}".format(token)})

In [285]:
playlist_id = response.json()['id']
endpoint_url = "https://api.spotify.com/v1/playlists/{}/tracks?uris={}".format(playlist_id, ','.join(tracksUri))
response_final = requests.post(url = endpoint_url,headers={"Content-Type":"application/json", 
                        "Authorization":"Bearer {}".format(token)})

In [291]:
playlist_id = response.json()['id']
playlist_id

'3sNWyEPiUPpeAKPzpHLFUw'

In [294]:
token = 'BQBfCcUGvfFpLnUzJX87WsyezgzCDtAu_wAYQZCMtLoP0LyvMQa0o05RgaVRsPj-p5_bKeWl3G4dGiLary6TRz2blLVJKN5Kr6tL6dg7s5Y25Qb_tlCXj53wfLNt0lTN-xOneKThNSHjoTElYV6E8Y3zjStUfgxiqgsW0eOOQfhMf4b8ByFKnIglQazeOUNIjENTV4TJH1Mi5TQJ6wJi-B04jKp9zteW_bZ3Ax_CBjG-a_7VKRQbF_KwlsylLzJnyKqIgM6pOdOK6KeuZFGFE4HtoNSW';
CLIENT_ID ='5f9548cc54f242bb97c58c5dfeb884c5'
CLIENT_SECRET='03ebdb028ed6454ba33102e925f1fcc1'
USER_ID = '317u5f5phywzysmnov6x6tppkrxq'
tracksUri = []
for i in range(len(final_result_dictionary['playlist'])-1):
    artist = final_result_dictionary['playlist'][i][0]
    track = final_result_dictionary['playlist'][i][1]

    results = spotify.search(q="artist:" + artist + " track:" + track, type="track")
    items = results['tracks']["items"]
    if len(items)>0:
        print(items[0]['uri'])
        tracksUri.append(items[0]['uri'])

# tao playlist
endpoint_url = "https://api.spotify.com/v1/users/{}/playlists".format(USER_ID)
request_body = json.dumps({
        "name": "Test tao playlist",
        "description": "None",
        "public": True
        })
response = requests.post(url = endpoint_url, data = request_body, headers={"Content-Type":"application/json", 
                        "Authorization":"Bearer {}".format(token)})

#add bai hat
playlist_id = response.json()['id']
endpoint_url = "https://api.spotify.com/v1/playlists/{}/tracks?uris={}".format(playlist_id, ','.join(tracksUri))
response_final = requests.post(url = endpoint_url,headers={"Content-Type":"application/json", 
                        "Authorization":"Bearer {}".format(token)})


spotify:track:57iDDD9N9tTWe75x6qhStw
spotify:track:2m1hi0nfMR9vdGC8UcrnwU
spotify:track:4EchqUKQ3qAQuRNKmeIpnf
spotify:track:0BCPKOYdS2jbQ8iyB56Zns
spotify:track:5rLi8B8qgk6qThwRnKHW2P
spotify:track:5619Ojc6t9evEEs3B7Drhe
spotify:track:0a7BloCiNzLDD9qSQHh5m7
spotify:track:6xpDh0dXrkVp0Po1qrHUd8


In [304]:
token = 'BQA_leaA7wOD9-hqZKM9z-KfKX1u-bxyPTVjOMdKhyXdlZb0pDkp4LYvphYPFhyah_37TEcX6lmpPl9r8vA_kCi1nHU6cKfTzUR7wFTVqMlhGIrWo7VevMtIlv-vmDyfWKQOykCciSJpdoFua1Q918rQUtNObNXvHapOapNsRFGeYwxyuB-QlJtgGwz0aJg9sNtVqjL94ruitHPrFKnoJ2tZJgwmLi3XBvsebjUGcSBfUSdUZtpk4Z4kdWHMUt2cad8CYYN1Ly-frmEGXUB4v7oAVtc6';
CLIENT_ID ='5f9548cc54f242bb97c58c5dfeb884c5'
CLIENT_SECRET='03ebdb028ed6454ba33102e925f1fcc1'
USER_ID = '317u5f5phywzysmnov6x6tppkrxq'
tracksUri = []
final_result_dictionary = {'genre': 'rock', 'moods': ['energetic'], 'playlist': [['The Verve', 'Bitter Sweet Symphony'], ['blink-182', 'All The Small Things'], ['The Offspring', "The Kids Aren't Alright"], ['Coldplay', 'Clocks'], ['Panic! At The Disco', 'The Ballad Of Mona Lisa'], ['Metro Station', 'Shake It'], ['Fall Out Boy', 'Dance, Dance'], ['blink-182', "Adam's Song"], ['X', 'White Girl (Single Mix)'], ['The All-American Rejects', 'Dirty Little Secret']]}
for i in range(len(final_result_dictionary['playlist'])-1):
    artist = final_result_dictionary['playlist'][i][0]
    track = final_result_dictionary['playlist'][i][1]

    results = spotify.search(q="artist:" + artist + " track:" + track, type="track")
    items = results['tracks']["items"]
    if len(items)>0:
        print(items[0]['uri'])
        tracksUri.append(items[0]['uri'])
# tao playlist
endpoint_url = "https://api.spotify.com/v1/users/{}/playlists".format(USER_ID)
request_body = json.dumps({
        "name": "Test tao playlist",
        "description": "None",
        "public": True
        })
response = requests.post(url = endpoint_url, data = request_body, headers={"Content-Type":"application/json", 
                        "Authorization":"Bearer {}".format(token)})

#add bai hat
playlist_id = response.json()['id']
endpoint_url = "https://api.spotify.com/v1/playlists/{}/tracks?uris={}".format(playlist_id, ','.join(tracksUri))
response_final = requests.post(url = endpoint_url,headers={"Content-Type":"application/json", 
                        "Authorization":"Bearer {}".format(token)})

return render_template("playlistdemo.html")

spotify:track:57iDDD9N9tTWe75x6qhStw
spotify:track:2m1hi0nfMR9vdGC8UcrnwU
spotify:track:4EchqUKQ3qAQuRNKmeIpnf
spotify:track:0BCPKOYdS2jbQ8iyB56Zns
spotify:track:5rLi8B8qgk6qThwRnKHW2P
spotify:track:5619Ojc6t9evEEs3B7Drhe
spotify:track:0a7BloCiNzLDD9qSQHh5m7
spotify:track:6xpDh0dXrkVp0Po1qrHUd8


SyntaxError: 'return' outside function (562303219.py, line 32)

In [None]:
tracksUri = [
  'spotify:track:5EejTyYwzTK8VsolyWqtzj','spotify:track:5n0uDcarEdMJvk2ab8C84Y','spotify:track:2w4Ex6ObV9rdYLkE4A9jV6','spotify:track:2mSCxOFDrjOH4Nfzqf9uKn','spotify:track:4sccoCUvKgarfGmvfAWoVV','spotify:track:4MZkrUWN2C0dhc7Xn1LPug','spotify:track:7g4WvO2YcBKC64ZsPGhpZG','spotify:track:2HbayD6vTHnDlmG3m19NHT','spotify:track:4FTH4NljhFV6cA4wdPM8G2','spotify:track:6VtfS06eULTWH32DwD5ps2'
]

In [295]:
test = {'genre': 'rock', 'moods': ['energetic'], 'playlist': [['The Verve', 'Bitter Sweet Symphony'], ['blink-182', 'All The Small Things'], ['The Offspring', "The Kids Aren't Alright"], ['Coldplay', 'Clocks'], ['Panic! At The Disco', 'The Ballad Of Mona Lisa'], ['Metro Station', 'Shake It'], ['Fall Out Boy', 'Dance, Dance'], ['blink-182', "Adam's Song"], ['X', 'White Girl (Single Mix)'], ['The All-American Rejects', 'Dirty Little Secret']]}

In [302]:
final_result_dictionary = {'genre': 'rock', 'moods': ['energetic'], 'playlist': [['The Verve', 'Bitter Sweet Symphony'], ['blink-182', 'All The Small Things'], ['The Offspring', "The Kids Aren't Alright"], ['Coldplay', 'Clocks'], ['Panic! At The Disco', 'The Ballad Of Mona Lisa'], ['Metro Station', 'Shake It'], ['Fall Out Boy', 'Dance, Dance'], ['blink-182', "Adam's Song"], ['X', 'White Girl (Single Mix)'], ['The All-American Rejects', 'Dirty Little Secret']]}

In [None]:
def playlistdemo():
    args = request.get_json(force=True)
    final_result_dictionary = args["recs"]
    
    tracksUri = []
    final_result_dictionary = {'genre': 'rock', 'moods': ['energetic'], 'playlist': [['The Verve', 'Bitter Sweet Symphony'], ['blink-182', 'All The Small Things'], ['The Offspring', "The Kids Aren't Alright"], ['Coldplay', 'Clocks'], ['Panic! At The Disco', 'The Ballad Of Mona Lisa'], ['Metro Station', 'Shake It'], ['Fall Out Boy', 'Dance, Dance'], ['blink-182', "Adam's Song"], ['X', 'White Girl (Single Mix)'], ['The All-American Rejects', 'Dirty Little Secret']]}
    for i in range(len(final_result_dictionary['playlist'])-1):
        artist = final_result_dictionary['playlist'][i][0]
        track = final_result_dictionary['playlist'][i][1]

        results = spotify.search(q="artist:" + artist + " track:" + track, type="track")
        items = results['tracks']["items"]
        if len(items)>0:
            print(items[0]['uri'])
            tracksUri.append(items[0]['uri'])
    # tao playlist
    endpoint_url = "https://api.spotify.com/v1/users/{}/playlists".format(USER_ID)
    request_body = json.dumps({
            "name": "Test tao playlist",
            "description": "None",
            "public": True
            })
    response = requests.post(url = endpoint_url, data = request_body, headers={"Content-Type":"application/json", 
                            "Authorization":"Bearer {}".format(token)})

    #add bai hat
    playlist_id = response.json()['id']
    endpoint_url = "https://api.spotify.com/v1/playlists/{}/tracks?uris={}".format(playlist_id, ','.join(tracksUri))
    response_final = requests.post(url = endpoint_url,headers={"Content-Type":"application/json", 
                            "Authorization":"Bearer {}".format(token)})
    return render_template("playlistdemo.html")