In [1]:
import pandas as pd
import numpy as np
import pickle
import seaborn as sns
from random import choice
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_distances, euclidean_distances
from sklearn.decomposition import LatentDirichletAllocation

from pymongo import MongoClient

## Builds and pickles the DataFrame and the album-ID key dictionary for the recommender system

In [2]:
# Connect to the local MongoDB instance and grab one handle per collection
# in the 'album_info' database.
client = MongoClient(host='localhost', port=27017)
db = client['album_info']
album_coll, lyric_coll, genre_coll, feat_coll = (
    db[collection_name]
    for collection_name in ('albums', 'lyrics', 'genres', 'feat_df')
)

In [3]:
# Materialise every document of each collection into a plain Python list
# (an empty filter {} matches all documents).
albums = list(album_coll.find({}))
lyrics = list(lyric_coll.find({}))
genres = list(genre_coll.find({}))
features = list(feat_coll.find({}))

In [4]:
# Sanity check: document count per collection, in the order albums, lyrics, genres, features.
tuple(len(documents) for documents in (albums, lyrics, genres, features))

(838, 838, 838, 838)

In [5]:
# One DataFrame per collection, built from the lists of Mongo documents.
albums_df, lyrics_df, genres_df, featur_df = (
    pd.DataFrame(documents)
    for documents in (albums, lyrics, genres, features)
)

In [6]:
# Remove, in place, the Mongo document id and the album-level genres column;
# genres are merged back in later from genres_df.
del albums_df['_id']
del albums_df['genres']

In [7]:
# Work on an independent (deep) copy so the raw albums frame is left as loaded.
albums_cp = albums_df.copy(deep=True)

In [8]:
# Empty placeholder; the column-cleaning cell below assigns new_album_df again
# before filling it, so this cell has no lasting effect.
new_album_df = pd.DataFrame()

In [9]:
# clean cols
# Each album field is read as a list-valued cell and reduced to its first
# element. One helper handles every column, so the non-list guard that only
# upc_code used to have now applies uniformly.
def _first_item(value):
    """Return the first element of a list-valued cell.

    Parameters
    ----------
    value : object
        A cell from ``albums_cp``. Expected to be a list; presumably a scalar
        such as NaN when the field was missing (as seen for ``upc_code``).

    Returns
    -------
    object
        ``value[0]`` when ``value`` is a list, otherwise ``value`` unchanged.

    Raises
    ------
    IndexError
        If ``value`` is an empty list (same as the original per-column loops).
    """
    return value[0] if isinstance(value, list) else value


# Output column order matches the order the columns were previously assigned in.
_ALBUM_COLUMNS = [
    'artist',
    'artist_id',
    'album',
    'album_type',
    'album_id',
    'album_label',
    'album_popularity',
    'release_date',
    'release_prec',
    'album_release',
    'upc_code',
]

# Build the cleaned frame in one step from plain lists so it gets a fresh
# default integer index, exactly as assigning lists column by column did.
new_album_df = pd.DataFrame(
    {
        column: [_first_item(cell) for cell in albums_cp[column]]
        for column in _ALBUM_COLUMNS
    }
)

In [10]:
# Rebind albums_df to the cleaned frame (same object as new_album_df, not a copy)
# and preview the first rows.
albums_df = new_album_df
albums_df.head()

Unnamed: 0,artist,artist_id,album,album_type,album_id,album_label,album_popularity,release_date,release_prec,album_release,upc_code
0,Eleanor Arroway,3x2VCXXsd5bySXXdbhLZPJ,Reaching Out,single,3RoG4xBbhThunES2bojEm7,Firefly Entertainment,53,2019-08-19,day,Reaching Out by Eleanor Arroway,
1,The Dangerous Summer,0iMnpaEHXkgMT956CmP1kj,Reach For The Sun,album,1awdBA8DLv6G5eBvdGxk8T,Hopeless Records,53,2009-05-05,day,Reach For The Sun by The Dangerous Summer,790692070862.0
2,Blackmill,4kjWnaLfIRcLJ1Dy4Wr6tY,Reach For Glory,album,0XHpO9qTpqJJQwa2zFxAAE,Blackmill,52,2011-02-28,day,Reach For Glory by Blackmill,859705377371.0
3,potsu,5XE0fiZWGbq9TcSuWwJ1fA,Reaching For a Star,album,5bcobCRR5ovNoZWAp3iJgG,potsu,53,2020-07-12,day,Reaching For a Star by potsu,193436222020.0
4,Chama Wijnen,7ftRhte6NNzoF9MIKfKJsP,Reaching,single,1coFYbyXU2dxO3lrUsbdIy,Chama Wijnen,56,2019-05-25,day,Reaching by Chama Wijnen,7350109080827.0


In [11]:
# Drop the Mongo document id. errors='ignore' keeps this cell re-runnable:
# the previous in-place pop raised KeyError on a second execution.
lyrics_df = lyrics_df.drop(columns='_id', errors='ignore')
# Treat empty or whitespace-only lyrics as missing values. np.nan is used
# because the np.NaN alias was removed in NumPy 2.0.
lyrics_df['lyrics'] = lyrics_df['lyrics'].replace(r'^\s*$', np.nan, regex=True)
lyrics_df

Unnamed: 0,album_id,lyrics
0,3RoG4xBbhThunES2bojEm7,
1,1awdBA8DLv6G5eBvdGxk8T,"In the back of my mind, well I fought, my Go..."
2,0XHpO9qTpqJJQwa2zFxAAE,Original video InstrumentalInstrumentalInstru...
3,5bcobCRR5ovNoZWAp3iJgG,They said that they want me No matt...
4,1coFYbyXU2dxO3lrUsbdIy,
...,...,...
833,2fYhqwDWXjbpjaIJPEfKFw,"Step up, the two of us, nobody knows us Get ..."
834,0g1F5eGVwX4Sxi1n8ojPkE,I couldn't help but notice you were sitting ...
835,4gvQO5mEuhbMSrLIuwXkmz,"Yeh yeh, yeh yeh, yeh yeh La calle est ap..."
836,6x4UQQ7TVOja89iXV047Zo,Bazzi No Way! Megan Thee Stallion Hot Girl...


In [12]:
# Inner-join album metadata with lyrics on the shared album_id column.
final_df = pd.merge(albums_df, lyrics_df, how='inner', on='album_id')

In [13]:
# Preview the albums + lyrics merge.
final_df.head()

Unnamed: 0,artist,artist_id,album,album_type,album_id,album_label,album_popularity,release_date,release_prec,album_release,upc_code,lyrics
0,Eleanor Arroway,3x2VCXXsd5bySXXdbhLZPJ,Reaching Out,single,3RoG4xBbhThunES2bojEm7,Firefly Entertainment,53,2019-08-19,day,Reaching Out by Eleanor Arroway,,
1,The Dangerous Summer,0iMnpaEHXkgMT956CmP1kj,Reach For The Sun,album,1awdBA8DLv6G5eBvdGxk8T,Hopeless Records,53,2009-05-05,day,Reach For The Sun by The Dangerous Summer,790692070862.0,"In the back of my mind, well I fought, my Go..."
2,Blackmill,4kjWnaLfIRcLJ1Dy4Wr6tY,Reach For Glory,album,0XHpO9qTpqJJQwa2zFxAAE,Blackmill,52,2011-02-28,day,Reach For Glory by Blackmill,859705377371.0,Original video InstrumentalInstrumentalInstru...
3,potsu,5XE0fiZWGbq9TcSuWwJ1fA,Reaching For a Star,album,5bcobCRR5ovNoZWAp3iJgG,potsu,53,2020-07-12,day,Reaching For a Star by potsu,193436222020.0,They said that they want me No matt...
4,Chama Wijnen,7ftRhte6NNzoF9MIKfKJsP,Reaching,single,1coFYbyXU2dxO3lrUsbdIy,Chama Wijnen,56,2019-05-25,day,Reaching by Chama Wijnen,7350109080827.0,


In [14]:
# Remove the Mongo document id in place, then preview the genre lists.
del genres_df['_id']
genres_df.head()

Unnamed: 0,album_id,genres
0,3RoG4xBbhThunES2bojEm7,[background music]
1,1awdBA8DLv6G5eBvdGxk8T,"[anthem emo, dreamo, modern alternative rock, ..."
2,0XHpO9qTpqJJQwa2zFxAAE,"[chillstep, substep]"
3,5bcobCRR5ovNoZWAp3iJgG,"[japanese chillhop, lo-fi beats]"
4,1coFYbyXU2dxO3lrUsbdIy,[]


In [15]:
# Same preview as the previous cell; genres_df is not modified in between.
genres_df.head()

Unnamed: 0,album_id,genres
0,3RoG4xBbhThunES2bojEm7,[background music]
1,1awdBA8DLv6G5eBvdGxk8T,"[anthem emo, dreamo, modern alternative rock, ..."
2,0XHpO9qTpqJJQwa2zFxAAE,"[chillstep, substep]"
3,5bcobCRR5ovNoZWAp3iJgG,"[japanese chillhop, lo-fi beats]"
4,1coFYbyXU2dxO3lrUsbdIy,[]


In [16]:
# Persist the genres frame for the recommender. The context manager closes
# the file handle even if pickling raises; the bare open() call left it to
# the garbage collector.
with open('genres_df.pkl', 'wb') as genres_file:
    pickle.dump(genres_df, genres_file)

In [17]:
# Remove the Mongo document id in place, then preview the per-album audio features.
del featur_df['_id']
featur_df.head()

Unnamed: 0,album_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,3RoG4xBbhThunES2bojEm7,0.1313,0.119325,4.75,-23.6635,1.0,0.03865,0.86925,0.8575,0.0886,0.06895,99.83425,242864.5,3.0
1,1awdBA8DLv6G5eBvdGxk8T,0.462455,0.873273,5.0,-4.239455,0.818182,0.062891,0.009349,2.3e-05,0.1856,0.559455,136.793,224471.454545,3.818182
2,0XHpO9qTpqJJQwa2zFxAAE,0.566727,0.619909,5.454545,-10.143727,0.272727,0.042245,0.165232,0.281491,0.204318,0.257136,139.569182,275057.363636,3.909091
3,5bcobCRR5ovNoZWAp3iJgG,0.643357,0.482071,3.571429,-10.355214,0.571429,0.132664,0.659143,0.318495,0.1842,0.362143,106.973857,177415.5,4.0
4,1coFYbyXU2dxO3lrUsbdIy,0.313,0.00729,9.0,-29.599,1.0,0.0388,0.993,0.796,0.106,0.0799,72.082,176500.0,4.0


In [18]:
# Attach each album's genre list (inner join on album_id) and preview the result.
final_df = pd.merge(final_df, genres_df, how='inner', on='album_id')
final_df.head()

Unnamed: 0,artist,artist_id,album,album_type,album_id,album_label,album_popularity,release_date,release_prec,album_release,upc_code,lyrics,genres
0,Eleanor Arroway,3x2VCXXsd5bySXXdbhLZPJ,Reaching Out,single,3RoG4xBbhThunES2bojEm7,Firefly Entertainment,53,2019-08-19,day,Reaching Out by Eleanor Arroway,,,[background music]
1,The Dangerous Summer,0iMnpaEHXkgMT956CmP1kj,Reach For The Sun,album,1awdBA8DLv6G5eBvdGxk8T,Hopeless Records,53,2009-05-05,day,Reach For The Sun by The Dangerous Summer,790692070862.0,"In the back of my mind, well I fought, my Go...","[anthem emo, dreamo, modern alternative rock, ..."
2,Blackmill,4kjWnaLfIRcLJ1Dy4Wr6tY,Reach For Glory,album,0XHpO9qTpqJJQwa2zFxAAE,Blackmill,52,2011-02-28,day,Reach For Glory by Blackmill,859705377371.0,Original video InstrumentalInstrumentalInstru...,"[chillstep, substep]"
3,potsu,5XE0fiZWGbq9TcSuWwJ1fA,Reaching For a Star,album,5bcobCRR5ovNoZWAp3iJgG,potsu,53,2020-07-12,day,Reaching For a Star by potsu,193436222020.0,They said that they want me No matt...,"[japanese chillhop, lo-fi beats]"
4,Chama Wijnen,7ftRhte6NNzoF9MIKfKJsP,Reaching,single,1coFYbyXU2dxO3lrUsbdIy,Chama Wijnen,56,2019-05-25,day,Reaching by Chama Wijnen,7350109080827.0,,[]


In [19]:
# Attach the audio-feature columns (inner join on album_id).
final_df = pd.merge(final_df, featur_df, how='inner', on='album_id')

In [20]:
# Preview the frame after the audio-feature merge.
final_df.head()

Unnamed: 0,artist,artist_id,album,album_type,album_id,album_label,album_popularity,release_date,release_prec,album_release,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Eleanor Arroway,3x2VCXXsd5bySXXdbhLZPJ,Reaching Out,single,3RoG4xBbhThunES2bojEm7,Firefly Entertainment,53,2019-08-19,day,Reaching Out by Eleanor Arroway,...,-23.6635,1.0,0.03865,0.86925,0.8575,0.0886,0.06895,99.83425,242864.5,3.0
1,The Dangerous Summer,0iMnpaEHXkgMT956CmP1kj,Reach For The Sun,album,1awdBA8DLv6G5eBvdGxk8T,Hopeless Records,53,2009-05-05,day,Reach For The Sun by The Dangerous Summer,...,-4.239455,0.818182,0.062891,0.009349,2.3e-05,0.1856,0.559455,136.793,224471.454545,3.818182
2,Blackmill,4kjWnaLfIRcLJ1Dy4Wr6tY,Reach For Glory,album,0XHpO9qTpqJJQwa2zFxAAE,Blackmill,52,2011-02-28,day,Reach For Glory by Blackmill,...,-10.143727,0.272727,0.042245,0.165232,0.281491,0.204318,0.257136,139.569182,275057.363636,3.909091
3,potsu,5XE0fiZWGbq9TcSuWwJ1fA,Reaching For a Star,album,5bcobCRR5ovNoZWAp3iJgG,potsu,53,2020-07-12,day,Reaching For a Star by potsu,...,-10.355214,0.571429,0.132664,0.659143,0.318495,0.1842,0.362143,106.973857,177415.5,4.0
4,Chama Wijnen,7ftRhte6NNzoF9MIKfKJsP,Reaching,single,1coFYbyXU2dxO3lrUsbdIy,Chama Wijnen,56,2019-05-25,day,Reaching by Chama Wijnen,...,-29.599,1.0,0.0388,0.993,0.796,0.106,0.0799,72.082,176500.0,4.0


In [21]:
# Column dtypes and non-null counts of the fully merged frame.
final_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 838 entries, 0 to 837
Data columns (total 26 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   artist            838 non-null    object 
 1   artist_id         838 non-null    object 
 2   album             838 non-null    object 
 3   album_type        838 non-null    object 
 4   album_id          838 non-null    object 
 5   album_label       838 non-null    object 
 6   album_popularity  838 non-null    int64  
 7   release_date      838 non-null    object 
 8   release_prec      838 non-null    object 
 9   album_release     838 non-null    object 
 10  upc_code          831 non-null    object 
 11  lyrics            742 non-null    object 
 12  genres            838 non-null    object 
 13  danceability      838 non-null    float64
 14  energy            838 non-null    float64
 15  key               838 non-null    float64
 16  loudness          838 non-null    float64
 1

In [22]:
# dfs = pickle.load(open('dfs_w_lyrics.pkl', 'rb'))
# tracks = pd.concat(dfs)
# tracks.reset_index(inplace=True)
# lead_artist = []
# feat_artists = []

# for artists in tracks.artist:
#     lead_artist.append(artists[0])
#     feat_artists.append(artists[1:])

# tracks['lead_artist'] = lead_artist
# tracks['feat_artists'] = feat_artists
# tracks['album_release'] = tracks.album + ' by ' + tracks.lead_artist

In [23]:
# cnt = 0
# for alb in tracks.album.unique():
#     if alb not in final_df.album_id:
#         cnt += 1
        
# cnt

In [24]:
# Column subsets used to split final_df into an audio-feature frame and a
# lyrics frame. Both start with album_id and end with album_release.
feat_cols = (
    ['album_id']
    + [
        'danceability',
        'energy',
        'key',
        'loudness',
        'mode',
        'speechiness',
        'acousticness',
        'instrumentalness',
        'liveness',
        'valence',
        'tempo',
        'duration_ms',
        'time_signature',
    ]
    + ['album_release']
)
lyri_cols = ['album_id'] + ['lyrics'] + ['album_release']

In [25]:
# Preview final_df again before the genres column is flattened to a string.
final_df.head()

Unnamed: 0,artist,artist_id,album,album_type,album_id,album_label,album_popularity,release_date,release_prec,album_release,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Eleanor Arroway,3x2VCXXsd5bySXXdbhLZPJ,Reaching Out,single,3RoG4xBbhThunES2bojEm7,Firefly Entertainment,53,2019-08-19,day,Reaching Out by Eleanor Arroway,...,-23.6635,1.0,0.03865,0.86925,0.8575,0.0886,0.06895,99.83425,242864.5,3.0
1,The Dangerous Summer,0iMnpaEHXkgMT956CmP1kj,Reach For The Sun,album,1awdBA8DLv6G5eBvdGxk8T,Hopeless Records,53,2009-05-05,day,Reach For The Sun by The Dangerous Summer,...,-4.239455,0.818182,0.062891,0.009349,2.3e-05,0.1856,0.559455,136.793,224471.454545,3.818182
2,Blackmill,4kjWnaLfIRcLJ1Dy4Wr6tY,Reach For Glory,album,0XHpO9qTpqJJQwa2zFxAAE,Blackmill,52,2011-02-28,day,Reach For Glory by Blackmill,...,-10.143727,0.272727,0.042245,0.165232,0.281491,0.204318,0.257136,139.569182,275057.363636,3.909091
3,potsu,5XE0fiZWGbq9TcSuWwJ1fA,Reaching For a Star,album,5bcobCRR5ovNoZWAp3iJgG,potsu,53,2020-07-12,day,Reaching For a Star by potsu,...,-10.355214,0.571429,0.132664,0.659143,0.318495,0.1842,0.362143,106.973857,177415.5,4.0
4,Chama Wijnen,7ftRhte6NNzoF9MIKfKJsP,Reaching,single,1coFYbyXU2dxO3lrUsbdIy,Chama Wijnen,56,2019-05-25,day,Reaching by Chama Wijnen,...,-29.599,1.0,0.0388,0.993,0.796,0.106,0.0799,72.082,176500.0,4.0


In [26]:
# Collapse each album's genre list into one space-separated string.
# The type guard makes the cell safe to re-run: joining an already-joined
# string would insert a space between every character.
final_df['genres'] = final_df['genres'].apply(
    lambda genre_list: ' '.join(genre_list)
    if isinstance(genre_list, (list, tuple))
    else genre_list
)

### Filter albums whose artist genre is soul or R&B

In [30]:
# soul = final_df[final_df['genres'].str.contains('soul')]

In [104]:
# rb = final_df[final_df['genres'].str.contains('r&b')]

In [31]:
# Split final_df into an audio-feature frame and a lyrics frame.
# Explicit copies are taken because the following cells mutate these frames
# in place (set_index / pop); a detached copy keeps them clearly independent
# of final_df and avoids pandas' modifying-a-selection warnings.
audio_feature_final_df = final_df[feat_cols].copy()
lyrics_final_df = final_df[lyri_cols].copy()

In [32]:
# Boolean mask: True where an album has no lyrics.
lyrics_flag = lyrics_final_df['lyrics'].isna()

In [90]:
# # group tracks by album
# audio_feature_tracks = audio_feature_tracks.groupby('album_id').mean()
# audio_feature_tracks

In [33]:
# Index the audio features by album_id and drop the album_release column.
# The frame is rebuilt from final_df without in-place mutation, so the cell
# can be re-run; the previous set_index(inplace=True) / pop pair raised
# KeyError on a second execution.
audio_feature_final_df = (
    final_df[feat_cols]
    .set_index('album_id')
    .drop(columns='album_release')
)
audio_feature_final_df

Unnamed: 0_level_0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
album_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
3RoG4xBbhThunES2bojEm7,0.131300,0.119325,4.750000,-23.663500,1.000000,0.038650,0.869250,0.857500,0.088600,0.068950,99.834250,242864.500000,3.000000
1awdBA8DLv6G5eBvdGxk8T,0.462455,0.873273,5.000000,-4.239455,0.818182,0.062891,0.009349,0.000023,0.185600,0.559455,136.793000,224471.454545,3.818182
0XHpO9qTpqJJQwa2zFxAAE,0.566727,0.619909,5.454545,-10.143727,0.272727,0.042245,0.165232,0.281491,0.204318,0.257136,139.569182,275057.363636,3.909091
5bcobCRR5ovNoZWAp3iJgG,0.643357,0.482071,3.571429,-10.355214,0.571429,0.132664,0.659143,0.318495,0.184200,0.362143,106.973857,177415.500000,4.000000
1coFYbyXU2dxO3lrUsbdIy,0.313000,0.007290,9.000000,-29.599000,1.000000,0.038800,0.993000,0.796000,0.106000,0.079900,72.082000,176500.000000,4.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2fYhqwDWXjbpjaIJPEfKFw,0.668250,0.525583,4.666667,-6.622000,0.666667,0.141642,0.298733,0.004568,0.154742,0.388917,113.231167,205851.166667,3.833333
0g1F5eGVwX4Sxi1n8ojPkE,0.588000,0.664818,3.818182,-5.072455,0.909091,0.052591,0.188452,0.000000,0.164909,0.518818,131.354909,192924.909091,4.000000
4gvQO5mEuhbMSrLIuwXkmz,0.783400,0.674300,5.900000,-7.241900,0.600000,0.124690,0.178350,0.001068,0.158800,0.618000,117.293300,181906.000000,4.000000
6x4UQQ7TVOja89iXV047Zo,0.348000,0.652000,3.000000,-7.145000,1.000000,0.093100,0.016500,0.000264,0.330000,0.172000,70.956000,192987.000000,4.000000


In [38]:
# lyrics_tracks['lyrics'] = lyrics_tracks['lyrics'].apply(lambda x: decode_lyrics(x))

In [227]:
# lyrics_tracks = lyrics_tracks[~lyrics_flag]
# groupby_test = lyrics_tracks.groupby('album_id')['lyrics'].apply(lambda x: ' '.join(x))
# lyrics_tracks = pd.DataFrame(groupby_test)
# # lyrics_tracks

In [34]:
# Index the lyrics by album_id and drop the album_release column.
# The frame is rebuilt from final_df without in-place mutation, so the cell
# can be re-run; the previous set_index(inplace=True) / pop pair raised
# KeyError on a second execution.
lyrics_final_df = (
    final_df[lyri_cols]
    .set_index('album_id')
    .drop(columns='album_release')
)
lyrics_final_df

Unnamed: 0_level_0,lyrics
album_id,Unnamed: 1_level_1
3RoG4xBbhThunES2bojEm7,
1awdBA8DLv6G5eBvdGxk8T,"In the back of my mind, well I fought, my Go..."
0XHpO9qTpqJJQwa2zFxAAE,Original video InstrumentalInstrumentalInstru...
5bcobCRR5ovNoZWAp3iJgG,They said that they want me No matt...
1coFYbyXU2dxO3lrUsbdIy,
...,...
2fYhqwDWXjbpjaIJPEfKFw,"Step up, the two of us, nobody knows us Get ..."
0g1F5eGVwX4Sxi1n8ojPkE,I couldn't help but notice you were sitting ...
4gvQO5mEuhbMSrLIuwXkmz,"Yeh yeh, yeh yeh, yeh yeh La calle est ap..."
6x4UQQ7TVOja89iXV047Zo,Bazzi No Way! Megan Thee Stallion Hot Girl...


In [35]:
# Recombine audio features and lyrics into the single frame the recommender uses.
# Both frames are indexed by album_id at this point, so on='album_id' joins on
# that index level name (default inner join).
# track_info_by_album = audio_feature_tracks.merge(lyrics_tracks, on='album_id')
df_for_recommender = audio_feature_final_df.merge(lyrics_final_df, on='album_id')
# dfs_for_recomm = [track_info_by_album, from_script_info_by_album]

In [41]:
# df_for_recommender = pd.concat(dfs_for_recomm)

In [36]:
# Boolean mask over the recommender frame: True where lyrics are missing.
df_for_recomm_lyrics_flag = df_for_recommender['lyrics'].isna()

In [37]:
# Keep only the albums that have lyrics (rows where the missing-lyrics mask is False).
df_for_recommender = df_for_recommender[~df_for_recomm_lyrics_flag]

In [38]:
# Preview the recommender frame after dropping albums without lyrics.
df_for_recommender.head()

Unnamed: 0_level_0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics
album_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1awdBA8DLv6G5eBvdGxk8T,0.462455,0.873273,5.0,-4.239455,0.818182,0.062891,0.009349,2.329909e-05,0.1856,0.559455,136.793,224471.454545,3.818182,"In the back of my mind, well I fought, my Go..."
0XHpO9qTpqJJQwa2zFxAAE,0.566727,0.619909,5.454545,-10.143727,0.272727,0.042245,0.165232,0.2814905,0.204318,0.257136,139.569182,275057.363636,3.909091,Original video InstrumentalInstrumentalInstru...
5bcobCRR5ovNoZWAp3iJgG,0.643357,0.482071,3.571429,-10.355214,0.571429,0.132664,0.659143,0.3184946,0.1842,0.362143,106.973857,177415.5,4.0,They said that they want me No matt...
6Zho4ar8UMxJLMDpWcLGto,0.49625,0.183,3.25,-15.0645,1.0,0.033725,0.78025,0.0009364,0.13975,0.141975,112.43025,216186.25,4.0,When you try your best but you don't succeed...
07OjWJVHVzsbZ5ytXhj7ou,0.708667,0.620056,4.0,-8.682722,0.611111,0.319444,0.031173,1.566667e-07,0.184667,0.522444,121.037,163378.055556,3.777778,Lyrics for this song ha...


In [39]:
# Column dtypes and non-null counts of the recommender frame.
df_for_recommender.info()

<class 'pandas.core.frame.DataFrame'>
Index: 742 entries, 1awdBA8DLv6G5eBvdGxk8T to 2xwFfH8fO1MRmWZbNKZVqt
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   danceability      742 non-null    float64
 1   energy            742 non-null    float64
 2   key               742 non-null    float64
 3   loudness          742 non-null    float64
 4   mode              742 non-null    float64
 5   speechiness       742 non-null    float64
 6   acousticness      742 non-null    float64
 7   instrumentalness  742 non-null    float64
 8   liveness          742 non-null    float64
 9   valence           742 non-null    float64
 10  tempo             742 non-null    float64
 11  duration_ms       742 non-null    float64
 12  time_signature    742 non-null    float64
 13  lyrics            742 non-null    object 
dtypes: float64(13), object(1)
memory usage: 87.0+ KB


In [44]:
# Lookup table filled in the next cell: album_id -> album_release string.
album_key_dictionary = {}

In [45]:
# Map each album_id to its album_release string, pairing the two columns row
# by row. Zipping album_id.unique() with album_release.unique() is unsafe:
# if two different album_ids share the same album_release text, the two
# unique arrays have different lengths and every later id is paired with the
# wrong release.
for _id, release in zip(final_df['album_id'], final_df['album_release']):
    # setdefault keeps the first release seen for an id and leaves existing
    # entries untouched, as the original membership check did.
    album_key_dictionary.setdefault(_id, release)

album_key_dictionary

{'3RoG4xBbhThunES2bojEm7': 'Reaching Out by Eleanor Arroway',
 '1awdBA8DLv6G5eBvdGxk8T': 'Reach For The Sun by The Dangerous Summer',
 '0XHpO9qTpqJJQwa2zFxAAE': 'Reach For Glory by Blackmill',
 '5bcobCRR5ovNoZWAp3iJgG': 'Reaching For a Star by potsu',
 '1coFYbyXU2dxO3lrUsbdIy': 'Reaching by Chama Wijnen',
 '6Zho4ar8UMxJLMDpWcLGto': 'Refuge by Canyon City',
 '07OjWJVHVzsbZ5ytXhj7ou': 'Real Hate by Philthy Rich',
 '6xBdoRdtUb2Wl23WkyJGQj': 'Reach for Her Echo by Luke Davies',
 '7nWW2h7SFTPQJgJX0h1IMA': 'Reaching by The Midnight Quartet',
 '4hxJ8XzBPYGHeHRFhEd42y': 'Reach by Eyes Set To Kill',
 '5CfVDwhGVceENVF42EZkej': 'Rauchzeichen by Dramadigs',
 '1csZ1TtmblBsD8JkdSaJdL': 'REACHUPDONTSTOP by Baauer',
 '7w7VqxP5aEOz7SFK3ocyUX': 'Reach For The Sky by Ratt',
 '5vnOVW8xaIJVwyITqAVkvW': 'Reaching into Infinity (Special Edition) by DragonForce',
 '6U8xWVIzm3AusSeTnxwf4m': 'Real Hate (Deluxe Edition) by Philthy Rich',
 '06jz1EAw6DYQ2kRYy4A3Ph': 'Refuge by Samuel Kim',
 '59PSddujJQucvH3gV7JWjM

In [56]:
# pickle.dump(album_key_dictionary, open('album_key_dictionary.pkl', 'wb'))

In [50]:
# pickle.dump(df_for_recommender, open('df_for_recommender.pkl', 'wb'))

In [52]:
# pickle.dump(genres_df, open('genres_df.pkl', 'wb'))