In [18]:
import pandas as pd
import numpy as np
import re
import itertools
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import warnings
warnings.filterwarnings("ignore")

In [19]:
pd.set_option('display.max_columns', None)

In [20]:
df_main = pd.read_csv(r'data\data_updated.csv')
complete_feature_set = pd.read_csv(r'data\data_feature_set.csv')

In [21]:
complete_feature_set.drop('Unnamed: 0', axis=1, inplace=True)

In [22]:
df_main.drop('Unnamed: 0', axis=1, inplace=True)

In [23]:
top_songs = pd.read_csv('topsongs6months_100_features.csv')

In [24]:
top_songs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Unnamed: 0      100 non-null    int64  
 1   Song            100 non-null    object 
 2   Artist          100 non-null    object 
 3   id              100 non-null    object 
 4   fav_song_score  100 non-null    float64
dtypes: float64(1), int64(1), object(3)
memory usage: 4.0+ KB


In [25]:
top_songs_upd = top_songs.drop('Unnamed: 0', axis=1)

In [26]:
top_songs_upd['id'] = [re.split('track:', top_songs_upd['id'][i])[1] for i in range(0, top_songs_upd.shape[0])]

In [27]:
top_songs_upd.head()

Unnamed: 0,Song,Artist,id,fav_song_score
0,Present Tense,Radiohead,7KHQtpLpoIV3Wfu22YQT8y,0.8
1,Waltz #1,Elliott Smith,4SD4ri0Msp6DxgoXKcf6ug,0.60115
2,Stupidity Tries,Elliott Smith,4ggVOqzCnaK9vFXAuvxUeM,0.333031
3,Decks Dark,Radiohead,4mRSbPLnOm54ttkTYvxxSY,0.317915
4,Jigsaw Falling Into Place,Radiohead,0YJ9FWWHn9EfnN0lHwbzvV,0.295091


In [28]:
def generate_playlist_feature(complete_feature_set, playlist_df, weight_factor):

    
    complete_feature_set_playlist = complete_feature_set[complete_feature_set['id'].isin(playlist_df['id'].values)]
    complete_feature_set_playlist = complete_feature_set_playlist.merge(playlist_df[['id','fav_song_score']], on = 'id', how = 'inner')
    complete_feature_set_nonplaylist = complete_feature_set[~complete_feature_set['id'].isin(playlist_df['id'].values)]
    
    playlist_feature_set = complete_feature_set_playlist.sort_values('fav_song_score',ascending=False)

    playlist_feature_set['weight'] = playlist_feature_set['fav_song_score'].apply(lambda x: weight_factor ** (-x))
    
    playlist_feature_set_weighted = playlist_feature_set.copy()

    playlist_feature_set_weighted.update(playlist_feature_set_weighted.iloc[:,:-4].mul(playlist_feature_set_weighted.weight,0))
    
    playlist_feature_set_weighted_final = playlist_feature_set_weighted.iloc[:, :-4]
    
    playlist_feature_set_weighted_final['id'] = playlist_feature_set['id']
    
    return playlist_feature_set_weighted_final.sum(axis = 0), complete_feature_set_nonplaylist

In [29]:
complete_feature_set_playlist_vector, complete_feature_set_nonplaylist = generate_playlist_feature(complete_feature_set, top_songs_upd, 1.1)

In [30]:
complete_feature_set_playlist_vector

genre|21st_century_classical                                                  0.0
genre|432hz                                                                   0.0
genre|_hip_hop                                                                0.0
genre|a_cappella                                                              0.0
genre|abstract                                                                0.0
                                                      ...                        
year|2016                                                                     0.0
year|2017                                                                     0.0
year|2018                                                                     0.0
year|2019                                                                0.496981
id                              4SD4ri0Msp6DxgoXKcf6ug5AMrnF761nziCWUfjBgRUI6Y...
Length: 3050, dtype: object

In [31]:
complete_feature_set_nonplaylist.drop(list(set(complete_feature_set_nonplaylist.columns).difference(set(complete_feature_set_playlist_vector.index))), axis=1, inplace=True)

In [32]:
client_id = 'client id'
client_secret = 'secret id'
sp = spotipy.Spotify(client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))

In [35]:
def generate_playlist_recos(df, features, nonplaylist_features):
    non_playlist_df = df[df['id'].isin(nonplaylist_features['id'].values)]
    non_playlist_df['sim'] = cosine_similarity(nonplaylist_features.drop('id', axis = 1).values, features.drop(labels='id').values.reshape(1, -1))[:,0]
    non_playlist_df_top_40 = non_playlist_df.sort_values('sim',ascending = False).head(50)
    non_playlist_df_top_40['url'] = non_playlist_df_top_40['id'].apply(lambda x: sp.track(x)['album']['images'][1]['url'])
    return non_playlist_df_top_40

In [36]:
top50_recommendations = generate_playlist_recos(df_main, complete_feature_set_playlist_vector, complete_feature_set_nonplaylist)

In [37]:
top50_recommendations_upd = top50_recommendations.loc[:, ['id', 'name', 'artists', 'year', 'url']]

In [38]:
top50_recommendations_upd.reset_index(drop=True, inplace=True)

In [39]:
top50_recommendations_upd

Unnamed: 0,id,name,artists,year,url
0,3pwj90XGAD6I0jO3q6fkCx,Baby Britain,['Elliott Smith'],1998,https://i.scdn.co/image/ab67616d00001e02cdf1be...
1,4xfAVJL8R7mVYbDk8a9xOY,Somebody That I Used To Know,['Elliott Smith'],2000,https://i.scdn.co/image/ab67616d00001e02169a23...
2,3pM7gfel0ho7yVJw4KK5Eq,Independence Day,['Elliott Smith'],1998,https://i.scdn.co/image/ab67616d00001e02cdf1be...
3,4zetUxeSCCDwrr1jJT0SuD,Miss Misery,['Elliott Smith'],1998,https://i.scdn.co/image/ab67616d00001e02e35493...
4,44xqX8IBgEkis0PZrKRXp1,Bled White,['Elliott Smith'],1998,https://i.scdn.co/image/ab67616d00001e02cdf1be...
5,6Hlt4S0nm1URyqM0WLlQzY,Easy Way Out,['Elliott Smith'],2000,https://i.scdn.co/image/ab67616d00001e02169a23...
6,2c7AnyxXkbemyTEpzprQVx,Bottle Up And Explode!,['Elliott Smith'],1998,https://i.scdn.co/image/ab67616d00001e02cdf1be...
7,7AG284RTvcDG9uHt1NQIbu,"Oh Well, OK",['Elliott Smith'],1998,https://i.scdn.co/image/ab67616d00001e02cdf1be...
8,1cYudm5fk5igcsjroJKmFw,Tomorrow Tomorrow,['Elliott Smith'],1998,https://i.scdn.co/image/ab67616d00001e02cdf1be...
9,1Q0sk7b7PAGjgC3R5zyuWt,Pitseleh,['Elliott Smith'],1998,https://i.scdn.co/image/ab67616d00001e02cdf1be...
