In [1]:
import os
import pandas as pd
import numpy as np
import json
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import yaml
import re
from tqdm import tqdm
import multiprocessing as mp
import time
import random
import datetime
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from skimage import io
from sklearn.decomposition import PCA

##########################################################################################
# Load Spotify Credentials


def load_spotify_credentials(file_path):
    with open(file_path, 'r') as file:
        return yaml.safe_load(file)

##########################################################################################
# Authenticate with Spotify


def local_authenticate_spotify(spotify_credentials):
    # Adjusting to the keys as they appear in the YAML file
    client_id = spotify_credentials.get("Client_id")
    client_secret = spotify_credentials.get("client_secret")

    if not client_id or not client_secret:
        raise ValueError(
            "Spotify credentials must include 'Client_id' and 'client_secret'.")

    auth_manager = SpotifyClientCredentials(
        client_id=client_id, client_secret=client_secret)
    return spotipy.Spotify(auth_manager=auth_manager)


##########################################################################################
# Load and preprocess dataset


def load_and_preprocess_data(file_path):
    dtypes = {
        'track_uri': 'object', 'artist_uri': 'object', 'album_uri': 'object',
        'danceability': 'float16', 'energy': 'float16', 'key': 'float16',
        'loudness': 'float16', 'mode': 'float16', 'speechiness': 'float16',
        'acousticness': 'float16', 'instrumentalness': 'float16',
        'liveness': 'float16', 'valence': 'float16', 'tempo': 'float16',
        'duration_ms': 'float32', 'time_signature': 'float16',
        'Track_release_date': 'int8', 'Track_pop': 'int8', 'Artist_pop': 'int8',
        'Artist_genres': 'object'
    }
    try:
        df = pd.read_csv(file_path, dtype=dtypes)
    except Exception as e:
        print('Failed to load data:', e)
        df = pd.DataFrame()

    # Add any additional preprocessing steps here if needed
    return df

##########################################################################################


def get_IDs(spotify_client, playlist_id):
    track_ids = []
    artist_ids = []
    playlist = spotify_client.playlist(playlist_id)

    for item in playlist['tracks']['items']:
        track = item['track']
        track_ids.append(track['id'])
        artist = item['track']['artists']
        artist_ids.append(artist[0]['id'])

    return track_ids, artist_ids

##########################################################################################


def fetch_track_artist_details(spotify_client, track_ids_uni, artist_id_uni):
    audio_features = pd.DataFrame()
    track_details = pd.DataFrame()
    artist_details = pd.DataFrame()

    # Fetch track audio features
    for i in tqdm(range(0, len(track_ids_uni), 25)):
        try:
            track_feature = spotify_client.audio_features(
                track_ids_uni[i:i+25])
            track_df = pd.DataFrame(track_feature)
            audio_features = pd.concat([audio_features, track_df], axis=0)
        except Exception as e:
            print(e)

    # Fetch track details
    for i in tqdm(range(0, len(track_ids_uni), 25)):
        try:
            track_features = spotify_client.tracks(track_ids_uni[i:i+25])
            for x in range(len(track_features['tracks'])):
                track_pop = pd.DataFrame(
                    [track_ids_uni[i+x]], columns=['Track_uri'])
                track_pop['Track_release_date'] = track_features['tracks'][x]['album']['release_date']
                track_pop['Track_pop'] = track_features['tracks'][x]['popularity']
                track_pop['Artist_uri'] = track_features['tracks'][x]['artists'][0]['id']
                track_pop['Album_uri'] = track_features['tracks'][x]['album']['id']
                track_details = pd.concat([track_details, track_pop], axis=0)
        except Exception as e:
            print(e)

    # Fetch artist details
    for i in tqdm(range(0, len(artist_id_uni), 25)):
        try:
            artist_features = spotify_client.artists(artist_id_uni[i:i+25])
            for x in range(len(artist_features['artists'])):
                artist_df = pd.DataFrame(
                    [artist_id_uni[i+x]], columns=['Artist_uri'])
                artist_pop = artist_features['artists'][x]['popularity']
                artist_genres = artist_features['artists'][x]['genres']
                artist_df['Artist_pop'] = artist_pop
                artist_df['genres'] = " ".join(
                    [re.sub(' ', '_', genre) for genre in artist_genres]) if artist_genres else "unknown"
                artist_details = pd.concat([artist_details, artist_df], axis=0)
        except Exception as e:
            print(e)

    return audio_features, track_details, artist_details

##########################################################################################


# def preprocess_and_merge_data(df, audio_features, track_details, artist_details):
#     # Rename columns
#     track_details.rename(columns={
#         'Track_uri': 'track_uri', 'Artist_uri': 'artist_uri', 'Album_uri': 'album_uri'}, inplace=True)

#     # Drop unnecessary columns in audio_features
#     audio_features.drop(
#         columns=['type', 'uri', 'track_href', 'analysis_url'], axis=1, inplace=True)

#     # Merging dataframes
#     merged_df = pd.merge(track_details, audio_features,
#                          left_on="track_uri", right_on="id", how='outer')
#     merged_df = pd.merge(merged_df, artist_details,
#                          left_on="artist_uri", right_on="Artist_uri", how='outer')

#     # Rename and conditionally drop columns
#     merged_df.rename(columns={'genres': 'Artist_genres'}, inplace=True)
#     columns_to_drop = ['Track_uri', 'Artist_uri_x',
#                        'Artist_uri_y', 'Album_uri', 'id']
#     columns_to_drop = [
#         col for col in columns_to_drop if col in merged_df.columns]
#     merged_df.drop(columns=columns_to_drop, axis=1, inplace=True)

#     # Data cleaning and transformation
#     merged_df.dropna(axis=0, inplace=True)
#     merged_df['Track_pop'] = merged_df['Track_pop'].apply(lambda x: int(x/5))
#     merged_df['Artist_pop'] = merged_df['Artist_pop'].apply(lambda x: int(x/5))
#     merged_df['Track_release_date'] = merged_df['Track_release_date'].apply(
#         lambda x: x.split('-')[0])
#     merged_df['Track_release_date'] = merged_df['Track_release_date'].astype(
#         'int16')
#     merged_df['Track_release_date'] = merged_df['Track_release_date'].apply(
#         lambda x: int(x/50))

#     # Converting data types for efficiency
#     cols_to_float16 = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
#                        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature']
#     merged_df[cols_to_float16] = merged_df[cols_to_float16].astype('float16')
#     merged_df['duration_ms'] = merged_df['duration_ms'].astype('float32')
#     cols_to_int8 = ['Track_release_date', 'Track_pop', 'Artist_pop']
#     merged_df[cols_to_int8] = merged_df[cols_to_int8].astype('int8')

#     # TF-IDF Transformation
#     tfidf = TfidfVectorizer(max_features=5)  # Adjust max_features as needed
#     merged_df['Artist_genres'] = merged_df['Artist_genres'].apply(lambda x: x.split(" "))
#     tfidf_matrix = tfidf.fit_transform(merged_df['Artist_genres'].apply(lambda x: " ".join(x)))
#     genre_df = pd.DataFrame(tfidf_matrix.toarray())
#     genre_df.columns = ['genre|' + i for i in tfidf.get_feature_names_out()]
#     genre_df = genre_df.astype('float16')

#     # Final dataframe construction
#     merged_df.drop(columns=['Artist_genres'], axis=1, inplace=True)
#     merged_df = pd.concat([merged_df.reset_index(drop=True), genre_df.reset_index(drop=True)], axis=1)

#     # Handling genre|unknown column if present
#     try:
#         merged_df.drop(columns=['genre|unknown'], axis=1, inplace=True)
#     except KeyError:
#         print('genre|unknown not found')

#     # MinMaxScaler transformation
#     scaler = MinMaxScaler()
#     numeric_cols = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
#                     'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
#                     'duration_ms', 'time_signature', 'Track_release_date', 'Track_pop', 'Artist_pop']
#     numeric_cols = [col for col in numeric_cols if col in merged_df.columns]
#     merged_df[numeric_cols] = scaler.fit_transform(merged_df[numeric_cols])
#     pickle.dump(scaler, open('./data/scaler.sav', 'wb'))

#     return merged_df

##########################################################################################

##########################################################################################
def recommend_tracks(df, playvec, sp):
    # Select numeric features
    numeric_features = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness',
                        'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature', 'Track_release_date', 'Track_pop', 'Artist_pop']

    # Extract numeric features for PCA
    df_new_numeric = df[numeric_features]
    playvec_new_numeric = playvec[numeric_features]

    # Initialize and fit PCA
    pca = PCA(n_components=0.95)
    pca.fit(df_new_numeric)

    # Transform both df and playvec numeric features using PCA
    df_pca = pca.transform(df_new_numeric)
    playvec_pca = pca.transform(playvec_new_numeric)

    # Convert PCA components to DataFrame
    df_pca_df = pd.DataFrame(df_pca, index=df.index)
    playvec_pca_df = pd.DataFrame(playvec_pca, index=playvec.index)

    # Calculate cosine similarity for PCA and genres
    df['sim_pca'] = cosine_similarity(df_pca_df, playvec_pca_df)
    df['sim_genres'] = cosine_similarity(df.loc[:, df.columns.str.startswith(
        'genre')], playvec.loc[:, playvec.columns.str.startswith('genre')])
    df['sim_combined'] = (df['sim_pca'] + df['sim_genres']) / 2

    # Sort based on similarity score
    df = df.sort_values(['sim_genres', 'sim_combined'],
                        ascending=False, kind='stable')

    # Get the list of top track URIs
    top_tracks = df.groupby('artist_uri').head(2).track_uri.head(20)

    # Fetch track details from Spotify
    track_details = sp.tracks(top_tracks[0:20])
    Fresult = pd.DataFrame()
    for i in range(20):
        result = pd.DataFrame([i], columns=['Index'])
        result['track_name'] = track_details['tracks'][i]['name']
        result['artist_name'] = track_details['tracks'][i]['artists'][0]['name']
        result['pop'] = track_details['tracks'][i]["popularity"]
        Fresult = pd.concat([Fresult, result], axis=0)

    return Fresult.reset_index(drop=True)

##########################################################################################


def fetch_spotify_recommendations(test, sp):
    Spotifyresult = pd.DataFrame()

    for i in range(len(test) - 1):
        if len(Spotifyresult) >= 20:
            break

        # Fetch recommendations based on a seed of tracks
        ff = sp.recommendations(seed_tracks=list(
            test.track_uri[1 + i:5 + i]), limit=2)

        for z in range(2):
            result = pd.DataFrame([z + (2 * i) + 1], columns=['Index'])
            result['track_name'] = ff['tracks'][z]['name']
            result['artist_name'] = ff['tracks'][z]['artists'][0]['name']
            result['pop'] = ff['tracks'][z]["popularity"]
            Spotifyresult = pd.concat([Spotifyresult, result], axis=0)

    return Spotifyresult.reset_index(drop=True)

##########################################################################################
# Main execution function

## Check functions

In [2]:
spotify_credentials_file = 'Spotify.yaml'
spotify_credentials = load_spotify_credentials(spotify_credentials_file)
sp = local_authenticate_spotify(spotify_credentials)

In [3]:
# Example playlist ID - replace with the actual ID
playlist_id = 'spotify:playlist:37i9dQZF1E8NgXcf5gQPXv'

In [4]:
# Fetch track and artist IDs
track_ids, artist_ids = get_IDs(sp, playlist_id)

In [5]:
# Fetch track and artist details
audio_features, track_details, artist_details = fetch_track_artist_details(sp, 
                                                                           track_ids, 
                                                                           artist_ids)

100%|██████████| 2/2 [00:00<00:00,  6.93it/s]
100%|██████████| 2/2 [00:00<00:00,  2.29it/s]
100%|██████████| 2/2 [00:00<00:00,  3.24it/s]


In [6]:
artist_details.head()

Unnamed: 0,Artist_uri,Artist_pop,genres
0,0AspLZGQkP38yddNoD0pLn,55,latin_viral_pop mexican_pop reggaeton_mexicano
0,1QivQCLVipV61DiQiyV14A,64,latin_viral_pop reggaeton_mexicano urbano_mexi...
0,5xSx2FM8mQnrfgM1QsHniB,69,latin_pop latin_viral_pop mexican_pop
0,7uQ1D2NNHs5cUL3CLKRbia,61,pop_venezolano
0,6kfMprr3qI5Bg2JNSGU7l7,50,colombian_pop


In [7]:
artist_details.shape

(50, 3)

In [8]:
track_details.head()

Unnamed: 0,Track_uri,Track_release_date,Track_pop,Artist_uri,Album_uri
0,1abN81EF3BNKoVpSxvQXPi,2022-03-04,63,0AspLZGQkP38yddNoD0pLn,158sVr4HeR7ukWJMDGmdFw
0,5ggoKefe3Hk0UstyqSI8FA,2022-10-20,72,1QivQCLVipV61DiQiyV14A,417TYUiFFmZrHLGkTOTuJV
0,7dMTCS9BLzBqYTlAuHP8TM,2022-08-12,73,5xSx2FM8mQnrfgM1QsHniB,2SGONYwprYHZruYFhQYiFC
0,4fBDh59Cybyp5UNqoNGRfP,2022-04-29,61,7uQ1D2NNHs5cUL3CLKRbia,5INXB8bPP0WzT5lLuhRSlF
0,00upaXo7vjKRiqKZXeTJgK,2022-02-27,55,6kfMprr3qI5Bg2JNSGU7l7,0PNvwVWgpHWLRxDg7sFvYA


In [9]:
track_details.shape

(50, 5)

In [10]:
audio_features.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.752,0.587,6,-5.374,0,0.0291,0.281,9e-06,0.148,0.584,90.082,audio_features,1abN81EF3BNKoVpSxvQXPi,spotify:track:1abN81EF3BNKoVpSxvQXPi,https://api.spotify.com/v1/tracks/1abN81EF3BNK...,https://api.spotify.com/v1/audio-analysis/1abN...,185461,3
1,0.816,0.763,8,-4.488,0,0.0641,0.253,0.0,0.0967,0.313,104.952,audio_features,5ggoKefe3Hk0UstyqSI8FA,spotify:track:5ggoKefe3Hk0UstyqSI8FA,https://api.spotify.com/v1/tracks/5ggoKefe3Hk0...,https://api.spotify.com/v1/audio-analysis/5ggo...,212571,4
2,0.797,0.677,9,-4.163,0,0.0857,0.101,0.0,0.441,0.919,130.034,audio_features,7dMTCS9BLzBqYTlAuHP8TM,spotify:track:7dMTCS9BLzBqYTlAuHP8TM,https://api.spotify.com/v1/tracks/7dMTCS9BLzBq...,https://api.spotify.com/v1/audio-analysis/7dMT...,219320,4
3,0.745,0.795,7,-5.091,0,0.0367,0.113,0.0,0.107,0.718,116.954,audio_features,4fBDh59Cybyp5UNqoNGRfP,spotify:track:4fBDh59Cybyp5UNqoNGRfP,https://api.spotify.com/v1/tracks/4fBDh59Cybyp...,https://api.spotify.com/v1/audio-analysis/4fBD...,180649,4
4,0.868,0.428,4,-8.441,1,0.107,0.666,0.0,0.0918,0.727,90.047,audio_features,00upaXo7vjKRiqKZXeTJgK,spotify:track:00upaXo7vjKRiqKZXeTJgK,https://api.spotify.com/v1/tracks/00upaXo7vjKR...,https://api.spotify.com/v1/audio-analysis/00up...,182000,4


In [11]:
audio_features.shape

(50, 18)

In [12]:
data_file_path = './data/1M_processed.csv'
df = load_and_preprocess_data(data_file_path)

In [13]:
def preprocess_and_merge_data(df, audio_features, track_details, artist_details):
    # Rename columns
    test=pd.DataFrame(track_details , columns=['Track_uri','Artist_uri','Album_uri'])
    test.rename(columns = {'Track_uri':'track_uri','Artist_uri':'artist_uri','Album_uri':'album_uri'}, inplace = True)
    audio_features_update = audio_features.copy()
    audio_features_update.drop(columns=['type','uri','track_href','analysis_url'],axis=1,inplace=True)

    test = pd.merge(test, audio_features_update, left_on = "track_uri", right_on= "id",how = 'inner')
    test = pd.merge(test, track_details, left_on = "track_uri", right_on= "Track_uri",how = 'inner')
    test = pd.merge(test, artist_details, left_on = "artist_uri", right_on= "Artist_uri",how = 'inner')
    #test.drop_duplicates(inplace=True)
    
    test.rename(columns = {'genres':'Artist_genres'}, inplace = True)
    test.drop(columns=['Track_uri','Artist_uri_x','Artist_uri_y','Album_uri','id'],axis=1,inplace=True)
    test.dropna(axis=0,inplace=True)

    test['Track_pop'] = test['Track_pop'].apply(lambda x: int(x/5))
    test['Artist_pop'] = test['Artist_pop'].apply(lambda x: int(x/5))
    test['Track_release_date'] = test['Track_release_date'].apply(lambda x: x.split('-')[0])
    test['Track_release_date']=test['Track_release_date'].astype('int16')
    test['Track_release_date'] = test['Track_release_date'].apply(lambda x: int(x/50))

    test[['danceability', 'energy', 'key','loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness','liveness', 'valence', 'tempo', 'time_signature']]=test[['danceability', 'energy', 'key','loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness','liveness', 'valence', 'tempo','time_signature']].astype('float16')
    test[['duration_ms']]=test[['duration_ms']].astype('float32')
    test[['Track_release_date', 'Track_pop', 'Artist_pop']]=test[['Track_release_date', 'Track_pop', 'Artist_pop']].astype('int8')
    currentdf=len(df)

    df=pd.concat([df,test],axis=0)
    df.drop_duplicates(subset=['track_uri'],inplace=True,keep='last') ## keep last to keep the dataset updated
    df.dropna(axis=0,inplace=True)
    
    #saving the tracks if they weren't found in the dataset
    if len(df)>currentdf:
        df.to_csv('./data/1M_processed.csv',index=False)
        print('{} New Found'.format(len(df)-currentdf))
        streamlit=df[df.Track_pop >0]             # dropped track with 0 popularity score to save space and ram for the final model
        ##### may need to adjust#####
        streamlit.to_csv('./data/streamlit.csv',index=False)
        del streamlit

    df = df[~df['track_uri'].isin(test['track_uri'].values)]
    test['Artist_genres'] = test['Artist_genres'].apply(lambda x: x.split(" "))
    tfidf = TfidfVectorizer(max_features=5) #max_features=5
    tfidf_matrix = tfidf.fit_transform(test['Artist_genres'].apply(lambda x: " ".join(x)))
    genre_df = pd.DataFrame(tfidf_matrix.toarray())
    genre_df.columns = ['genre' + "|" + i for i in tfidf.get_feature_names_out()]

    genre_df=genre_df.astype('float16')
    test.drop(columns=['Artist_genres'],axis=1,inplace=True)
    test = pd.concat([test.reset_index(drop=True), genre_df.reset_index(drop=True)],axis = 1)
    test.isna().sum().sum()
    
    
    df['Artist_genres'] = df['Artist_genres'].apply(lambda x: x.split(" "))
    tfidf_matrix = tfidf.transform(df['Artist_genres'].apply(lambda x: " ".join(x)))
    genre_df = pd.DataFrame(tfidf_matrix.toarray())
    genre_df.columns = ['genre' + "|" + i for i in tfidf.get_feature_names_out()]
    genre_df=genre_df.astype('float16')
    df.drop(columns=['Artist_genres'],axis=1,inplace=True)

    df = pd.concat([df.reset_index(drop=True), genre_df.reset_index(drop=True)],axis = 1)
    try:
        df.drop(columns=['genre|unknown'],axis=1,inplace=True)
        test.drop(columns=['genre|unknown'],axis=1,inplace=True)
    except:
        print('genre|unknown not found')

    sc=MinMaxScaler()
    df[df.columns[3:19]] = sc.fit_transform(df.iloc[:,3:19]) #in the saved dataset get all rows, and columns including audio features. note that genre is not included
    pickle.dump(sc, open('./data/sc.sav', 'wb'))

    test[test.columns[3:19]] = sc.transform(test.iloc[:,3:19]) #based on input play list, get all rows, and columns including audio features. note that genre is not included
    
    return df, test

In [14]:
# Merge data
df_update, test = preprocess_and_merge_data(df, 
                                 audio_features, 
                                 track_details, 
                                 artist_details)

genre|unknown not found


In [15]:
test.tail(20)

Unnamed: 0,track_uri,artist_uri,album_uri,danceability,energy,key,loudness,mode,speechiness,acousticness,...,duration_ms,time_signature,Track_release_date,Track_pop,Artist_pop,genre|latin_arena_pop,genre|latin_pop,genre|latin_viral_pop,genre|mexican_pop,genre|reggaeton_mexicano
30,6QO6mM71UuwzCPXUSprNnn,1QgrwYywvDuC43MDtR8cqq,5DtewXaWzI7IDVLeM58u0V,0.684236,0.666016,0.0,0.857772,1.0,0.031501,0.511765,...,0.028727,0.8,1.0,0.4,0.6,0.0,0.0,0.0,0.0,0.0
31,10AfmkJNFYQe8IB4lDPk7C,1QgrwYywvDuC43MDtR8cqq,5DtewXaWzI7IDVLeM58u0V,0.753695,0.48291,0.818182,0.857652,1.0,0.03224,0.13652,...,0.029609,0.8,1.0,0.5,0.6,0.0,0.0,0.0,0.0,0.0
32,7mK3TwbmGlXcBGTWztkQbe,0PrhwIWbqYFYyY2ZrkIWgI,0KTZkb2lyXX2FZ8vGfqxtH,0.546798,0.486084,0.818182,0.852158,1.0,0.034188,0.501961,...,0.027628,0.8,1.0,0.45,0.6,0.0,0.0,0.0,0.0,0.0
33,0J87ieRdbGPItHA6qc3KCr,5B8ApeENp4bE4EE3LI8jK2,4l8OrF24fhXsCFF6wvP5fE,0.751724,0.479004,0.636364,0.838998,1.0,0.027291,0.569118,...,0.03831,0.8,1.0,0.65,0.65,0.68457,0.57373,0.0,0.449707,0.0
34,6Ec3FPbwfOCoICwSQrlPOz,3SCOuAxngTC1yGjKMcIPEd,5L5zhlV2NQALYptNM3WNIz,0.789163,0.660156,0.727273,0.86067,0.0,0.039938,0.052819,...,0.032911,0.8,1.0,0.6,0.6,0.0,0.0,0.0,0.0,0.0
35,6nQ3fhwmCWMoEa7m9Vn9du,0haZhu4fFKt0Ag94kZDiz2,3F5VmDv3oeerueNteT7JFc,0.719212,0.601074,0.090909,0.819318,1.0,0.31096,0.118444,...,0.031563,0.8,1.0,0.55,0.65,0.0,0.638672,0.584473,0.500488,0.0
36,6Z6FqnImFvNvxg9aHW2HKz,43qxAkuKFB6fMNSeS5dO7Z,731ECu5lhhgFIDPtbgOwe3,0.784729,0.681152,0.181818,0.837066,0.0,0.030999,0.294118,...,0.02851,0.8,1.0,0.5,0.6,0.0,0.0,0.0,1.0,0.0
37,4gBxREFDhxi8Hw4qeiYt7B,1gJdf4Yybu4X5A2xYV3NMV,73ldGcnuZUBccjAhd4jgXZ,0.855665,0.551758,0.454545,0.820706,1.0,0.058792,0.251961,...,0.027382,0.8,1.0,0.6,0.7,0.0,0.0,0.0,0.0,0.0
38,7wT5MIjA3LPuRFCt9KOA8M,2O3v9rCTzLhPFaGaAVgZLt,0HOVRCxR3Twx4U8sS4ZR86,0.566995,0.48291,0.636364,0.836402,1.0,0.032523,0.19277,...,0.037576,0.8,1.0,0.55,0.65,0.605957,0.507812,0.465088,0.397949,0.0
39,30TZy1etY3FAGBDGS58hRj,2O3v9rCTzLhPFaGaAVgZLt,7tUvz3LUmXYRCZX8lUPTrC,0.607389,0.47998,0.454545,0.817205,1.0,0.033245,0.686765,...,0.034251,0.8,1.0,0.7,0.65,0.605957,0.507812,0.465088,0.397949,0.0


In [16]:
test.shape

(50, 24)

In [17]:
# Generate playvec by summing the features of the user's playlist
playvec = pd.DataFrame(test.sum(axis=0)).T

In [18]:
playvec

Unnamed: 0,track_uri,artist_uri,album_uri,danceability,energy,key,loudness,mode,speechiness,acousticness,...,duration_ms,time_signature,Track_release_date,Track_pop,Artist_pop,genre|latin_arena_pop,genre|latin_pop,genre|latin_viral_pop,genre|mexican_pop,genre|reggaeton_mexicano
0,1abN81EF3BNKoVpSxvQXPi2HsBez6K2kkqzkxHQqRusL7C...,0AspLZGQkP38yddNoD0pLn0AspLZGQkP38yddNoD0pLn0A...,158sVr4HeR7ukWJMDGmdFw48FTvOrTDP6bQb5gTIkDbN15...,35.050251,30.682861,21.363638,41.927135,38.0,3.55906,15.92549,...,1.589396,38.199997,50.0,27.750002,29.200001,6.101562,9.75,9.117188,12.5625,7.390625


In [19]:
print(df_update.columns[df_update.columns.str.startswith('genre')])
print(playvec.columns[playvec.columns.str.startswith('genre')])

Index(['genre|latin_arena_pop', 'genre|latin_pop', 'genre|latin_viral_pop',
       'genre|mexican_pop', 'genre|reggaeton_mexicano'],
      dtype='object')
Index(['genre|latin_arena_pop', 'genre|latin_pop', 'genre|latin_viral_pop',
       'genre|mexican_pop', 'genre|reggaeton_mexicano'],
      dtype='object')


In [20]:
df_update.head()

Unnamed: 0,track_uri,artist_uri,album_uri,danceability,energy,key,loudness,mode,speechiness,acousticness,...,duration_ms,time_signature,Track_release_date,Track_pop,Artist_pop,genre|latin_arena_pop,genre|latin_pop,genre|latin_viral_pop,genre|mexican_pop,genre|reggaeton_mexicano
0,2THyGFz2KFSnnjilRrTDIM,2wIVse2owClT7go1WT98tk,5eNmebFdSZTcqExwoijGOO,0.719212,0.631836,0.363636,0.815213,0.0,0.117396,0.008249,...,0.034858,0.8,1.0,0.0,0.65,0.0,0.0,0.0,0.0,0.0
1,3BgwOI1jT8l6dFeAiESJVM,2wIVse2owClT7go1WT98tk,6UkdyvPElK6JDkyeRClbI2,0.75468,0.088684,0.454545,0.654392,1.0,0.987431,0.780882,...,0.003815,0.8,0.975,0.0,0.65,0.0,0.0,0.0,0.0,0.0
2,4DZpzJhrt8SG22hsyruUB1,26dSoYclwsYLMAKD3tpOr4,0eXGrEtsH0WVJnqp5imXAs,0.663054,0.64502,0.181818,0.826502,0.0,0.062092,0.005944,...,0.032865,0.8,1.0,0.0,0.75,0.0,0.0,0.0,0.0,0.0
3,3JyhzOBrTyTXHSAFRu0wKV,26dSoYclwsYLMAKD3tpOr4,0eXGrEtsH0WVJnqp5imXAs,0.370197,0.651855,0.545455,0.81642,1.0,0.112242,0.033333,...,0.033089,0.8,1.0,0.0,0.75,0.0,0.0,0.0,0.0,0.0
4,36b687iXNP8g84ulUXmPA7,26dSoYclwsYLMAKD3tpOr4,0eXGrEtsH0WVJnqp5imXAs,0.759606,0.456055,0.181818,0.836221,1.0,0.043144,0.221814,...,0.038974,0.6,1.0,0.0,0.75,0.0,0.0,0.0,0.0,0.0


In [21]:
# Generate recommendations using custom recommender
custom_recommendations = recommend_tracks(df_update, playvec, sp)

In [22]:
custom_recommendations

Unnamed: 0,Index,track_name,artist_name,pop
0,0,Regálame,Mario Bautista,39
1,1,No digas nada,Mario Bautista,46
2,2,Te Sigue Esperando Mi Corazón (feat. Dulce Mar...,Río Roma,33
3,3,Mi Persona Favorita,Río Roma,71
4,4,Azul Como El Cielo,Danna Paola,33
5,5,Azul Como El Cielo,Danna Paola,25
6,6,Flores Amarillas,Lola,47
7,7,Mágico,Lola,35
8,8,Mala,Saak,21
9,9,Una Noche,Saak,32


In [23]:
test

Unnamed: 0,track_uri,artist_uri,album_uri,danceability,energy,key,loudness,mode,speechiness,acousticness,...,duration_ms,time_signature,Track_release_date,Track_pop,Artist_pop,genre|latin_arena_pop,genre|latin_pop,genre|latin_viral_pop,genre|mexican_pop,genre|reggaeton_mexicano
0,1abN81EF3BNKoVpSxvQXPi,0AspLZGQkP38yddNoD0pLn,158sVr4HeR7ukWJMDGmdFw,0.758621,0.586914,0.545455,0.84419,0.0,0.029962,0.282108,...,0.030668,0.6,1.0,0.6,0.55,0.0,0.0,0.568848,0.486816,0.663086
1,2HsBez6K2kkqzkxHQqRusL,0AspLZGQkP38yddNoD0pLn,48FTvOrTDP6bQb5gTIkDbN,0.690148,0.753906,0.181818,0.861817,0.0,0.034377,0.139583,...,0.031063,0.6,1.0,0.55,0.55,0.0,0.0,0.568848,0.486816,0.663086
2,7CMv5aX1lpcEXxjslRvGeb,0AspLZGQkP38yddNoD0pLn,158sVr4HeR7ukWJMDGmdFw,0.747783,0.812012,0.818182,0.874736,1.0,0.052413,0.005239,...,0.025257,0.8,1.0,0.45,0.55,0.0,0.0,0.568848,0.486816,0.663086
3,5uri4QSUkltuHijM9aq1SQ,0AspLZGQkP38yddNoD0pLn,7aIMWGP4v42yVprFxcheBB,0.553695,0.621094,1.0,0.825777,0.0,0.061872,0.27402,...,0.031965,0.8,1.0,0.5,0.55,0.0,0.0,0.568848,0.486816,0.663086
4,0lmmvf2eUzkjNlDWctEibC,0AspLZGQkP38yddNoD0pLn,0fa1WYDpSKEHiCj3YTtsDq,0.650739,0.771973,0.636364,0.871114,1.0,0.050151,0.224877,...,0.02777,0.6,1.0,0.65,0.55,0.0,0.0,0.568848,0.486816,0.663086
5,0J94ibLoZFu0ShiGn3CkMh,0AspLZGQkP38yddNoD0pLn,0REeSaTC1AgGbNdutsa9yu,0.850246,0.754883,0.090909,0.861817,1.0,0.035099,0.047089,...,0.027513,0.8,1.0,0.45,0.55,0.0,0.0,0.568848,0.486816,0.663086
6,4oBIA6sGjOm60I9lmkHv45,0AspLZGQkP38yddNoD0pLn,158sVr4HeR7ukWJMDGmdFw,0.638424,0.819824,0.636364,0.862179,1.0,0.059106,0.066789,...,0.036652,0.8,1.0,0.35,0.55,0.0,0.0,0.568848,0.486816,0.663086
7,3vDLsRfjYfsjqLelscTVh3,0AspLZGQkP38yddNoD0pLn,2h0VcRKYrfkBHMH2rYEZrt,0.771921,0.721191,0.0,0.847449,1.0,0.094394,0.027911,...,0.026298,0.8,1.0,0.4,0.55,0.0,0.0,0.568848,0.486816,0.663086
8,0zL8pvTNR7Vj4yhpF0C70i,0AspLZGQkP38yddNoD0pLn,3EKH3tqxWdXSe5NjtIYqlB,0.827094,0.618164,0.363636,0.827407,0.0,0.064794,0.610294,...,0.033363,0.8,1.0,0.35,0.55,0.0,0.0,0.568848,0.486816,0.663086
9,3kviJWhyPA6gVOIBnMAvfm,0AspLZGQkP38yddNoD0pLn,158sVr4HeR7ukWJMDGmdFw,0.643842,0.594238,0.0,0.835376,1.0,0.061589,0.380392,...,0.021966,0.8,1.0,0.35,0.55,0.0,0.0,0.568848,0.486816,0.663086


In [24]:
# # Generate recommendations using Spotify's API
# spotify_recommendations = fetch_spotify_recommendations(test, sp)

In [25]:
# spotify_recommendations