In [1]:
# Import modules
import sys
# If your authentication script is not in the project directory,
# append its folder to sys.path
# sys.path.append("../spotify_api_web_app")
import pandas as pd
from tqdm import tqdm
import time
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from decouple import config

In [2]:
# Load the reference track dataset from a CSV file located relative to the working directory
df = pd.read_csv("music_characteristics_dataset.csv")

In [3]:
# Pack the six mood-related audio features of every track into one list-valued
# column, in the order: valence, energy, acousticness, instrumentalness,
# speechiness, mode.
df["mood_vec"] = (
    df.loc[:, ["valence", "energy", "acousticness", "instrumentalness", "speechiness", "mode"]]
    .to_numpy()
    .tolist()
)
df

Unnamed: 0,id,genre,track_name,artist_name,popularity,valence,energy,danceability,acousticness,tempo,speechiness,mode,instrumentalness,mood_vec
0,7GILkDXz9y9x5KPHBKdrHA,acoustic,Spring för livet,Sara Varga,0,0.5980,0.572,0.786,0.2930,119.943,0.0590,1,0.000000,"[0.598, 0.5720000000000001, 0.293, 0.0, 0.0590..."
1,2xwsf9FuFINP1X4FTsqZ7Q,acoustic,Peace,O.A.R.,62,0.4930,0.721,0.449,0.1660,178.141,0.0392,1,0.000000,"[0.493, 0.721, 0.166, 0.0, 0.0392, 1.0]"
2,4VJgrWjrkodaGiq3xKz62z,acoustic,Sometimes (I Wish),City and Colour,40,0.1350,0.467,0.523,0.3110,129.953,0.0281,0,0.000020,"[0.135, 0.467, 0.311, 2.01e-05, 0.0281, 0.0]"
3,46XMysg4VurmyAQ28tshqz,acoustic,The Mess I Made,Parachute,47,0.1730,0.591,0.405,0.0153,165.377,0.0339,1,0.000000,"[0.17300000000000001, 0.591, 0.0153, 0.0, 0.03..."
4,25J4d78ESH2MgAJoIakB8O,acoustic,Lost In The Light,Bahamas,50,0.2460,0.267,0.597,0.7930,75.535,0.0309,1,0.001400,"[0.24600000000000002, 0.267, 0.792999999999999..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3000,3CCGsOj2efotChiScMmCWg,world-music,A Minha Menina,Os Mutantes,0,0.4290,0.930,0.455,0.0568,91.410,0.1260,1,0.000000,"[0.429, 0.93, 0.0568, 0.0, 0.126, 1.0]"
3001,6bYpEZmbTldBmQtnnfDRDk,world-music,Raoui,Various Artists,0,0.6250,0.058,0.560,0.9260,107.658,0.0500,0,0.000003,"[0.625, 0.057999999999999996, 0.92599999999999..."
3002,1xJtdt697VaDWFJ1eEwlPO,world-music,Cat Bed Music,Pet Music World,5,0.0394,0.033,0.304,0.9920,71.282,0.0655,1,0.963000,"[0.0394, 0.033, 0.992, 0.963, 0.0655, 1.0]"
3003,1Nov1m4HA8PBs2vyWjp2Cv,world-music,Mas Que Nada,Jorge Ben Jor,0,0.5980,0.403,0.514,0.6720,89.672,0.0325,0,0.000366,"[0.598, 0.40299999999999997, 0.672, 0.000366, ..."


In [4]:
# CREATE CLIENT FUNCTION TO CONNECT US TO SPOTIFY
def client():
    """Build a Spotify API client authenticated via client credentials.

    The client id and secret are looked up with ``config`` under the names
    ``SPOTIPY_CLIENT_ID`` and ``SPOTIPY_CLIENT_SECRET``.

    Returns:
        spotipy.Spotify: a newly constructed client (a fresh one on every call).
    """
    credentials = SpotifyClientCredentials(
        client_id=config('SPOTIPY_CLIENT_ID'),
        client_secret=config('SPOTIPY_CLIENT_SECRET'),
    )
    return spotipy.Spotify(client_credentials_manager=credentials)

In [5]:
# GET TRACK ID

# 1. Get User input
# sample_track = input("Sample Track:")
# sample_artist = input("Sample Artist:")

# Testing sample
sample_track = 'country roads'
sample_artist = 'john denver'

# 2. Search Spotify for the matching track (limit=1 requests a single result)
track_result = client().search(q=f'track:{sample_track} artist:{sample_artist}', limit=1, type='track')
track_items = track_result['tracks']['items']

# 3. Fail with a clear message when nothing matches, instead of an opaque
#    IndexError further down
if not track_items:
    raise ValueError(
        f"No Spotify track found for track '{sample_track}' by artist '{sample_artist}'"
    )

# 4. Save details. artist_name and track_name are kept (as lists) so the match
#    can be verified by eye; track_id is the id string of the first hit.
artist_name = [t['artists'][0]['name'] for t in track_items]
track_name = [t['name'] for t in track_items]
track_id = track_items[0]['id']

In [24]:
# CREATE RECOMMENDATION FUNCTION
def recommend(id, ref_df, n_recs = 5, track_features = None):
    """Recommend the reference tracks whose mood vectors are closest to a track.

    Closeness is the Euclidean distance between the input track's mood vector
    (valence, energy, acousticness, instrumentalness, speechiness, mode) and
    each row's ``mood_vec``.

    Args:
        id: Spotify track id of the input track. (The name shadows the builtin
            ``id`` but is kept because callers pass it by keyword.)
        ref_df: DataFrame of reference tracks. Must contain the columns
            ``id``, ``track_name``, ``artist_name``, ``genre`` and ``mood_vec``
            (a sequence of six numbers in the feature order listed above).
            It is not modified.
        n_recs: Maximum number of recommendations to return.
        track_features: Optional mapping with the six audio features of the
            input track. When omitted, the features are fetched from the
            Spotify API through ``client()``.

    Returns:
        DataFrame with columns ``artist_name`` and ``genre`` for the closest
        tracks, nearest first, indexed by (original row index, ``track_name``).

    Raises:
        ValueError: if no audio features are available for ``id``.
    """
    feature_keys = ('valence', 'energy', 'acousticness',
                    'instrumentalness', 'speechiness', 'mode')

    if track_features is None:
        # Look up the track passed in as `id` (not a notebook-level global)
        track_features = client().audio_features(id)[0]
    if track_features is None:
        raise ValueError(f"No audio features available for track id {id!r}")

    # Combine into mood vector, same feature order as the reference mood_vec
    track_moodvec = np.array([track_features[key] for key in feature_keys], dtype=float)

    # Compute distances to all reference tracks
    distances = ref_df["mood_vec"].apply(
        lambda vec: np.linalg.norm(track_moodvec - np.asarray(vec, dtype=float))
    )

    # assign() returns a new frame, so the caller's ref_df keeps its columns.
    # Sort distances from lowest to highest.
    ranked = ref_df.assign(distances=distances).sort_values(by="distances", ascending=True)

    # If the input track is in the reference set, it will have a distance of 0,
    # but should not be recommended
    ranked = ranked[ranked["id"] != id]

    # Keep the original row index and add track_name as a second index level.
    # Explicit keywords are required: set_index() only accepts `keys`
    # positionally in pandas >= 2.0.
    ranked = ranked.set_index('track_name', append=True)

    # Return n recommendations
    return ranked[['artist_name', 'genre']].iloc[:n_recs]

In [25]:
# Show the five reference tracks closest in mood to the track found above
recommend(track_id, df, n_recs=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,artist_name,genre
Unnamed: 0_level_1,track_name,Unnamed: 2_level_1,Unnamed: 3_level_1
900,À Segunda Vista,Baião D4 / Rastapé,forro
214,Midnight Rider,Allman Brothers Band,blues
182,Milk Cow Blues,Nitty Gritty Dirt Band,bluegrass
185,Daylight,Alison Krauss & Union Station,bluegrass
1585,ONE DAY,m-flo,j-pop
