# Music Recommender - Find New Songs Based on Content of Favorite Songs

In [941]:
# Imports
import pandas as pd
import numpy as np
import spotipy
import sys
sys.path.insert(0, '../../../')
from api_keys import client_id, client_secret
from spotipy.oauth2 import SpotifyClientCredentials
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import cdist

# Read in data
tracks = pd.read_csv('data/cleaned/track_data.csv')
meta_scaled = pd.read_csv('data/cleaned/scaled_with_metadata.csv')

## Recommendation System

This recommender system will rely on the assumption that by taking the means of audio features for a set of songs, we can then take the nearest n songs around that mean vector as being most related / similar. If these songs are similar to the user's songs of choice, then the user should like them, or at least find them musically interesting.

In [942]:
ss = StandardScaler()
ss.fit(tracks.select_dtypes(include = 'number').drop(columns = 'year'))

# Using spotipy's SpotifyOAuth class so that we can use spotify's API w/ proper authentication
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id,
                                                           client_secret=client_secret))

def get_scaled_feats(song):
    '''
    Helper function to recommend(). Takes a song as input in the form (tile, release year) and returns a dataframe
    containing that song's scaled audio features.
    
    If song is available in the downloaded scaled tracks data, will fetch from there. Otherwise,
    uses spotipy to grab the song's audio features from Spotify, then scales them.
    '''
    
    name, year = song
    track_df = meta_scaled[(meta_scaled['name'] == name) & (meta_scaled['year'] == year)]
    track_df = track_df.copy(deep = True) # Was giving a warning otherwise (changing value of a slice)
    
    if len(track_df) != 0:
        track_df.reset_index(drop = True)
        return pd.DataFrame([track_df.iloc[0]]).drop(columns = ['name','artists', 'year', 'genre'])
    
    else:
        
        song_data = {}
        fetch = sp.search(q = f'track: {name} year: {year}', limit = 1)

        if fetch['tracks']['items'] != []:

            features = sp.audio_features(fetch['tracks']['items'][0]['id'])[0] # Grabbing 1st / most relevant search result
            
            del features['track_href'], features['analysis_url'], features['uri'], features['type']

            song_data['explicit'] = int(fetch['tracks']['items'][0]['explicit'])
            song_data['popularity'] = fetch['tracks']['items'][0]['popularity']

            for key, value in features.items():
                song_data[key] = value
                
            song_data.pop('id')
            df_cols = song_data.keys()
        
            return pd.DataFrame(ss.transform(pd.DataFrame([song_data])), columns = df_cols)
        
        else:
            
            return None 

def recommend(songlist, n = 5):
    '''
    Taking in a list of songs, each of the format (title, release year), returns n songs as
    recommendations for the user to explore.
    
    Note: These recommendations are only sourced from the downloaded data, not from all of spotify's
    available songs.
    '''
    vectors = pd.DataFrame()
    name_list = []
    for song in songlist:
        name, year = song
        name_list.append(name)
        vector = get_scaled_feats(song)
        if vector is None:
            print(f'{name} cannot be found on Spotify! Sorry.')
            continue
        else:
            vectors = pd.concat([vectors, vector])
    
    # Creating centroid vector (already scaled)
    centroid =  pd.DataFrame(vectors.mean(numeric_only = True)).T
    
    # Calculating distances from centroid for all tracks -- looking for closest ones
    distances = cdist(centroid.iloc[:,:], meta_scaled.select_dtypes(include = 'number').drop(columns = 'year').iloc[:,:], 'cosine') 
    distances = pd.DataFrame({'distance': distances[0]}).sort_values('distance', ascending = True)
    
    index = list(distances.index[:n])
    
    recs = tracks.iloc[index]
    recs = recs[~recs['name'].isin(name_list)]
    recs.reset_index(drop = True, inplace = True)
    
    return recs[['name', 'artists', 'year', 'genre']]

#### Testing:

In [943]:
recommend([('Lte', 2015)], 5)

Unnamed: 0,name,artists,year,genre
0,Dance with Me,"['Shiloh Dynasty', 'itssvd']",2018,lo-fi chill
1,Tell Me Why I'm Waiting,['Timmies'],2017,lo-fi chill
2,Qué bonito fue,['El David'],2019,indie triste
3,City Of Stars,['Ryan Gosling'],2016,hollywood
4,Twisted Nerve,['Bernard Herrmann'],2003,classic soundtrack


In [944]:
recommend([('$outh$ide $uicide', 2015)], 5)

Unnamed: 0,name,artists,year,genre
0,$Outh $Ide $Uicide,"['$uicideboy$', 'Pouya']",2015,dark trap
1,Lte,['$uicideboy$'],2015,dark trap
2,Lurk,['The Neighbourhood'],2014,modern alternative rock
3,Her Life,['Two Feet'],2016,indie poptimism
4,Бандит,['OG Buda'],2019,russian drill


In [945]:
recommend([('Бандит', 2019)], 5)

Unnamed: 0,name,artists,year,genre
0,Say No Mo,['kizaru'],2019,russian emo rap
1,JUANA,['Akuma'],2020,german underground rap
2,Glo På Mig,['Tessa'],2020,danish pop
3,Chin Chin,"['Noga Erez', 'ECHO']",2019,israeli hip hop
4,Dope Sport,"['HGEMONA$', 'Moose', 'Sardos97']",2021,greek drill


In [946]:
recommend([('Firework', 2012)], 5)

Unnamed: 0,name,artists,year,genre
0,Turning Page,['Sleeping At Last'],2012,ambient worship
1,Talking to the Moon - Acoustic Piano,['Bruno Mars'],2010,dance pop
2,Sparks,['Coldplay'],2000,permanent wave
3,Streetcar,['Daniel Caesar'],2015,canadian contemporary r&b
4,The Trouble with Wanting,['Joy Williams'],2019,acoustic pop


In [947]:
recommend([('Royalty', 2019)], 5)

Unnamed: 0,name,artists,year,genre
0,Dansplaat,['Brainpower'],2002,dutch hip hop
1,Пачки,['lil krystalll'],2019,russian drill
2,NO CALIBRAN,"['La Kikada', 'La Elegancia Prod']",2021,rap dominicano
3,Hoodboy - Remix,"['Coyote Jo Bastard', 'Sfera Ebbasta']",2020,french hip hop
4,Traîtres,['Lacrim'],2017,arabic hip hop


In [948]:
recommend([('Come AS You Are', 1991),
           ('Smells Like Teen Spirit', 1992), 
           ('Lithium', 1992),
           ('All Apologies', 1993),
           ('Stay Away', 1993)], 10)

Unnamed: 0,name,artists,year,genre
0,Stars,['Callalily'],2006,opm
1,Zindagi Kuch Toh Bata,"['Pritam', 'Rahat Fateh Ali Khan', 'Rekha Bhar...",2015,desi pop
2,De l'or,"['Vitaa', 'Slimane']",2020,francoton
3,Since You've Been Gone,['DAY26'],2008,boy band
4,晴天,['Jay Chou'],2003,c-pop
5,King and Queen of Hearts,['David Pomeranz'],2011,opm
6,Dyin' Ain't Much Of A Livin',['Jon Bon Jovi'],1990,hard rock
7,Goodness of God (Live),"['Bethel Music', 'Jenn Johnson']",2019,ambient worship
8,Inner City Blues,['Rodríguez'],1970,folk rock
9,Arrancacorazones,['Attaque 77'],2003,argentine punk


In [949]:
recommend([('Firework', 2012), ('Everlong', 1997)], 5)

Unnamed: 0,name,artists,year,genre
0,drivers license,['Olivia Rodrigo'],2021,pop
1,SLOW DANCING IN THE DARK,['Joji'],2018,alternative r&b
2,Best Friend,['Rex Orange County'],2017,bedroom pop
3,What Other People Say,"['Sam Fischer', 'Demi Lovato']",2021,australian pop
4,Till Forever Falls Apart,"['Ashe', 'FINNEAS']",2021,alt z


In [950]:
recommend([('Beat It', 1982),
           ('Billie Jean', 1988),
           ('Thriller', 1982)], 10)

Unnamed: 0,name,artists,year,genre
0,Muskrat Ramble - Live; 2001 Remastered Version,['Louis Armstrong'],1947,adult standards
1,Song for My Son,['Lee Oskar'],1980,harmonica jazz
2,Gloria - Live,['Umberto Tozzi'],1980,classic italian pop
3,Blautar varir,['SSSól'],1991,classic icelandic pop
4,Frække Frida,['Eventyrteatrets Børn'],1994,bornesange
5,Lessons In Love - Live At Wembley,['Level 42'],1987,art rock
6,Night Cruise (Live Version),['T-SQUARE'],2016,anime score
7,Bál az Operában,['KFT'],1996,classic hungarian pop
8,Enjoy Yourself - Live 2016,['The Busters'],2016,euroska


#### Now that the recommender is working, it is time to create Streamlit app so users can more easily interact with it. See app.py where most of the code above is borrowed.