# Song Analysis Using Spotify API

In [1]:
#import libraries
import sys
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup as bs
from sklearn.metrics import pairwise_distances

In [2]:
#Spotify authorization scope
#NOTE(review): no other cell shown here references `scope`, and the client below is built
#from client credentials only -- confirm whether this is still needed
scope = 'user-library-read'

In [3]:
#Spotify API credentials
#%store -r restores variables persisted earlier with %store, so the client id and secret
#never appear in the notebook itself
%store -r spotify_cid
cid = spotify_cid
%store -r spotify_secret
secret = spotify_secret

In [4]:
#connect to Spotify through wrapper Spotipy
#the credentials manager is built from the client id/secret pair loaded above
client_cred = SpotifyClientCredentials(client_id=cid, client_secret=secret)
#`sp` is the shared client that get_track_info and get_sim_songs read as a global
sp = spotipy.Spotify(client_credentials_manager = client_cred)

### Functions

In [5]:
def get_track_info(track,artist):
    '''
    function returns dictionary with track's info (including audio features)
        *uses the first match returned by the Spotify search
    parameters:
        track-->str
        artist-->str
    returns:
        dict with keys track, artist, track_id, release_date, dance, energy,
        loud, speech, acoust, live, valence, tempo
    raises:
        IndexError-->the search returned no matching track
        ValueError-->no audio features were returned for the matched track
    '''
    #search Spotify API for general song info
    info_json = sp.search(q='artist:' + artist + ' track:' + track)
    items = info_json['tracks']['items']
    if not items:
        #same exception type that indexing the empty result list used to raise,
        #but with a message that names the song
        raise IndexError('no Spotify match for ' + track + ' by ' + artist)
    top = items[0]

    #create dictionary with song info
    #artist is read from the track's own artist list (the same field get_sim_songs uses)
    #instead of the album's artist list, so both functions report the artist consistently
    info = {'track':top['name'],
            'artist':top['artists'][0]['name'],
            'track_id':top['id'],
            'release_date':top['album']['release_date']}

    #add audio features to dictionary
    audio_info = sp.audio_features(info['track_id'])[0]
    if audio_info is None:
        #guard against a missing entry so the failure is explicit rather than
        #a "'NoneType' object is not subscriptable" TypeError
        raise ValueError('no audio features for ' + info['track'] + ' (' + info['track_id'] + ')')

    #short column name --> audio feature field; order fixes the dataframe column order
    feature_fields = {'dance':'danceability',
                      'energy':'energy',
                      'loud':'loudness',
                      'speech':'speechiness',
                      'acoust':'acousticness',
                      'live':'liveness',
                      'valence':'valence',
                      'tempo':'tempo'}
    for column, field in feature_fields.items():
        info[column] = audio_info[field]

    return info

In [6]:
def get_sim_songs(id_list):
    '''
    function asks Spotify for up to 100 recommendations based on the seed tracks
    and returns them as a list of tup (track, artist)
        *artist is the first artist listed on each recommended track
    parameters:
        id_list-->iterable of song ids (str) for seed tracks
    '''
    #materialise the seed ids as a plain list before handing them to the API
    seeds = list(id_list)
    response = sp.recommendations(seed_tracks=seeds,limit=100)

    #collect (track,artist) pairs in the order Spotify returned them
    pairs = []
    for rec in response['tracks']:
        first_artist = rec['artists'][0]
        pairs.append((rec['name'],first_artist['name']))

    return pairs

In [7]:
def get_df(track_list):
    '''
    function returns dataframe with audio features for available songs
        *each (track, artist) pair is lower-cased and stripped of apostrophes
         before being passed to get_track_info
        *songs whose lookup fails are reported on stdout and left out of the dataframe
    parameters:
        track_list-->list of tup (track, artist) of songs
    returns:
        dataframe with one row per song that was found (empty if none were)
    '''
    rows = []
    for track, artist in track_list:
        try:
            rows.append(get_track_info(track.lower().replace("'",""),
                                       artist.lower().replace("'","")))
        except Exception:
            #best-effort lookup: skip this song and keep going.
            #Exception (not a bare except) lets KeyboardInterrupt/SystemExit through,
            #so a long run of lookups can still be interrupted
            print(track + ' by ' + artist + ' is not available')

    return pd.DataFrame(data=rows)

In [8]:
def rank_by_features(df_seed,df_rec):
    '''
    function returns df_rec sorted by similarity to seed tracks
        *similarity is 1 - Euclidean distance between a song's audio features and
         the seed tracks' average audio features (higher = more similar; can be negative)
        *audio features are the numeric columns of df_seed (any existing sim_score
         column is ignored), so the function can be re-run on an already-scored dataframe
        *a sim_score column is added to (or overwritten in) df_rec itself
    parameters:
        df_seed-->dataframe with seed tracks
        df_rec-->dataframe with songs recommended by Spotify API
            *must contain the same audio feature columns as df_seed unless it is empty
    returns:
        copy of df_rec sorted by sim_score, most similar first
    '''
    #pick the audio feature columns from the seed dataframe; text columns such as
    #track/artist/track_id/release_date are excluded because they are not numeric
    seed_features = df_seed.select_dtypes(include='number').drop(columns=['sim_score'],errors='ignore')
    feature_cols = list(seed_features.columns)

    #calculate average values in seed tracks
    seed_avg = seed_features.mean(axis=0).to_numpy(dtype=float)

    #calculate Euclidean distance with seed averages
    #NOTE(review): features are compared on their raw scales, so columns with a wide
    #numeric range weigh more in the distance -- confirm this is intended
    if df_rec.empty:
        #nothing to score (e.g. every lookup failed); still return a sim_score column
        distances = np.array([],dtype=float)
    else:
        #selecting by name keeps the columns aligned with seed_avg
        rec_features = df_rec[feature_cols].to_numpy(dtype=float)
        distances = np.linalg.norm(rec_features - seed_avg,axis=1)
    df_rec['sim_score'] = 1 - distances

    #sort dataframe by similarity to seed averages
    return df_rec.sort_values(by='sim_score',ascending=False)

### Information on Seed Tracks

The lists below contain the top five tracks in each of three genres (country, R&B/hip-hop, and rock/alternative) for the week of May 15, 2021, based on Billboard Top 100 charts.

In [9]:
#country seed tracks as (track, artist) tuples
cty_songs = [('Forever After All','Luke Combs'),
             ('The Good Ones','Gabby Barrett'),
             ('Made for You','Jake Owen'),
             ('Hell of a View','Eric Church'),
             ('Breaking Up Was Easy in the 90s','Sam Hunt')]

#create dataframe with track information
#get_df prints a notice for any track whose lookup fails and leaves it out
df_cty = get_df(cty_songs)

In [10]:
#R&B/hip-hop seed tracks as (track, artist) tuples
rb_songs = [('Leave the Door Open','Bruno Mars'),
            ('Peaches (feat. Daniel Caesar & Giveon)','Justin Bieber'),
            ('Rapstar','Polo G'),
            ('Astronaut in the Ocean','Masked Wolf'),
            ('Up','Cardi B')]

#create dataframe with track information
#get_df prints a notice for any track whose lookup fails and leaves it out
df_rb = get_df(rb_songs)

In [11]:
#rock/alternative seed tracks as (track, artist) tuples
rock_songs = [('Without You','The Kid LAROI'),
              ('Your Power','Billie Eilish'),
              ("My Ex's Best Friend",'Machine Gun Kelly'),
              ('Mood','24kGoldn'),
              ('Therefore I Am','Billie Eilish')]

#create dataframe with track information
#get_df prints a notice for any track whose lookup fails and leaves it out
df_rock = get_df(rock_songs)

### Song Recommendations

Use the `get_sim_songs` function to find song recommendations for each genre, then use `get_df` to build a dataframe of track information for the recommended songs.

In [12]:
#country
#request recommendations seeded with the country track ids, then look up
#track info for every recommended (track, artist) pair
sim_cty = get_sim_songs(df_cty['track_id'])
df_sim_cty = get_df(sim_cty)

In [13]:
#R&B/hip-hop
#request recommendations seeded with the R&B/hip-hop track ids, then look up
#track info for every recommended (track, artist) pair
sim_rb = get_sim_songs(df_rb['track_id'])
df_sim_rb = get_df(sim_rb)

Stylin' by Stellar is not available
3am Thoughts by Stellar is not available
Repaid by Lost Haven is not available


In [14]:
#rock/alternative
#request recommendations seeded with the rock/alternative track ids, then look up
#track info for every recommended (track, artist) pair
sim_rock = get_sim_songs(df_rock['track_id'])
df_sim_rock = get_df(sim_rock)

Blackout (feat. Tory Lanez) by Imanbek is not available
Intro by Kenndog is not available


### Rank Songs by Similarity with Seed Tracks Using Euclidean Distance

In [15]:
#country
#score each recommended song against the seed-track averages and sort, most similar first
df_sim_cty = rank_by_features(df_cty,df_sim_cty)

In [16]:
#R&B/hip-hop
#score each recommended song against the seed-track averages and sort, most similar first
df_sim_rb = rank_by_features(df_rb,df_sim_rb)

In [17]:
#rock/alternative
#score each recommended song against the seed-track averages and sort, most similar first
df_sim_rock = rank_by_features(df_rock,df_sim_rock)

### Find More Songs Similar to Seed Tracks Using Ranked Songs

In [42]:
def get_more_songs(df_seed,df_rec,min_songs=1000):
    '''
    function returns dataframe with at least min_songs similar tracks when enough can be found
        *feeds songs most similar to seed tracks (based on Euclidean distance) through Spotify recommender
        *songs recommended based on input of most similar song are added to dataframe
        *duplicates (same track_id) are dropped, keeping the first occurrence
        *stops early, returning fewer songs, once every collected song has been used as input
    dataframe will be used to obtain lyrics from Genius API
        *different notebook

    parameters:
        df_seed-->dataframe with seed tracks
        df_rec-->dataframe with songs recommended by Spotify API
            *ranked by similarity to seed tracks
        min_songs-->int, number of unique tracks to collect before stopping (default 1000)
    returns:
        dataframe with seed tracks first, then df_rec, then each batch of new songs,
        re-indexed from 0
            *seed rows have no sim_score (NaN)
    '''
    #combine df_seed and df_rec into one dataframe
    #de-duplicate on track_id: seed rows have no sim_score, so comparing whole rows
    #would never match a seed track to the same track in the recommendations
    df = pd.concat([df_seed,df_rec],ignore_index=True).drop_duplicates(subset='track_id')

    #find songs using those deemed most similar to seed tracks
    #start just past the seed rows, which lead the de-duplicated dataframe
    rank = df_seed['track_id'].nunique()
    #second condition stops the loop when the pool runs out instead of raising IndexError
    while len(df) < min_songs and rank < len(df):
        sim = get_sim_songs([df.iloc[rank]['track_id']]) #get similar tracks
        df_sim = get_df(sim) #get track info and put in dataframe
        if not df_sim.empty: #nothing to score or add when every lookup failed
            df_sim = rank_by_features(df_seed,df_sim) #obtain similarity scores
            #add new songs to existing dataframe and remove duplicates
            df = pd.concat([df,df_sim],ignore_index=True).drop_duplicates(subset='track_id')
        rank+=1

    #return the re-indexed dataframe itself; reset_index(..., inplace=True) returns None
    return df.reset_index(drop=True)

In [None]:
#expand the country pool from the seed tracks and the ranked recommendations
d = get_more_songs(df_cty,df_sim_cty)