# Song Analysis Using Spotify API

In [1]:
#import libraries
import sys
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np
import time
from bs4 import BeautifulSoup as bs
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine

In [2]:
#Spotify authorization scope
scope = 'user-library-read'

In [3]:
#Spotify API credentials
%store -r spotify_cid
cid = spotify_cid
%store -r spotify_secret
secret = spotify_secret

In [4]:
#connect to Spotify through wrapper Spotipy
client_cred = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager = client_cred)

### Functions to Obtain Song Features and Recommendations

In [5]:
def get_track_info(track,artist):
    '''
    function returns dictionary with track's info (including audio features)
    parameters:
        track-->str
        artist-->str
    '''
    #search Spotify API for general song info
    info_json = sp.search(q='artist:' + artist + ' track:' + track)

    #create dictionary with song info
    info = {'track':info_json['tracks']['items'][0]['name'],
            'artist':info_json['tracks']['items'][0]['album']['artists'][0]['name'],
            'track_id':info_json['tracks']['items'][0]['id'],
            'release_date':info_json['tracks']['items'][0]['album']['release_date']}
            
    #add audio features to dictionary
    audio_info = sp.audio_features(info['track_id'])[0]
    info['dance'] = audio_info['danceability']
    info['energy'] = audio_info['energy']
    info['loud'] = audio_info['loudness']
    info['speech'] = audio_info['speechiness']
    info['acoust'] = audio_info['acousticness']
    info['live'] = audio_info['liveness']
    info['valence'] = audio_info['valence']
    info['tempo'] = audio_info['tempo']
    
    return info

In [6]:
def get_sim_songs(id_list):
    '''
    function returns list of tup (track, artist) of songs similar to seed tracks
    parameters:
        id_list-->list of song ids (str) for seed tracks
    '''
    #find similar songs (using Spotify's bart algorithm)
    song_recs = sp.recommendations(limit=100,seed_tracks=[i for i in id_list])['tracks']
    
    #create list of tup (track,artist)
    sim_songs = [(s['name'],s['artists'][0]['name']) for s in song_recs]
    
    return sim_songs

In [36]:
def get_df(track_list):
    '''
    function returns dataframe with audio features for available songs
    parameters:
        track_list-->list of tup (track, artist) of songs
    '''
    d = []
    for track, artist in track_list:
        try:
            d.append(get_track_info(track.lower().replace("'",""),
                                    artist.lower().replace("'","")))
        except:
            print(track + ' by ' + artist + ' is not available')
            
    return pd.DataFrame(data=d)

In [7]:
def rank_by_features():
    
    return None

### Information on Seed Tracks

The lists below contain the top five tracks for each of country, R&B/hip-hop, and rock/alternative as of the week of May 15, 2021, based on Billboard Top 100 charts.

In [37]:
#country seed tracks
cty_songs = [('Forever After All','Luke Combs'),
             ('The Good Ones','Gabby Barrett'),
             ('Made for You','Jake Owen'),
             ('Hell of a View','Eric Church'),
             ('Breaking Up Was Easy in the 90s','Sam Hunt')]

#create dataframe with track information
df_cty = get_df(cty_songs)

In [46]:
#R&B/hip-hop seed tracks
rb_songs = [('Leave the Door Open','Bruno Mars'),
            ('Peaches (feat. Daniel Caesar & Giveon)','Justin Bieber'),
            ('Rapstar','Polo G'),
            ('Astronaut in the Ocean','Masked Wolf'),
            ('Up','Cardi B')]

#create dataframe with track information
df_rb = get_df(rb_songs)

In [48]:
#rock/alternative seed tracks
rock_songs = [('Without You','The Kid LAROI'),
              ('Your Power','Billie Eilish'),
              ("My Ex's Best Friend",'Machine Gun Kelly'),
              ('Mood','24kGoldn'),
              ('Therefore I Am','Billie Eilish')]

#create dataframe with track information
df_rock = get_df(rock_songs)

### Song Recommendations

Use get_sim_songs function to find song recommendations for respective genres and create dataframes with song information for recommended songs.

In [26]:
#country
sim_cty = get_sim_songs(df_cty['track_id'])
df_sim_cty = pd.DataFrame(data=[helper_func(t,a) for t,a in sim_cty])

In [34]:
#R&B/hip-hop
sim_rb = get_sim_songs(df_rb['track_id'])
d = []
for 
df_sim_rb = pd.DataFrame(data=[helper_func(t,a) for t,a in sim_rb])

AttributeError: 'NoneType' object has no attribute 'keys'

In [32]:
d

[{'track': 'Diva (feat. Lil Tecca)',
  'artist': 'The Kid LAROI',
  'track_id': '7gVwgc8b3XnO87TpmXXFA5',
  'release_date': '2020-01-31',
  'dance': 0.686,
  'energy': 0.703,
  'loud': -6.141,
  'speech': 0.0606,
  'acoust': 0.00728,
  'live': 0.0946,
  'valence': 0.457,
  'tempo': 144.902},
 {'track': 'VALENTINO - Imanbek Remix',
  'artist': '24kGoldn',
  'track_id': '660BgHpKo1jhR9MMSFn7CF',
  'release_date': '2020-06-12',
  'dance': 0.791,
  'energy': 0.795,
  'loud': -4.854,
  'speech': 0.0415,
  'acoust': 0.0161,
  'live': 0.362,
  'valence': 0.963,
  'tempo': 124.007},
 {'track': 'Jungle',
  'artist': 'A Boogie Wit da Hoodie',
  'track_id': '5uZm7EFtP5aoTJvx5gv9Xf',
  'release_date': '2016-09-16',
  'dance': 0.538,
  'energy': 0.52,
  'loud': -11.063,
  'speech': 0.737,
  'acoust': 0.308,
  'live': 0.214,
  'valence': 0.551,
  'tempo': 180.274},
 {'track': 'Versace on the Floor',
  'artist': 'Bruno Mars',
  'track_id': '0kN8xEmgMW9mh7UmDYHlJP',
  'release_date': '2016-11-17',
  '

In [19]:
#rock
#sim_rock = get_sim_songs(df_rock['track_id'])
#df_sim_rock = pd.DataFrame(data=[get_track_info(t.replace("'",''),a.replace("'",'')) for t,a in sim_rock]

IndexError: list index out of range

In [18]:
sim_rb

[('Be Alone', 'Blxst'),
 ('Ayy Macarena', 'Tyga'),
 ('Good as Hell', 'Lizzo'),
 ('Eleven', 'Khalid'),
 ("Pussy Poppin (I Don't Really Talk Like This)", 'Rico Nasty'),
 ('Classic', 'MKTO'),
 ('My Way', 'Ella Mai'),
 ('Beat Box 3 (feat. DaBaby)', 'SpotemGottem'),
 ('Starstruck', 'Years & Years'),
 ('Cabin Fever', 'Jaden'),
 ('Impatient (feat. Coi Leray)', 'DDG'),
 ('Red Kingdom', 'Tech N9ne'),
 ('Тлеет', 'Bula'),
 ('Rise Up (feat. Vamero)', 'VINAI'),
 ('Everything Black', 'Unlike Pluto'),
 ('Outta Time (feat. Drake)', 'Bryson Tiller'),
 ('queen of broken hearts', 'blackbear'),
 ('JETSKI (feat. Lil Mosey & Lil Tecca)', 'Internet Money'),
 ('Slide (Remix) (feat. Pop Smoke, A Boogie Wit da Hoodie & Chris Brown)',
  'H.E.R.'),
 ('Number 1', 'Kenndog'),
 ("Baby It's You - 2018", 'JoJo'),
 ('Breaking Me', 'Topic'),
 ('I.F.L.Y.', 'Bazzi'),
 ('Astronomy', 'Conan Gray'),
 ('BELIEVE IT', 'PARTYNEXTDOOR'),
 ('Majesty', 'Apashe'),
 ('Bestie (feat. Kodak Black)', 'Bhad Bhabie'),
 ('My Type', 'Saweeti

In [None]:
audio_features = [get_audio_features(i[2]) for i in track_info]

In [None]:
audio_meta = pd.DataFrame(audio_features, columns = ['track', 'dance' , 'energy' , 'key' , 'loudness' , 'mode' , 'speech' , 'acoust' , 
                       'instru' , 'live' , 'valence' , 'tempo' , 'duration' , 'time_signature'])

In [None]:
# Getting dummies for the following columns because they are not continuous varibles but either categorical or ordinal
dummies = audio_meta[['key','time_signature','mode']]

In [None]:
for i in dummies:
    dummies[i] = [str(num) for num in dummies[i]]

In [None]:
spread = pd.get_dummies(dummies)

In [None]:
for i in spread:
    spread[i] = [int(num) for num in spread[i]] 

### Ranking similar songs by using how close they are to seed tracks by euclidean distance

In [None]:
meta = pd.concat([audio_meta.drop(columns = ['track','key','time_signature','mode']),spread], axis = 1)

In [None]:
dist_out = 1-pairwise_distances(meta, metric="euclidean")

In [None]:
song_group = pd.DataFrame(dist_out).iloc[:, list(range(len(songs)))]

In [None]:
song_group["sum"] = song_group.sum(axis=1)

In [None]:
top_sim_songs = list(song_group.sort_values('sum', ascending = False).index)


In [None]:
top_sim_index = [i for i in top_sim_songs if i not in list(range(len(songs))) ]

### Using ranked songs to find more songs similar to seed tracks

In [None]:
rank = 0
while len(sim_songs) < 500:
    x = sp.recommendations(limit = 100,seed_tracks = [sim_songs[top_sim_index[rank]][2]])['tracks']
    for num in range(len(x)):
        track_name = x[num]['name']
        track_id = x[num]['id']
        artist = x[num]['artists'][0]['name']
        release_date = x[num]['album']['release_date']
        if [track_name,artist, track_id, release_date] not in sim_songs:
            sim_songs.append([track_name,artist, track_id, release_date])
    rank +=1
    print(rank, len(sim_songs))

In [None]:
songs_for_genius = pd.DataFrame(sim_songs)

In [None]:
songs_for_genius