# Song Analysis Using Spotify API

In [1]:
#import libraries
import sys
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np
import time
from bs4 import BeautifulSoup as bs
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine

In [2]:
#Spotify authorization scope
scope = 'user-library-read'

In [4]:
#Spotify API credentials
%store -r spotify_cid
cid = spotify_cid
%store -r spotify_secret
secret = spotify_secret

In [5]:
#connect to Spotify through wrapper Spotipy
client_cred = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager = client_cred)

### Seed tracks

The lists below contain the top five tracks for each of country, R&B/hip-hop, and rock/alternative as of the week of May 15, 2021, based on Billboard Top 100 charts.

In [6]:
#top 5 from Billboard Top 100 for country
country_songs = [('Forever After All','Luke Combs'),
                 ('The Good Ones','Gabby Barrett'),
                 ('Made for You','Jake Owen'),
                 ('Hell of a View','Eric Church'),
                 ('Breaking Up Was Easy in the 90s','Sam Hunt')]

In [7]:
#top 5 from Billboard Top 100 for R&B/hip-hop
rb_songs = [('Leave the Door Open','Silk Sonic'),
            ('Peaches','Justin Bieber'),
            ('Rapstar','Polo G'),
            ('Astronaut in the Ocean','Masked Wolf'),
            ('Up','Cardi B')]

In [8]:
#top 5 from Billboard Top 100 for rock/alternative
rock_songs = [('Without You','The Kid LAROI'),
              ('Your Power','Billie Eilish'),
              ("My Ex's Best Friend",'by Machine Gun Kelly'),
              ('Mood','24kGoldn'),
              ('Therefore I Am','Billie Eilish')]

### Obtain Song Features

The following functions allow the user to search for a track and obtain its audio features from Spotify.

In [10]:
def search_by_track_artist(artist, track):
    '''
    function returns list with track name, artist, identification number, and release date
    parameters:
        artist-->str
        track-->str
    '''
    info_json = sp.search(q='artist:{} track:{}'.format(artist, track))
    track = info_json['tracks']['items'][0]['name']
    track_id = info_json['tracks']['items'][0]['id']
    release_date = info_json['tracks']['items'][0]['album']['release_date']
    artist = info_json['tracks']['items'][0]['album']['artists'][0]['name']

    output = [track,artist,track_id,release_date]
    
    return output

In [11]:
def get_audio_features(track):
    '''
    function returns song's audio features
    parameters:
        track-->str
    '''
    info = sp.audio_features(track)[0]
    dance = info['danceability']
    energy = info['energy']
    key = info['key']
    loudness = info['loudness']
    mode = info['mode']
    speech = info['speechiness']
    acoust = info['acousticness']
    instru = info['instrumentalness']
    live = info['liveness']
    valence = info['valence']
    tempo = info['tempo']
    duration = info['duration_ms']
    time_signature = info['time_signature']
    track_info = [track,dance,energy,key,loudness,mode,speech,acoust, 
                  instru,live,valence,tempo,duration,time_signature]
    
    return track_info

In [9]:
track_info = []
for song,artist in songs:
    try:
        #trying to search for a song and return a result, but if it cant find a song, then it will print below
        track_info.append(search_by_track_artist(artist , song))
    except:
        print(song,artist)

Leave the Door Open Silk Sonic


In [10]:
ids_for_rec = [i[2]for i in track_info]

### Recommendations

In [11]:
sim_songs = []
x = sp.recommendations(limit = 100,seed_tracks = ids_for_rec)['tracks']
for num in range(len(x)):
    track_name = x[num]['name']
    track_id = x[num]['id']
    artist = x[num]['artists'][0]['name']
    release_date = x[num]['album']['release_date']
    sim_songs.append([track_name,artist, track_id, release_date])

In [12]:
track_info.extend(sim_songs)

In [13]:
audio_features = [get_audio_features(i[2]) for i in track_info]

In [14]:
audio_meta = pd.DataFrame(audio_features, columns = ['track', 'dance' , 'energy' , 'key' , 'loudness' , 'mode' , 'speech' , 'acoust' , 
                       'instru' , 'live' , 'valence' , 'tempo' , 'duration' , 'time_signature'])

In [15]:
# Getting dummies for the following columns because they are not continuous varibles but either categorical or ordinal
dummies = audio_meta[['key','time_signature','mode']]

In [16]:
for i in dummies:
    dummies[i] = [str(num) for num in dummies[i]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dummies[i] = [str(num) for num in dummies[i]]


In [17]:
spread = pd.get_dummies(dummies)

In [18]:
for i in spread:
    spread[i] = [int(num) for num in spread[i]] 

### Ranking similar songs by using how close they are to seed tracks by euclidean distance

In [19]:
meta = pd.concat([audio_meta.drop(columns = ['track','key','time_signature','mode']),spread], axis = 1)

In [20]:
dist_out = 1-pairwise_distances(meta, metric="euclidean")

In [21]:
song_group = pd.DataFrame(dist_out).iloc[:, list(range(len(songs)))]

In [22]:
song_group["sum"] = song_group.sum(axis=1)

In [23]:
top_sim_songs = list(song_group.sort_values('sum', ascending = False).index)


In [24]:
top_sim_index = [i for i in top_sim_songs if i not in list(range(len(songs))) ]

### Using ranked songs to find more songs similar to seed tracks

In [25]:
rank = 0
while len(sim_songs) < 500:
    x = sp.recommendations(limit = 100,seed_tracks = [sim_songs[top_sim_index[rank]][2]])['tracks']
    for num in range(len(x)):
        track_name = x[num]['name']
        track_id = x[num]['id']
        artist = x[num]['artists'][0]['name']
        release_date = x[num]['album']['release_date']
        if [track_name,artist, track_id, release_date] not in sim_songs:
            sim_songs.append([track_name,artist, track_id, release_date])
    rank +=1
    print(rank, len(sim_songs))

1 200
2 298
3 394
4 490
5 579


In [26]:
songs_for_genius = pd.DataFrame(sim_songs)

In [27]:
songs_for_genius

Unnamed: 0,0,1,2,3
0,16 Shots,Stefflon Don,2169IKAivSUUFGcedGASXc,2017-05-22
1,oops!!! (with Lil Wayne),Yung Gravy,0O014lR2VQ3JpMHA7KFyy5,2021-03-05
2,PILL BREAKER (feat. blackbear & Machine Gun Ke...,Trippie Redd,7jjnn6K7p5MgOFuqLypAcM,2021-02-19
3,Act Up,City Girls,3A2yGHWIzmGEIolwonU69h,2018-11-16
4,BELIEVE IT,PARTYNEXTDOOR,4HDCLYli2SUdkq9OjmvhSD,2020-03-27
...,...,...,...,...
574,Right Back (feat. A Boogie Wit Da Hoodie),Khalid,6PYnUsNEpYC7A4BS2sjw3L,2019-08-13
575,Glitter,BENEE,23TPP1eeElFfvYVznskwCY,2019-06-28
576,Heather,Conan Gray,4xqrdfXkTW4T0RauPLv3WA,2020-03-20
577,Strawberries & Cigarettes,Troye Sivan,3afkJSKX0EAMsJXTZnDXXJ,2018-03-16
