# Song Analysis Using Spotify API

In [1]:
#import libraries
import sys
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np
import time
from bs4 import BeautifulSoup as bs
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine

In [2]:
#Spotify authorization scope
#NOTE(review): `scope` is not referenced by any other cell shown in this notebook — confirm it is still needed
scope = 'user-library-read'

In [3]:
#Spotify API credentials
#`%store -r` restores variables that were persisted earlier with IPython's %store magic,
#so the id and secret are never written in the notebook itself
#NOTE(review): presumably spotify_cid / spotify_secret were stored from another session — on a new machine they must be stored first
%store -r spotify_cid
cid = spotify_cid
%store -r spotify_secret
secret = spotify_secret

In [4]:
#open the Spotify connection through the Spotipy wrapper, authenticating with the client id/secret pair
client_cred = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_cred)

### Functions to Obtain Song Features and Recommendations

In [35]:
def get_track_info(track,artist):
    '''
    function returns dictionary with track's info (including audio features)
    parameters:
        track-->str, song title used in the search query
        artist-->str, artist name used in the search query
    returns:
        dict with keys track, artist, track_id, release_date, dance, energy,
        loud, speech, acoust, live, valence, tempo
    raises:
        IndexError when the search returns no tracks
    '''
    #search Spotify API for general song info; only the first hit is used
    query = 'artist:{} track:{}'.format(artist,track)
    matches = sp.search(q=query)['tracks']['items']
    if not matches:
        #same exception type that indexing the empty result would raise, but naming the failed query
        raise IndexError('no Spotify track found for query: {}'.format(query))
    info_json = matches[0]

    #obtain song's audio features (first element of the returned list)
    audio_info = sp.audio_features(info_json['id'])[0]

    #create dictionary with song info
    info = {'track':info_json['name'],
            #artist is read from the track itself, matching get_similar_songs,
            #rather than from the album the track appears on
            'artist':info_json['artists'][0]['name'],
            'track_id':info_json['id'],
            'release_date':info_json['album']['release_date'],
            'dance':audio_info['danceability'],
            'energy':audio_info['energy'],
            'loud':audio_info['loudness'],
            'speech':audio_info['speechiness'],
            'acoust':audio_info['acousticness'],
            'live':audio_info['liveness'],
            'valence':audio_info['valence'],
            'tempo':audio_info['tempo']
           }

    return info

In [8]:
def get_similar_songs(track_info):
    '''
    function returns list of song information for the songs Spotify recommends from the input songs
    parameters:
        track_info-->list of song information (track,artist,track_id,release_date)
    returns:
        list of [track,artist,track_id,release_date] lists, one per recommended song
    '''
    #the song id sits at position 2 of every input entry
    seed_ids = [entry[2] for entry in track_info]

    #request up to 100 recommendations seeded with those ids
    #NOTE(review): the recommendations endpoint reportedly caps how many seeds it accepts — confirm callers stay within it
    recommended = sp.recommendations(limit=100,seed_tracks=seed_ids)['tracks']

    #keep name, first listed artist, id and album release date of each recommendation
    return [[rec['name'],rec['artists'][0]['name'],rec['id'],rec['album']['release_date']]
            for rec in recommended]

In [9]:
def rank_by_features():
    '''
    placeholder for ranking songs by their audio features (not implemented yet)
    raises:
        NotImplementedError whenever it is called
    '''
    #an explicit stub keeps the cell runnable; a def without a body is a syntax error
    raise NotImplementedError('rank_by_features has not been implemented yet')

### Information on Seed Tracks

The lists below contain the top five tracks for each of country, R&B/hip-hop, and rock/alternative as of the week of May 15, 2021, based on Billboard Top 100 charts. The get_track_info function is used to obtain information (including audio features) about each of these tracks.

In [36]:
#country seed tracks as (track, artist) pairs
country_songs = [
    ('Forever After All','Luke Combs'),
    ('The Good Ones','Gabby Barrett'),
    ('Made for You','Jake Owen'),
    ('Hell of a View','Eric Church'),
    ('Breaking Up Was Easy in the 90s','Sam Hunt'),
]

#look up every seed with get_track_info and collect the results, one row per track
df_country = pd.DataFrame(data=[get_track_info(*seed) for seed in country_songs])

In [38]:
#R&B/hip-hop seed tracks as (track, artist) pairs
rb_songs = [
    ('Leave the Door Open','Bruno Mars'),
    ('Peaches','Justin Bieber'),
    ('Rapstar','Polo G'),
    ('Astronaut in the Ocean','Masked Wolf'),
    ('Up','Cardi B'),
]

#look up every seed with get_track_info and collect the results, one row per track
df_rb = pd.DataFrame(data=[get_track_info(*seed) for seed in rb_songs])

In [39]:
#rock/alternative seed tracks as (track, artist) pairs
rock_songs = [
    ('Without You','The Kid LAROI'),
    ('Your Power','Billie Eilish'),
    ("My Ex's Best Friend",'Machine Gun Kelly'),
    ('Mood','24kGoldn'),
    ('Therefore I Am','Billie Eilish'),
]

#look up every seed with get_track_info and collect the results, one row per track
df_rock = pd.DataFrame(data=[get_track_info(*seed) for seed in rock_songs])

### Song Recommendations

In [13]:
#use get_similar_songs function to find song recommendations
#add recommended songs to respective lists of seed tracks

#columns of the seed dataframes that form one song entry, in the order get_similar_songs reads them
info_cols = ['track','artist','track_id','release_date']

#each list is rebuilt from its seed dataframe first, so the cell works on a fresh kernel
#and re-running it does not append a second batch of recommendations
country_info = df_country[info_cols].values.tolist()
country_info += get_similar_songs(country_info)

rb_info = df_rb[info_cols].values.tolist()
rb_info += get_similar_songs(rb_info)

rock_info = df_rock[info_cols].values.tolist()
rock_info += get_similar_songs(rock_info)

In [None]:
#collect audio features for every entry of track_info, passing the value at position 2 (presumably the track id)
#NOTE(review): neither get_audio_features nor track_info is defined in the cells shown above — confirm where they come from
audio_features = [get_audio_features(i[2]) for i in track_info]

In [None]:
#tabulate the collected audio features under short column names
audio_meta = pd.DataFrame(
    audio_features,
    columns = ['track', 'dance', 'energy', 'key', 'loudness', 'mode', 'speech',
               'acoust', 'instru', 'live', 'valence', 'tempo', 'duration',
               'time_signature'],
)

In [None]:
# Getting dummies for the following columns because they are not continuous variables but either categorical or ordinal
# .copy() makes dummies an independent frame, so assigning to its columns afterwards
# does not raise SettingWithCopyWarning against audio_meta
dummies = audio_meta[['key','time_signature','mode']].copy()

In [None]:
#replace every value in each column of dummies with its string form
for col in dummies:
    dummies[col] = list(map(str, dummies[col]))

In [None]:
#expand dummies into indicator columns with pandas.get_dummies
spread = pd.get_dummies(dummies)

In [None]:
#cast every indicator value in spread to a plain int
for col in spread:
    spread[col] = list(map(int, spread[col]))

### Ranking similar songs by their Euclidean distance to the seed tracks

In [None]:
#feature table: audio_meta without the track column and the three encoded columns, joined column-wise with their indicator columns
meta = pd.concat(
    [audio_meta.drop(columns = ['track','key','time_signature','mode']), spread],
    axis = 1,
)

In [None]:
#pairwise euclidean distance between all rows of meta, flipped with 1 - d so that larger values mean closer rows
#NOTE(review): the features are not rescaled before this step, so columns with large numeric ranges will presumably dominate the distance — confirm this is intended
dist_out = 1-pairwise_distances(meta, metric="euclidean")

In [None]:
#keep only the first len(songs) columns of the score matrix
#NOTE(review): `songs` is not defined in the cells shown above — presumably the seed tracks, which would then need to be the first rows of meta; confirm
song_group = pd.DataFrame(dist_out).iloc[:, list(range(len(songs)))]

In [None]:
#row-wise total of the score columns; an existing "sum" column is left out of the total
#so re-running this cell does not fold the previous result into the new one
song_group["sum"] = song_group.drop(columns="sum", errors="ignore").sum(axis=1)

In [None]:
#row labels of song_group ordered from the highest "sum" to the lowest
top_sim_songs = song_group.sort_values(by='sum', ascending=False).index.tolist()


In [None]:
#ranked row labels without the first len(songs) positions, order preserved
#the range is built once and tested directly, instead of rebuilding a list for every candidate
seed_rows = range(len(songs))
top_sim_index = [i for i in top_sim_songs if i not in seed_rows]

### Using ranked songs to find more songs similar to seed tracks

In [None]:
#grow sim_songs by requesting recommendations for one ranked song at a time,
#stopping at 500 songs or when the ranked candidates run out
rank = 0
while len(sim_songs) < 500 and rank < len(top_sim_index): #second condition prevents indexing past the end of top_sim_index
    seed_id = sim_songs[top_sim_index[rank]][2] #position 2 of an entry is passed to Spotify as the seed track
    for rec in sp.recommendations(limit = 100,seed_tracks = [seed_id])['tracks']:
        entry = [rec['name'],rec['artists'][0]['name'],rec['id'],rec['album']['release_date']]
        #skip recommendations that are already collected
        if entry not in sim_songs:
            sim_songs.append(entry)
    rank +=1
    print(rank, len(sim_songs))

In [None]:
#tabulate the collected songs; no column names are passed, so the columns keep default integer labels
songs_for_genius = pd.DataFrame(sim_songs)

In [None]:
#display the resulting table
songs_for_genius