In [1]:
from api_keys import *

import billboard
import spotipy
import lyricsgenius
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

Helper functions:

In [2]:
sp = spotipy.Spotify(client_credentials_manager = SpotifyClientCredentials(client_id = SPOTIFY_ID, client_secret = SPOTIFY_SECRET))
genius = lyricsgenius.Genius(GENIUS_ACCESS_TOKEN)
genius.verbose = False
genius.remove_section_headers = True

def getCharts(*dates):
    return pd.concat( getSingleChart(date) for date in dates )

def getSingleChart(date):
    chart = billboard.ChartData("hot-100", date = date)
    
    array = [ (getSpotifyURI(song.title, song.artist), position + 1, song.title, song.artist, date) for position, song in enumerate(chart) ]
    
    return pd.DataFrame(array, columns = ["spotify_uri", "position", "title", "artist", "date"])

def getSongFeatures(spotify_uris):
    
    # Note that we take the unique set of uris
    # so you should expect this dataframe to have fewer rows than the charts df
    # bc songs may be on the charts for more than one month & thus show up several times in the df
    
    return pd.concat( getSingleSongFeatures(uri) for uri in set(spotify_uris) if uri is not None )

def getSingleSongFeatures(spotify_uri):
    features = sp.audio_features(spotify_uri)[0]
    
    if features is None:
        return pd.DataFrame({"uri": [spotify_uri]})
    
    # Pandas wants dict values to be lists
    return pd.DataFrame({ k:[v] for k, v in features.items() }) \
        [["uri", "danceability", "energy", "key", "loudness", "mode",
          "speechiness", "acousticness", "instrumentalness", "liveness",
          "valence", "tempo", "duration_ms", "time_signature"]]

def getSongLyrics(spotify_uris, chartsTable):
    title = lambda uri: tuple(
        chartsTable \
            .query(f"spotify_uri == '{uri}'") \
            .head(1) \
            [["title", "artist"]] \
            .iloc[0]
        )
    
    return pd.concat( getSingleSongLyrics(uri, *title(uri)) for uri in spotify_uris if uri is not None )

def getSingleSongLyrics(spotify_uri, title, artist):
    lyrics = genius.search_song(title, artist).lyrics
    
    return pd.DataFrame({"spotify_uri": [spotify_uri], "lyrics": [lyrics]})

def getSpotifyURI(title, artist):
    try:
        return sp.search(q = f"{title} {artist}", type = "track", limit = 1)["tracks"]["items"][0]["uri"]
    except IndexError:
        return None

Data:

In [3]:
chartsTable = getCharts("2020-02-01", "2020-03-01")

In [4]:
chartsTable

Unnamed: 0,spotify_uri,position,title,artist,date
0,spotify:track:33WF2L65nyyQkGrQhtzFa6,1,The Box,Roddy Ricch,2020-02-01
1,,2,Life Is Good,Future Featuring Drake,2020-02-01
2,,3,Godzilla,Eminem Featuring Juice WRLD,2020-02-01
3,spotify:track:21jGcNKet2qwijlDFuPiPb,4,Circles,Post Malone,2020-02-01
4,spotify:track:2b8fOow8UzyDFAE27YhOZM,5,Memories,Maroon 5,2020-02-01
...,...,...,...,...,...
95,spotify:track:2VPmBOuy7ZAOFSzKwW2IEt,96,Ridin' Roads,Dustin Lynch,2020-03-01
96,spotify:track:6cZH4rX1KTt1aJ3Ql6Ynja,97,Me And My Guitar,A Boogie Wit da Hoodie,2020-03-01
97,spotify:track:6XXYdF6pJR1K3wKvuxmu7n,98,Feel Me,Selena Gomez,2020-03-01
98,spotify:track:2n7ohqg1s68ToAHh4u9mcW,99,Vete,Bad Bunny,2020-03-01


In [5]:
audioTable = getSongFeatures(chartsTable.spotify_uri)

In [6]:
audioTable

Unnamed: 0,uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,spotify:track:4vTgx6h4seHvkuFh84JXYP,0.674,0.640,0.0,-5.139,0.0,0.0339,0.1510,0.000000,0.0925,0.664,99.908,234458.0,4.0
0,spotify:track:2Fxmhks0bxGSBdJ92vM42m,0.701,0.425,7.0,-10.965,1.0,0.3750,0.3280,0.130000,0.1000,0.562,135.128,194088.0,4.0
0,spotify:track:2Ec33AVlkTTq8BHFgBTdQs,0.881,0.457,7.0,-8.191,0.0,0.1560,0.0327,0.000000,0.2310,0.296,136.970,198913.0,4.0
0,spotify:track:0fySG6A6qLE8IvDpayb5bM,0.768,0.652,1.0,-2.708,0.0,0.3070,0.1130,0.000000,0.1070,0.777,154.187,144935.0,4.0
0,spotify:track:3qHgGyJY4GpXNOK4WL4NSo,0.485,0.814,9.0,-3.907,1.0,0.1380,0.2170,0.000000,0.1120,0.327,159.894,156077.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,spotify:track:2wrJq5XKLnmhRXHIAf9xBa,0.654,0.630,10.0,-4.644,1.0,0.0259,0.1530,0.000000,0.1110,0.430,89.991,167693.0,4.0
0,spotify:track:1OuN92HcVG6NVpWbeESNB3,0.875,0.599,9.0,-9.077,0.0,0.1380,0.0333,0.014300,0.1130,0.112,152.061,256507.0,4.0
0,spotify:track:0KVEBQA98JvkmB5ZydQ13k,0.506,0.787,8.0,-6.634,1.0,0.1610,0.2540,0.000000,0.1120,0.333,99.956,175339.0,4.0
0,spotify:track:6ZvAXIWw3rw8WR0Xlmifri,0.621,0.791,9.0,-3.343,1.0,0.0265,0.0361,0.000000,0.1430,0.734,95.075,209110.0,4.0


In [7]:
lyricsTable = getSongLyrics(chartsTable.spotify_uri, chartsTable)

In [8]:
lyricsTable

Unnamed: 0,spotify_uri,lyrics
0,spotify:track:33WF2L65nyyQkGrQhtzFa6,Pullin' out the coupe at the lot\nTold 'em fuc...
0,spotify:track:21jGcNKet2qwijlDFuPiPb,"Oh, oh, oh\nOh, oh, oh\nOh, oh, oh, oh, oh\n..."
0,spotify:track:2b8fOow8UzyDFAE27YhOZM,Here's to the ones that we got\nCheers to the ...
0,spotify:track:2wrJq5XKLnmhRXHIAf9xBa,"Do you love the rain, does it make you dance\n..."
0,spotify:track:2XU0oxnq2qxCpomAAuJY8K,"They say, ""Oh my God, I see the way you shine\..."
...,...,...
0,spotify:track:2VPmBOuy7ZAOFSzKwW2IEt,"Ridin' roads\nRidin' roads\n\nUh, yeah\nThis t..."
0,spotify:track:6cZH4rX1KTt1aJ3Ql6Ynja,"Yeah, I loved her too hard, she left me here w..."
0,spotify:track:6XXYdF6pJR1K3wKvuxmu7n,"No one love you like I love ya\nNever cheat, n..."
0,spotify:track:2n7ohqg1s68ToAHh4u9mcW,"(Yeh-yeh-yeh-yeh, yeh-yeh-yeh-yeh)\nSi te vas,..."
