In [1]:
!pip install spotipy



In [2]:
# Acknowledgements:
# https://medium.com/swlh/how-to-leverage-spotify-api-genius-lyrics-for-data-science-tasks-in-python-c36cdfb55cf3

In [3]:
import pandas as pd
import re
import os
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from bs4 import BeautifulSoup as BS
import requests
import time

In [4]:
cid = 'b1e13d47b67249bda69ed912bb4f7133'
secret = 'a733eca54ee84bb1b8d95b67cda8c91e'

client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

In [5]:
# Let's see if I can gather data from a playlist of Iron Maiden:
# First, I'll get the track IDs from the "This is Iron Maiden" playlist

def getTrackIDs(playlist_id):
    ids = []
    playlist = sp.playlist(playlist_id)
    for song in playlist['tracks']['items']:
        track = song['track']
        ids.append(track['id'])
    print('I could find {num_tracks} songs'.format(num_tracks=len(ids)))
    return ids

ti_iron_maiden = 'spotify:playlist:37i9dQZF1DZ06evO3KIUZW'
ids = getTrackIDs(ti_iron_maiden)

I could find 50 songs


In [6]:
# Next, I build a function to retrieve all the useful info for my dataset:
def getTrackFeatures(item):
    metadata = sp.track(item)
    features = sp.audio_features(item)
    
    #metadata:
    name = metadata['name']
    album = metadata['album']['name']
    album_cover = metadata['album']['images'][0]['url']
    artist = metadata['album']['artists'][0]['name']
    release_date = metadata['album']['release_date']
    length = metadata['duration_ms']
    popularity = metadata['popularity']
    sample = metadata['preview_url']
    
    #audio analysis features:
    acousticness = features[0]['acousticness']
    danceability = features[0]['danceability']
    energy = features[0]['energy']
    instrumentalness = features[0]['instrumentalness']
    liveness = features[0]['liveness']
    loudness = features[0]['loudness']
    speechiness = features[0]['speechiness']
    tempo = features[0]['tempo']
    time_signature = features[0]['time_signature']
    
    track = [name,album,album_cover,artist,sample,release_date,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature]
    return track

In [7]:
# Now I loop over track ids:
tracks = []
for item in ids:
    track = getTrackFeatures(item)
    time.sleep(3)
    tracks.append(track)

In [8]:
tracks

[['The Trooper - 2015 Remaster',
  'Piece of Mind (2015 Remaster)',
  'https://i.scdn.co/image/ab67616d0000b2732277d1a849a63c7075ec38ff',
  'Iron Maiden',
  'https://p.scdn.co/mp3-preview/a373cf851d1635e2cc904d7b83a2dc9eb80dc00e?cid=b1e13d47b67249bda69ed912bb4f7133',
  '1983',
  252733,
  74,
  0.0318,
  0.285,
  0.908,
  0.00909,
  0.311,
  -4.601,
  0.0705,
  159.756,
  4],
 ['Run to the Hills - 2015 Remaster',
  'The Number of the Beast (2015 Remaster)',
  'https://i.scdn.co/image/ab67616d0000b2735c29a88ba5341ca428f0c322',
  'Iron Maiden',
  'https://p.scdn.co/mp3-preview/c315f66924d7e6427784fbda66d1772f3cbfda89?cid=b1e13d47b67249bda69ed912bb4f7133',
  '1982',
  233506,
  71,
  0.0286,
  0.249,
  0.943,
  0.00401,
  0.162,
  -5.188,
  0.0669,
  173.671,
  4],
 ['Fear of the Dark - 2015 Remaster',
  'Fear of the Dark (2015 Remaster)',
  'https://i.scdn.co/image/ab67616d0000b2730dda9c11454b9053d07f585a',
  'Iron Maiden',
  'https://p.scdn.co/mp3-preview/3dfbef8f1bf0394fb693dc7a2033143

In [9]:
# Now, I add lyrics scraping from genius.com
def scrape_iron_maiden_lyrics(artist,song):
    artistname = str(artist.replace(' ','-') if ' ' in artist else str(artist))
    song = str(song.replace(" - 2015 Remaster","") if " - 2015 Remaster" in song else str(song))
    songname = str(song.replace(' ','-') if ' ' in song else str(song))
    #print(songname)
    page = requests.get('https://genius.com/'+ artistname + '-' + songname + '-' + 'lyrics',headers={'User-Agent': 'Mozilla/5.0'})
    #print('https://genius.com/'+ artistname + '-' + songname + '-' + 'lyrics')
    html = BS(page.text,'html.parser')
    #print(html)
    time.sleep(3)
    lyrics = html.find_all('div', {"data-lyrics-container":"true","class":'Lyrics__Container-sc-1ynbvzw-6 YYrds'})
    #print(lyrics)
    lyrics_list = []
    for text in lyrics:
        #remove identifiers like chorus, verse, etc
        verse = text.get_text(separator=" ").strip()
        verse = re.sub(r'[\(\[].*?[\)\]]', '', verse)
        #print(verse)
        lyrics_list.append(verse)
    lyrics_text = "\n".join(lyrics_list)
    return lyrics_text

In [10]:
# create dataset
df = pd.DataFrame(tracks, columns = ["track","album","album_cover","artist","sample","release_date","length","popularity","acousticness","danceability","energy","instrumentalness","liveness","loudness","speechiness","tempo","time_signature"])
df

Unnamed: 0,track,album,album_cover,artist,sample,release_date,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,The Trooper - 2015 Remaster,Piece of Mind (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2732277d1...,Iron Maiden,https://p.scdn.co/mp3-preview/a373cf851d1635e2...,1983,252733,74,0.0318,0.285,0.908,0.00909,0.311,-4.601,0.0705,159.756,4
1,Run to the Hills - 2015 Remaster,The Number of the Beast (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2735c29a8...,Iron Maiden,https://p.scdn.co/mp3-preview/c315f66924d7e642...,1982,233506,71,0.0286,0.249,0.943,0.00401,0.162,-5.188,0.0669,173.671,4
2,Fear of the Dark - 2015 Remaster,Fear of the Dark (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2730dda9c...,Iron Maiden,https://p.scdn.co/mp3-preview/3dfbef8f1bf0394f...,1992,438120,71,0.000739,0.308,0.866,0.00141,0.232,-5.671,0.059,109.825,4
3,Wasted Years - 2015 Remaster,Somewhere in Time (2015 Remaster),https://i.scdn.co/image/ab67616d0000b27369059f...,Iron Maiden,https://p.scdn.co/mp3-preview/0d657d8c64eecd6b...,1986,309640,66,0.0263,0.314,0.959,0.000148,0.161,-4.657,0.0908,154.221,4
4,2 Minutes to Midnight - 2015 Remaster,Powerslave (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2730335e5...,Iron Maiden,https://p.scdn.co/mp3-preview/26db09ecb26c0ee9...,1984,363586,65,0.00754,0.446,0.97,0.000275,0.199,-5.004,0.0508,94.233,4
5,The Number of the Beast - 2015 Remaster,The Number of the Beast (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2735c29a8...,Iron Maiden,https://p.scdn.co/mp3-preview/e5eec14e5f92f1a6...,1982,290586,68,0.088,0.409,0.89,0.107,0.0935,-6.839,0.0558,100.409,4
6,Wasting Love - 2015 Remaster,Fear of the Dark (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2730dda9c...,Iron Maiden,https://p.scdn.co/mp3-preview/963ef4d48109cfc1...,1992,351026,64,0.00562,0.324,0.667,3.6e-05,0.0873,-5.373,0.0321,142.503,4
7,Flight of Icarus - 2015 Remaster,Piece of Mind (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2732277d1...,Iron Maiden,https://p.scdn.co/mp3-preview/dc9f0658417675c8...,1983,230760,61,0.147,0.368,0.917,0.000121,0.169,-4.829,0.0772,110.455,4
8,Aces High - 2015 Remaster,Powerslave (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2730335e5...,Iron Maiden,https://p.scdn.co/mp3-preview/96a8ef51969bc202...,1984,271906,63,0.0337,0.3,0.936,0.021,0.158,-5.399,0.0456,133.065,4
9,Hallowed Be Thy Name - 2015 Remaster,The Number of the Beast (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2735c29a8...,Iron Maiden,https://p.scdn.co/mp3-preview/cb55675dd6cbd1bb...,1982,431093,66,0.0346,0.313,0.882,0.338,0.45,-6.215,0.0355,103.981,4


In [11]:
test = scrape_iron_maiden_lyrics('Iron Maiden',"The Trooper - 2015 Remaster")
test

" You'll take my life, but I'll take yours too You'll fire your musket, but I'll run you through So when you're waiting for the next attack You'd better stand, there's no turning back The bugle sounds, the charge begins But on this battlefield, no one wins The smell of acrid smoke   and horse's breath As I plunge on into certain death  Oh oh oh oh oh oh oh oh oh Oh oh oh oh oh oh oh oh oh  The horse, he sweats with fear, we break to run The mighty roar of the Russian guns And as we race towards the human wall The screams of pain as my comrades fall We hurdle bodies that lay on the ground And the Russians fire another round We get so near, yet so far away We won't live to fight another day  Oh oh oh oh oh oh oh oh oh Oh oh oh oh oh oh oh oh oh     We get so close, near enough to fight When a Russian gets me in his sights He pulls the trigger and I feel the blow A burst of rounds takes my horse below And as I lay there, gazing at the sky My body's numb and my throat is dry And as I lay f

In [12]:
def lyrics_onto_dataframe(df,artist):
    for i,song in enumerate(df['track']):
        test = scrape_iron_maiden_lyrics(artist,song)
        #print(test)
        df.loc[i,'lyrics'] = test
    return df

df = lyrics_onto_dataframe(df,'Iron Maiden')

In [13]:
df.to_csv("iron_maiden.csv", sep = ',')
iron_maiden = pd.read_csv('iron_maiden.csv')
iron_maiden

Unnamed: 0.1,Unnamed: 0,track,album,album_cover,artist,sample,release_date,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature,lyrics
0,0,The Trooper - 2015 Remaster,Piece of Mind (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2732277d1...,Iron Maiden,https://p.scdn.co/mp3-preview/a373cf851d1635e2...,1983,252733,74,0.0318,0.285,0.908,0.00909,0.311,-4.601,0.0705,159.756,4,"You'll take my life, but I'll take yours too ..."
1,1,Run to the Hills - 2015 Remaster,The Number of the Beast (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2735c29a8...,Iron Maiden,https://p.scdn.co/mp3-preview/c315f66924d7e642...,1982,233506,71,0.0286,0.249,0.943,0.00401,0.162,-5.188,0.0669,173.671,4,White man came across the sea He brought us p...
2,2,Fear of the Dark - 2015 Remaster,Fear of the Dark (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2730dda9c...,Iron Maiden,https://p.scdn.co/mp3-preview/3dfbef8f1bf0394f...,1992,438120,71,0.000739,0.308,0.866,0.00141,0.232,-5.671,0.059,109.825,4,I am a man who walks alone And when I'm walki...
3,3,Wasted Years - 2015 Remaster,Somewhere in Time (2015 Remaster),https://i.scdn.co/image/ab67616d0000b27369059f...,Iron Maiden,https://p.scdn.co/mp3-preview/0d657d8c64eecd6b...,1986,309640,66,0.0263,0.314,0.959,0.000148,0.161,-4.657,0.0908,154.221,4,From the coast of gold Across the seven seas ...
4,4,2 Minutes to Midnight - 2015 Remaster,Powerslave (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2730335e5...,Iron Maiden,https://p.scdn.co/mp3-preview/26db09ecb26c0ee9...,1984,363586,65,0.00754,0.446,0.97,0.000275,0.199,-5.004,0.0508,94.233,4,"Kill for gain, or shoot to maim But we don't ..."
5,5,The Number of the Beast - 2015 Remaster,The Number of the Beast (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2735c29a8...,Iron Maiden,https://p.scdn.co/mp3-preview/e5eec14e5f92f1a6...,1982,290586,68,0.088,0.409,0.89,0.107,0.0935,-6.839,0.0558,100.409,4,"Woe to you, o'er Earth and Sea For the Devil ..."
6,6,Wasting Love - 2015 Remaster,Fear of the Dark (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2730dda9c...,Iron Maiden,https://p.scdn.co/mp3-preview/963ef4d48109cfc1...,1992,351026,64,0.00562,0.324,0.667,3.6e-05,0.0873,-5.373,0.0321,142.503,4,Maybe one day I'll be an honest man Up 'til n...
7,7,Flight of Icarus - 2015 Remaster,Piece of Mind (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2732277d1...,Iron Maiden,https://p.scdn.co/mp3-preview/dc9f0658417675c8...,1983,230760,61,0.147,0.368,0.917,0.000121,0.169,-4.829,0.0772,110.455,4,As the sun breaks above the ground An old man...
8,8,Aces High - 2015 Remaster,Powerslave (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2730335e5...,Iron Maiden,https://p.scdn.co/mp3-preview/96a8ef51969bc202...,1984,271906,63,0.0337,0.3,0.936,0.021,0.158,-5.399,0.0456,133.065,4,There goes the siren that warns of the air r...
9,9,Hallowed Be Thy Name - 2015 Remaster,The Number of the Beast (2015 Remaster),https://i.scdn.co/image/ab67616d0000b2735c29a8...,Iron Maiden,https://p.scdn.co/mp3-preview/cb55675dd6cbd1bb...,1982,431093,66,0.0346,0.313,0.882,0.338,0.45,-6.215,0.0355,103.981,4,I'm waiting in my cold cell when the bell be...
