In [20]:
from dotenv import load_dotenv
# import requests
import lyricsgenius as lg
import pandas as pd
import numpy as np
import re
import string
import os

In [21]:
# Populate the process environment from a local .env file before reading the
# Genius credentials; without this call the imported load_dotenv is never used
# and the lookups below only see variables already exported in the shell.
load_dotenv()

# Each value is None when the corresponding variable is not set.
client_id = os.getenv('GENIUS_CLIENT_ID')
client_secret = os.getenv('GENIUS_CLIENT_SECRET')
access_token = os.getenv('GENIUS_ACCESS_TOKEN')

In [36]:
# Read the track table from disk, then pull the title and artist columns out
# as standalone NumPy arrays.
spot_df = pd.read_csv('track_artists.csv')
names, artists = (np.array(spot_df[col]) for col in ('name', 'artists_names'))

In [23]:
# Module-level lyricsgenius client built from the access token read from the
# environment; get_spot_lyrics uses this object for every search.
genius = lg.Genius(access_token=access_token)

In [24]:
def get_spot_lyrics(row):
    """Look up lyrics for one track via the module-level ``genius`` client.

    The title and artist are simplified before searching: anything after the
    first ``' - '`` in the title is dropped, parenthesised text is removed from
    both, punctuation is stripped, and runs of whitespace are collapsed.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing
            ``'name'`` and ``'artists_names'`` entries.

    Returns:
        str: The lyrics text following the first ``]`` (the whole text when it
        contains no ``]``), or ``''`` when the title/artist is not a string,
        the search raises an ordinary exception, or no lyrics are found.
    """
    song_name = row['name']
    artist_name = row['artists_names']
    # Non-string values (e.g. NaN for an empty cell) cannot be searched for and
    # would otherwise crash on the string methods below.
    if not isinstance(song_name, str) or not isinstance(artist_name, str):
        return ''

    # for names e.g. XXX Song - Live at XXX
    song_name = song_name.split(' - ', 1)[0]
    # for names e.g. XXX (feat YYY)
    song_name = re.sub(r"[\(].*?[\)]", "", song_name)
    artist_name = re.sub(r"[\(].*?[\)]", "", artist_name)
    # for removing all punctuation (one translation table shared by both fields)
    strip_punctuation = str.maketrans('', '', string.punctuation)
    song_name = song_name.translate(strip_punctuation)
    artist_name = artist_name.translate(strip_punctuation)
    # The removals above leave trailing/doubled spaces behind; tidy the query.
    song_name = ' '.join(song_name.split())
    artist_name = ' '.join(artist_name.split())

    try:
        song = genius.search_song(song_name, artist=artist_name, get_full_info=False)
    except Exception:
        # Best effort: a failed lookup yields no lyrics. Catching Exception
        # (not a bare except) keeps KeyboardInterrupt/SystemExit working so a
        # long-running scrape can still be stopped.
        return ''

    if song and song.lyrics:
        # Keep only what follows the first ']' in the returned text.
        return song.lyrics.split(']', 1)[-1]
    return ''

In [27]:
import dask.dataframe as dd

# Wrap the pandas frame in a 128-partition Dask frame and describe a row-wise
# lyric lookup over it; `meta` declares the result as an unnamed string series.
spot_dask = dd.from_pandas(spot_df, npartitions=128)
res = spot_dask.apply(
    get_spot_lyrics,
    axis=1,
    meta=(None, 'string'),
)


In [28]:
# Materialise the Dask result built from get_spot_lyrics; the search log
# printed below is produced while this call runs.
res_out = res.compute()

Searching for "Walk on by " by Noosa...
Searching for "The Safety Dance" by Men Without Hats...
Searching for "Try Me" by DeJ Loaf...
Searching for "Way Up " by Floduxe...
Searching for "Coastin feat K Flay" by Zion I...
Searching for "Human Being" by Zion I...
Searching for "Whirlpool" by Sea Wolf...
Searching for "Fantasy" by Alina Baraz...
Searching for "Saskatoon" by Data Romance...
Searching for "Party  Bullshit In The USA" by DJ Crazy J Rodriguez...
Searching for "Religion" by Tongues...
Searching for "Call Me Crazy" by PJ Simas...
Searching for "Pretty Wings" by Zak Waters...
Searching for "Idle Hands" by EXGF...
Searching for "Wait a Minute" by Safakash...
Searching for "Whiskey My Love" by Kalob Griffin Band...
No results found for: 'Saskatoon Data Romance'
Searching for "Pocket Full Of Gold" by American Authors...
No results found for: 'Wait a Minute Safakash'
Searching for "Waiting For You" by Grizfolk...
Done.
Searching for "Endless Summer" by Grizfolk...
Done.
Searching fo

In [40]:
# Store the computed lyrics on the original frame as a 'lyrics' column
# (this mutates spot_df in place), then preview the first rows.
spot_df['lyrics'] = res_out
spot_df.head()


Unnamed: 0,track_uri,name,artists_names,popularity,album_type,playlist_uris,danceability,energy,key,loudness,...,liveness,valence,tempo,duration_ms,time_signature,artists_genres,release_year,artists_mean_popularities,artists_mean_followers,lyrics
0,spotify:track:3v6sBj3swihU8pXQQHhDZo,Way Up,Floduxe,0,single,['spotify:playlist:5JJZvA3VR9RZ5XIr0reWiM'],0.611,0.614,5.0,-8.815,...,0.753,0.52,128.05,195000.0,4.0,['indie'],2015,9.0,765.0,I just poured up a 4 you know what I'm sayin'...
1,spotify:track:7KCWmFdw0TzoJbKtqRRzJO,Fantasy,Alina Baraz,33,album,['spotify:playlist:5JJZvA3VR9RZ5XIr0reWiM'],0.638,0.781,4.0,-6.848,...,0.349,0.25,122.985,194641.0,4.0,"['future', 'etherpop', 'bass', 'chillstep', 'r...",2015,62.0,711030.5,So you say you wanna get away We don't need a...
2,spotify:track:2CY92qejUrhyPUASawNVRr,Try Me,DeJ Loaf,52,single,['spotify:playlist:5JJZvA3VR9RZ5XIr0reWiM'],0.56,0.81,0.0,-8.029,...,0.241,0.247,170.044,217573.0,4.0,"['queen', 'trap', 'detroit', 'hip', 'r&b', 'po...",2014,65.0,1544981.0,"Let a nigga try me, try me I'm a get his whol..."
3,spotify:track:11BPfwVbB7vok7KfjBeW4k,Walk on by,Noosa,0,single,['spotify:playlist:5JJZvA3VR9RZ5XIr0reWiM'],0.525,0.699,9.0,-4.571,...,0.0888,0.199,92.011,443478.0,4.0,"['pop', 'shimmer', 'shiver']",2014,32.0,18241.0,2 ContributorsWalk On By LyricsRun when the rh...
4,spotify:track:3yUJKPsjvThlcQWTS9ttYx,Pretty Wings,Zak Waters,0,single,['spotify:playlist:5JJZvA3VR9RZ5XIr0reWiM'],0.367,0.771,9.0,-5.863,...,0.0965,0.163,115.917,225862.0,4.0,"['poptimism', 'la', 'indie', 'pop']",2014,34.5,11940.0,""" ft. Björk Tessa Violet - ""You Are Not My Fri..."


In [47]:
def process_lyrics(row):
    """Normalise one row's lyrics into lowercase, punctuation-free text.

    When the row has no usable lyrics (empty string or a non-string value such
    as NaN/None), the track title and artist, separated by a space, are used
    as the text instead.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing
            ``'lyrics'``, ``'name'`` and ``'artists_names'`` entries.

    Returns:
        str: The text with ``(...)`` / ``[...]`` spans removed, newlines
        replaced by spaces, punctuation stripped, and lowercased.
    """
    lyrics = row['lyrics']
    if not isinstance(lyrics, str) or len(lyrics) == 0:
        # Separate title and artist so their adjoining words do not fuse
        # into one token (e.g. "Way UpFloduxe").
        lyrics = f"{row['name']} {row['artists_names']}"
    # remove words in brackets and square brackets
    # (raw string: "\(" and "\[" are invalid escapes in a normal literal)
    lyrics = re.sub(r"[\(\[].*?[\)\]]", "", lyrics)
    # remove line breaks
    lyrics = lyrics.replace("\n", " ")
    # remove all punctuation
    lyrics = lyrics.translate(str.maketrans('', '', string.punctuation))
    # lower case
    return lyrics.lower()


In [59]:
import fasttext

# Load the fastText model file used below to label each lyric's language.
model = fasttext.load_model('lid.176.bin')

# Replace the raw lyrics with their cleaned form, row by row.
spot_df['lyrics'] = spot_df.apply(process_lyrics, axis=1)

# Boolean mask: True where the model's first predicted label is English.
is_eng = spot_df['lyrics'].apply(
    lambda text: model.predict(text)[0][0] == '__label__en'
)

# Keep only the English rows and renumber the index from zero.
spot_df = spot_df.loc[is_eng].reset_index(drop=True)
spot_df.shape

(41, 24)

In [None]:
# Write the filtered frame, including the cleaned 'lyrics' column, to
# spot_lyrics.json in the working directory.
spot_df.to_json('spot_lyrics.json')