# Lyrics Using Genius API

In [1]:
#import libraries
import pandas as pd
import lyricsgenius as genius #used to interface with Genius API
import string
import re

from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer

In [2]:
#token provided by Genius API
%store -r client_access_token

In [3]:
#initiate Genius
genius = genius.Genius(client_access_token)
genius.verbose = False #turn off status messages

### Functions

In [86]:
def get_lyrics(track,artist):
    '''
    function returns song's lyrics
    parameters:
        track-->str
        artist-->str
    '''
    track = re.sub(' - .+', '', track) #remove text after '-'
    
    try:
        return genius.search_song(track,artist).lyrics
    except:
        print(track + ' by ' + artist + ' is not available')
    

In [5]:
def get_df_songs(track_list,artist_list):
    '''
    function obtains lyrics and returns dataframe with columns for track, artist, lyrics
    parameters:
        track_list-->list of str 
        artist_list-->list of str
    '''
    lyrics_list = [get_lyrics(track_list[x],artist_list[x]) for x in range(len(track_list))] #get lyrics for each song
    
    return pd.DataFrame(data={'track':track_list,'artist':artist_list,'lyrics':lyrics_list})

In [6]:
def clean_lyrics(df,col,new_col):
    '''
    function returns dataframe with new column of cleaned text (song lyrics)
    parameters:
        df-->pandas dataframe
        col-->column to clean (str)
        new_col-->name of column with cleaned text (str)
    '''
    df[new_col] = df[col].str.lower() #make all text lowercase
    df[new_col] = df[new_col].str.replace(r'\n',' ') #replace '\n' character with space
    df[new_col] = df[new_col].str.replace(r'\[[^\[\]]*]','') #remove brackets and inside text
    df[new_col] = df[new_col].str.replace(r"\'\w*",'').str.replace(r'[^\w\d\s]+','') #remove extra characters
    df[new_col] = df[new_col].str.strip() #remove extra whitespace

    return df

In [7]:
def normalize_lyrics(df,col):
    '''
    function returns dataframe with column as list of words
        tokenizes, removes stopwords from, and lemmatizes lyrics
    parameters:
        df-->pandas dataframe
        col-->column to normalize
    '''
    df[col] = df[col].str.split() #tokenize lyrics
    
    stop_words = set(stopwords.words('english'))
    df[col] = df[col].apply(lambda row:[w for w in row if w not in stop_words]) #remove stopwords
    
    def lemmatize_text(text):
        '''
        function returns lemmatized text
        parameters:
            text-->str
        '''
        lemmatizer = WordNetLemmatizer()
        
        return [lemmatizer.lemmatize(w) for w in text]
    
    df[col] = df[col].apply(lemmatize_text) #lemmatize words
    
    return df

### Import Songs to Analyze

Read in the resulting dataframes from the spotify_analysis notebook, which were created as follows:

 - Started with the top five tracks for each of country, R&B/hip-hop, and rock/alternative as of the week of May 15, 2021, based on Billboard Top 100 charts (referred to as the "seed tracks")
 - Used Spotify's recommender algorithm to find the most similar songs to the seed tracks (returns a maximum of 100 songs per search)
 - Ranked the most similar songs by audio features using Euclidean distance
 - Fed the top ranking songs through Spotify's recommender algorithm until there were at least 1,000 songs per genre

In [8]:
#country
df_cty = pd.read_csv('data/df_cty.csv')
df_cty.drop(columns='Unnamed: 0',inplace=True)

In [9]:
df_cty

Unnamed: 0,track,artist,track_id,release_date,dance,energy,loud,speech,acoust,live,valence,tempo,sim_score
0,Forever After All,Luke Combs,6IBcOGPsniK3Pso1wHIhew,2020-10-23,0.487,0.650,-5.195,0.0253,0.1910,0.0933,0.456,151.964,
1,The Good Ones,Gabby Barrett,3hLuHKzG1cmlRpq53ZVWd8,2020-06-19,0.519,0.552,-5.023,0.0259,0.1800,0.1490,0.331,89.957,
2,Made For You,Jake Owen,7vF3xkCMvZjAe2nTWY0uQZ,2019-03-29,0.581,0.441,-6.829,0.0268,0.7700,0.1110,0.337,82.125,
3,Hell Of A View,Eric Church,1kBx9VGumfuvlfqdlAGorE,2020-10-02,0.689,0.582,-6.778,0.0236,0.1190,0.1010,0.883,99.021,
4,Breaking Up Was Easy In The 90's,Sam Hunt,4sf2L157iEgAR7yrCNLgSq,2020-04-03,0.562,0.649,-5.400,0.0494,0.2310,0.3410,0.376,145.913,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1021,Hometown,Logan Mize,10MpuvwpKpAJPsbttLjgW1,2021-01-27,0.586,0.801,-5.078,0.0532,0.2480,0.0994,0.718,74.973,-37.832074
1022,Drinkin' Hours,Cole Swindell,2wGFhmzwELvaeXzKQeB7Yb,2019-10-18,0.522,0.922,-3.184,0.0770,0.0552,0.2760,0.878,156.045,-41.336953
1023,Grass Is Always Greener,Jake Owen,1S4zg0dP5HdZkOtqrDk3yg,2019-03-29,0.480,0.851,-3.980,0.0758,0.3760,0.1090,0.881,157.999,-43.245243
1024,Got What I Got,Jason Aldean,4TCc369aRPRubv1m8R1TBG,2019-11-22,0.512,0.580,-6.658,0.0290,0.6210,0.5130,0.303,159.847,-45.061028


In [24]:
df_cty['track'].isnull().sum()

0

In [10]:
#R&B/hip-hop
df_rb = pd.read_csv('data/df_rb.csv')
df_rb.drop(columns='Unnamed: 0',inplace=True)

In [12]:
#rock/alternative
df_rock = pd.read_csv('data/df_rock.csv')
df_rock.drop(columns='Unnamed: 0',inplace=True)

### Obtain Lyrics from Genius API

Pull lyrics from the Genius API with the lyricsgenius wrapper and put into dataframes.

In [10]:
#country
df_cty_lyrics = get_df_songs(list(df_cty['track']),list(df_cty['artist']))

All on Me - PHZES Remix by Sammy Adams is not available
Looking At You (feat. Sam Vesso) - GSPR Remix by Two Friends is not available
Caught Up In The Country (Sam Feldt Remix) by Rodney Atkins is not available
Fault (Kdrew Remix) by mike. is not available
Hell - Salasnich Remix by Two Friends is not available
Rumor (Bryan Todd Remix) by Lee Brice is not available
One Beer (HARDY feat. Lauren Alaina, Devin Dawson) by HIXTAPE is not available
Down For It (feat. T.I.) - JD Walker Version by Willie Jones is not available
Giving You Up - Acoustic by Kameron Marlowe is not available
Something You Ain't Ever Had by Waterloo Revival is not available
Ballin' by Rvshvd is not available
Saltwater Gospel - Fins Up Version by Eli Young Band is not available
Every Little Thing - Ruffian Remix by Russell Dickerson is not available
I Should Probably Go To Bed by Dan + Shay is not available
Shallow (The Duet with Garth Brooks and Trisha Yearwood) by Trisha Yearwood is not available
Forever Begins Toni

In [24]:
p = [('All on Me - PHZES Remix', 'Sammy Adams'),
     ('Looking At You (feat. Sam Vesso) - GSPR Remix', 'Two Friends'),
     ('Caught Up In The Country (Sam Feldt Remix)', 'Sammy Adams'),
     ('Fault (Kdrew Remix)', 'mike.'),
     ('Hell - Salasnich Remix', 'Two Friends'),
     ('Rumor (Bryan Todd Remix)', 'Lee Brice'),
     ('One Beer (HARDY feat. Lauren Alaina, Devin Dawson)', 'HIXTAPE'),
     ('Down For It (feat. T.I.) - JD Walker Version', 'Willie Jones'),
     ('Giving You Up - Acoustic', 'Kameron Marlowe'),
     ("Something You Ain't Ever Had", 'Waterloo Revival')
    ]

In [85]:
re.sub(' - .+', '', 'All on Me')


#'Hell - Salasnich Remix'.replace(r' - ','') #remove text after '-'

'All on Me'

In [87]:
p_list = [get_lyrics(t,a) for t,a in p]

Fault (Kdrew Remix) by mike. is not available
Rumor (Bryan Todd Remix) by Lee Brice is not available
One Beer (HARDY feat. Lauren Alaina, Devin Dawson) by HIXTAPE is not available


In [88]:
get_lyrics('One Beer','HIXTAPE')

One Beer by HIXTAPE is not available


In [14]:
df_cty_lyrics

Unnamed: 0,track,artist,lyrics
0,Forever After All,Luke Combs,[Verse 1]\nA cold beer's got twelve ounces\nA ...
1,The Good Ones,Gabby Barrett,[Verse 1]\nHe's a phone call to his parents\nH...
2,Made For You,Jake Owen,[Verse 1]\nWater towers are made for hearts an...
3,Hell Of A View,Eric Church,[Verse 1]\nI was no daddy's dream\nWas not you...
4,Breaking Up Was Easy In The 90's,Sam Hunt,"[Intro]\nYeah, man, oh man, oh man\n\n[Verse 1..."
...,...,...,...
1021,Hometown,Logan Mize,[Verse 1]\nHow'd I end up here where I don't b...
1022,Drinkin' Hours,Cole Swindell,[Intro]\nHey\nWoo!\n\n[Verse 1]\nBeen watchin'...
1023,Grass Is Always Greener,Jake Owen,[Verse 1: Jake Owen]\nNeighbor got a brand new...
1024,Got What I Got,Jason Aldean,"[Verse 1]\nShe said, ""Baby, do you ever miss b..."


In [None]:
#R&B/hip-hop
df_rb_lyrics = get_df_songs(df_rb['track'],df_rb['artist'])

In [None]:
#rock
df_rock_lyrics = get_df_songs(df_rock['track'],df_rock['artist'])

### Clean and Preprocess Lyrics

In [None]:
#df_clean = clean_lyrics(df,'lyrics','words')

In [None]:
#df_clean = normalize_lyrics(df_clean,'words')

In [None]:
#df_clean

### Join Dataframes