In [400]:
import spotipy
import pandas as pd
import spotipy.util as util
import lyricsgenius
import numpy as np
client_id = '***'
client_secret = '***'
redirect_uri = '***'
scope = 'user-top-read user-library-read'
from nltk.sentiment.vader import SentimentIntensityAnalyzer
genius = lyricsgenius.Genius("***")
sid = SentimentIntensityAnalyzer()

In [434]:
## Functions ##


#scrapes all song features for a list of song IDs and returns dataframe
def getSongFeatures(song_list):
    df = pd.DataFrame({
                    'song_uri' : [],
                    'danceability' : [],
                    'energy' : [],
                    'key' : [],
                    'loudness' : [],
                    'mode' : [],
                    'speechiness' : [],
                    'acousticness' : [],
                    'instrumentalness' : [],
                    'valence' : [],
                    'tempo' : []
    })
    
    for index in range(0,len(song_list)):
        features_list = sp.audio_features(song_list[index])[0]
        track_info = sp.track(song_list[index])
        try:
            temp_df = pd.DataFrame({ 
                     'song_name' : track_info['name'],
                     'album_name' : track_info['album']['name'],
                     'song_uri' : (song_list[index]),
                     'danceability' : (features_list['danceability']),
                     'energy' : (features_list['energy']),
                     'key' : (features_list['key']),
                    'loudness' : (features_list['loudness']),
                    'mode' : (features_list['mode']),
                    'speechiness' : (features_list['speechiness']),
                    'acousticness' : (features_list['acousticness']),
                    'instrumentalness' : (features_list['instrumentalness']),
                    'valence' : (features_list['valence']),
                    'tempo' : (features_list['tempo']) },
                    index = [0]
            )   
            df = pd.concat([df, temp_df])
        except:
            continue
    return df

#gets artist uri given text through spotipy
def get_artist_uri(artist):
    search_queue = sp.search(q = artist, type = "artist", limit = 10)
    return(search_queue['artists']['items'][0]['id'])

#gets list of all artist album IDs through spotipy
def get_artist_album_list(artist_uri):
    album_list = []
    sp_album_list = sp.artist_albums(artist_id = artist_uri)
    for album in sp_album_list['items']:
        album_list.append(album['id'])
    return album_list

#gets list of all artist song IDs through spotipy
def get_artist_song_list(album_list):
    song_list = []
    for album_id in album_list:
        album_track_list = sp.album_tracks(album_id)['items']
        for song in album_track_list:
            song_list.append(song['id'])
    return song_list

#gets list of song lyrics for artist given list of song NAMES using genius package
def get_song_lyric_list(artist_name, song_list):
    lyric_list = []
    for song in song_list:
        temp = genius.search_song(song, artist_name)
        try:
            lyric_list.append(temp.lyrics)
        except:
            lyric_list.append('None Found')
            continue
    return lyric_list


#returns proportion of lines that were classified as negative
def polarity_ratio(lyrics):
    lyrics_list = lyrics.splitlines()
    count = 0
    junk_lines = 0
    for line in lyrics_list:
        if any(ext in line for ext in ['Chorus', 'Verse']):
            junk_lines = junk_lines + 1
            continue
        else:
            if (sid.polarity_scores(line)['compound'] < 0):
                count = count + 1
    return(count / (len(lyrics_list) - junk_lines))

def polarity_ratio_list(lyric_list):
    polarity_list = []
    for song_lyric in lyric_list:
        polarity_list.append(polarity_ratio(song_lyric))
    return polarity_list

In [428]:
token1 = util.prompt_for_user_token('timlee717', scope,
                                  client_id = client_id,
                                  client_secret  = client_secret,
                                  redirect_uri = redirect_uri)
sp = spotipy.Spotify(auth = token1)

In [458]:
joji_uri = get_artist_uri("Joji")
joji_album_list = get_artist_album_list(joji_uri)
joji_song_list = get_artist_song_list(joji_album_list)
joji_df = getSongFeatures(joji_song_list)                                   

In [459]:
#keeping only artist original songs
joji_df = joji_df.loc[joji_df['album_name'].str.contains('BAL') | joji_df['album_name'].str.contains("In Tongues (Deluxe)")]
joji_df = joji_df.loc[~(joji_df['song_name'].str.contains("Remix") | joji_df['song_name'].str.contains('remix'))]


  


In [460]:
joji_lyrics = get_song_lyric_list("Joji", joji_df['song_name'])

Searching for "ATTENTION" by Joji...
Done.
Searching for "SLOW DANCING IN THE DARK" by Joji...
Done.
Searching for "TEST DRIVE" by Joji...
Done.
Searching for "WANTED U" by Joji...
Done.
Searching for "CAN'T GET OVER YOU (feat. Clams Casino)" by Joji...
Done.
Searching for "YEAH RIGHT" by Joji...
Done.
Searching for "WHY AM I STILL IN LA (feat. Shlohmo & D33J)" by Joji...
Done.
Searching for "NO FUN" by Joji...
Done.
Searching for "COME THRU" by Joji...
Done.
Searching for "R.I.P. (feat. Trippie Redd)" by Joji...
Done.
Searching for "XNXX" by Joji...
Done.
Searching for "I'LL SEE YOU IN 40" by Joji...
Done.
Searching for "ATTENTION" by Joji...
Done.
Searching for "SLOW DANCING IN THE DARK" by Joji...
Done.
Searching for "TEST DRIVE" by Joji...
Done.
Searching for "WANTED U" by Joji...
Done.
Searching for "CAN'T GET OVER YOU (feat. Clams Casino)" by Joji...
Done.
Searching for "YEAH RIGHT" by Joji...
Done.
Searching for "WHY AM I STILL IN LA (feat. Shlohmo & D33J)" by Joji...
Done.
Sear

In [461]:
joji_polarity = polarity_ratio_list(joji_lyrics)
joji_df['lyric'] = joji_lyrics
joji_df['Prop_of_neg_lines'] = joji_polarity
joji_df['sad_score'] = (1 - joji_df['valence'] + joji_df['Prop_of_neg_lines']) / 2

joji_df_group = joji_df.groupby(by = ['album_name','song_name'])['sad_score'].min().reset_index()
joji_columns = joji_df[['album_name','song_name','song_uri']]
joji_df_final = pd.merge(joji_df_group, joji_columns, on = ['album_name','song_name'], how = "left")
joji_df_final = joji_df_final.drop_duplicates(subset = ['album_name','song_name','sad_score'], keep = "first")

In [146]:
jeremy_uri = get_artist_uri("Jeremy Zucker")
jeremy_album_list = get_artist_album_list(jeremy_uri)
jeremy_song_list = get_artist_song_list(jeremy_album_list)
jeremy_df = getSongFeatures(jeremy_song_list)

In [163]:
grouped_df = jeremy_df.groupby(by = ['album_name','song_name'])['valence'].min().reset_index()
original_df = jeremy_df[['album_name', 'valence','song_uri','song_name']]
jeremy_df_final = pd.merge(grouped_df,original_df,on = ['album_name','song_name','valence'], how = 'left')
jeremy_df_final = jeremy_df_final.drop_duplicates(subset = ['album_name','song_name'], keep = 'first')
jeremy_df_final = jeremy_df_final.loc[~jeremy_df_final['song_name'].str.contains('remix')]

In [281]:
jeremy_lyrics = get_song_lyric_list("Jeremy Zucker", jeremy_df_final['song_name'])
jeremy_lyrics[10] = genius.search_song("Spin With You", "Jeremy Zucker").lyrics
jeremy_lyrics[19] = genius.search_song("come thru", "Jeremy Zucker").lyrics


Searching for "Heavy" by Jeremy Zucker...
Done.
Searching for "Idk Love" by Jeremy Zucker...
Done.
Searching for "Keep My Head Afloat" by Jeremy Zucker...
Done.
Searching for "Man Down" by Jeremy Zucker...
Done.
Searching for "Shut Your Mouth" by Jeremy Zucker...
Done.
Searching for "Sinking" by Jeremy Zucker...
Done.
Searching for "Stay Quiet" by Jeremy Zucker...
Done.
Searching for "Upside Down" by Jeremy Zucker...
Done.
Searching for "Paradise (feat. Cisco the Nomad)" by Jeremy Zucker...
Done.
Searching for "Peace Signs" by Jeremy Zucker...
Done.
Searching for "Spin With You (feat. Jeremy Zucker)" by Jeremy Zucker...
Done.
Searching for "Upside Down (feat. Daniel James)" by Jeremy Zucker...
Done.
Searching for "Weakness" by Jeremy Zucker...
Done.
Searching for "When You Wake up..." by Jeremy Zucker...
Done.
Searching for "hello old friend" by Jeremy Zucker...
Done.
Searching for "please" by Jeremy Zucker...
Done.
Searching for "scared" by Jeremy Zucker...
Done.
Searching for "someti

In [284]:
jeremy_polarity = polarity_ratio_list(jeremy_lyrics)
jeremy_df_final['lyric'] = jeremy_lyrics
jeremy_df_final['Prop_of_neg_lines'] = jeremy_polarity
jeremy_df_final['sad_score'] = (1-jeremy_df_final['valence'] + jeremy_df_final['Prop_of_neg_lines']) / 2

In [None]:
jeremy_df_final

In [296]:
billie_uri = get_artist_uri("Billie Eilish")
billie_album_list = get_artist_album_list(billie_uri)
billie_song_list = get_artist_song_list(billie_album_list)
billie_df = getSongFeatures(billie_song_list)

In [306]:
billie_df = billie_df.loc[~billie_df['album_name'].str.contains('NOW')]
billie_df = billie_df.loc[~billie_df['album_name'].str.contains('Fama')]
billie_df = billie_df.loc[~billie_df['album_name'].str.contains('Top')]
billie_df = billie_df.loc[~billie_df['album_name'].str.contains('Remix')]
billie_df = billie_df.loc[~billie_df['album_name'].str.contains('Edition')]

In [311]:
billie_df_grouped = billie_df.groupby(by = ['album_name', 'song_name'])['valence'].min().reset_index()
billie_df_columns = billie_df[['album_name','song_uri','song_name','valence']]
billie_df_final = pd.merge(billie_df_grouped, billie_df_columns, on = ['album_name','song_name','valence'], how = 'left')

In [313]:
billie_df_final = billie_df_final.drop_duplicates(subset=['album_name','song_name','valence'], keep = 'first')

In [356]:
billie_lyrics = get_song_lyric_list("Billie Eilish", billie_df_final['song_name'])
billie_lyrics[0] = genius.search_song('&burn','Billie Eilish').lyrics
billie_lyrics[5] = genius.search_song('WHEN I WAS OLDER','Billie Eilish').lyrics
billie_lyrics[22] = genius.search_song('&burn','Billie Eilish').lyrics
billie_lyrics[31] = genius.search_song('lovely', 'Billie Eilish').lyrics

Searching for "&burn (with Vince Staples)" by Billie Eilish...
Done.
Searching for "bellyache" by Billie Eilish...
Done.
Searching for "Bored" by Billie Eilish...
Done.
Searching for "ocean eyes" by Billie Eilish...
Done.
Searching for "Six Feet Under" by Billie Eilish...
Done.
Searching for "WHEN I WAS OLDER - Music Inspired By The Film ROMA" by Billie Eilish...
Done.
Searching for "!!!!!!!" by Billie Eilish...
Done.
Searching for "8" by Billie Eilish...
Done.
Searching for "all the good girls go to hell" by Billie Eilish...
Done.
Searching for "bad guy" by Billie Eilish...
Done.
Searching for "bury a friend" by Billie Eilish...
Done.
Searching for "goodbye" by Billie Eilish...
Done.
Searching for "i love you" by Billie Eilish...
Done.
Searching for "ilomilo" by Billie Eilish...
Done.
Searching for "listen before i go" by Billie Eilish...
Done.
Searching for "my strange addiction" by Billie Eilish...
Done.
Searching for "when the party's over" by Billie Eilish...
Done.
Searching for "

In [359]:
billie_polarity = polarity_ratio_list(billie_lyrics)
billie_df_final['lyric'] = billie_lyrics
billie_df_final['Prop_of_neg_lines'] = billie_polarity
billie_df_final['sad_score'] = (1-billie_df_final['valence'] + billie_df_final['Prop_of_neg_lines'])/2

In [361]:
billie_df_final['sad_score'].mean()

0.4731979090674508

In [362]:
joji_df['sad_score'].mean()

0.4260942032818021

In [363]:
jeremy_df_final['sad_score'].mean()

0.35389510632185484

In [367]:
jj_uri = get_artist_uri("Jack Johnson")
jj_album_list = get_artist_album_list(jj_uri)
jj_song_list = get_artist_song_list(jj_album_list)
jj_df = getSongFeatures(jj_song_list)

In [371]:
jj_df = jj_df.loc[jj_df['album_name'] == 'In Between Dreams']
jj_lyrics = get_song_lyric_list("Jack Johnson", jj_df['song_name'])

Searching for "Better Together" by Jack Johnson...
Done.
Searching for "Never Know" by Jack Johnson...
Done.
Searching for "Banana Pancakes" by Jack Johnson...
Done.
Searching for "Good People" by Jack Johnson...
Done.
Searching for "No Other Way" by Jack Johnson...
Done.
Searching for "Sitting, Waiting, Wishing" by Jack Johnson...
Done.
Searching for "Staple It Together" by Jack Johnson...
Done.
Searching for "Situations" by Jack Johnson...
Done.
Searching for "Crying Shame" by Jack Johnson...
Done.
Searching for "If I Could" by Jack Johnson...
Done.
Searching for "Breakdown" by Jack Johnson...
Done.
Searching for "Belle" by Jack Johnson...
Done.
Searching for "Do You Remember" by Jack Johnson...
Done.
Searching for "Constellations" by Jack Johnson...
Done.
Searching for "Mudfootball - Live" by Jack Johnson...
Done.
Searching for "Better Together" by Jack Johnson...
Done.
Searching for "Never Know" by Jack Johnson...
Done.
Searching for "Banana Pancakes" by Jack Johnson...
Done.
Searc

In [377]:
jj_df_grouped = jj_df.groupby(by = ['album_name', 'song_name'])['valence'].min().reset_index()
jj_columns = jj_df[['album_name','valence','song_name','song_uri']]
jj_df_final = pd.merge(jj_df_grouped, jj_columns, on = ['album_name','song_name','valence'], how = 'left')
jj_df_final = jj_df_final.drop_duplicates(subset = ['album_name','song_name'], keep = "first")

In [380]:
jj_lyrics = get_song_lyric_list("Jack Johnson", jj_df_final['song_name'])
jj_polarity = polarity_ratio_list(jj_lyrics)

Searching for "Banana Pancakes" by Jack Johnson...
Done.
Searching for "Belle" by Jack Johnson...
Done.
Searching for "Better Together" by Jack Johnson...
Done.
Searching for "Breakdown" by Jack Johnson...
Done.
Searching for "Constellations" by Jack Johnson...
Done.
Searching for "Constellations - Demo Version" by Jack Johnson...
Done.
Searching for "Crying Shame" by Jack Johnson...
Done.
Searching for "Do You Remember" by Jack Johnson...
Done.
Searching for "Good People" by Jack Johnson...
Done.
Searching for "If I Could" by Jack Johnson...
Done.
Searching for "Mudfootball - Live" by Jack Johnson...
Done.
Searching for "Never Know" by Jack Johnson...
Done.
Searching for "No Other Way" by Jack Johnson...
Done.
Searching for "Sitting, Waiting, Wishing" by Jack Johnson...
Done.
Searching for "Situations" by Jack Johnson...
Done.
Searching for "Staple It Together" by Jack Johnson...
Done.


In [384]:
jj_df_final['lyric'] = jj_lyrics
jj_df_final['Prop_of_neg_lines'] = jj_polarity
jj_df_final['sad_score'] = (1-jj_df_final['valence'] + jj_df_final['Prop_of_neg_lines'])/2

In [404]:
kkb_uri = get_artist_uri("Kero Kero Bonito")
kkb_album_list = get_artist_album_list(kkb_uri)
kkb_song_list = get_artist_song_list(kkb_album_list)
kkb_df = getSongFeatures(kkb_song_list)

In [412]:
kkb_df = kkb_df.loc[~kkb_df['song_name'].str.contains('emix')]
kkb_df_grouped = kkb_df.groupby(by = ['song_name', 'album_name'])['valence'].min().reset_index()
kkb_df_columns = kkb_df[['song_name','album_name','valence','song_uri']]
kkb_df_final = pd.merge(kkb_df_grouped, kkb_df_columns, on = ['song_name','album_name','valence'], how = "left")
kkb_df_final = kkb_df_final.drop_duplicates(subset = ['song_name'], keep = "first")

In [414]:
kkb_lyrics = get_song_lyric_list("Kero Kero Bonito", kkb_df_final['song_name'])

Searching for "Babies (Are so Strange)" by Kero Kero Bonito...
Done.
Searching for "Big City" by Kero Kero Bonito...
Done.
Searching for "Bonito Intro" by Kero Kero Bonito...
Done.
Searching for "Bonito Jingle" by Kero Kero Bonito...
Done.
Searching for "Break" by Kero Kero Bonito...
Done.
Searching for "Cat Vs Dog - English Version" by Kero Kero Bonito...
No results found for: 'Cat Vs Dog - English Version Kero Kero Bonito'
Searching for "Cat Vs Dog - Japanese Version" by Kero Kero Bonito...
Done.
Searching for "Cinema" by Kero Kero Bonito...
Done.
Searching for "Dear Future Self" by Kero Kero Bonito...
Done.
Searching for "Dump" by Kero Kero Bonito...
Done.
Searching for "Fish Bowl" by Kero Kero Bonito...
Done.
Searching for "Flyway" by Kero Kero Bonito...
Done.
Searching for "Forever Summer Holiday" by Kero Kero Bonito...
Done.
Searching for "Graduation" by Kero Kero Bonito...
Done.
Searching for "Heard a Song" by Kero Kero Bonito...
Done.
Searching for "Hey Parents" by Kero Kero Bo

In [420]:
kkb_df_final['lyric'] = kkb_lyrics
kkb_df_final['Prop_of_neg_lines'] = kkb_polarity
kkb_df_final['sad_score'] = (1-kkb_df_final['valence'] + kkb_df_final['Prop_of_neg_lines'])/2

In [435]:
excision_uri = get_artist_uri("Excision")
excision_albums = get_artist_album_list(excision_uri)
excision_song_list = get_artist_song_list(excision_albums)
excision_df = getSongFeatures(excision_song_list)

In [438]:
excision_df['test'] = 1- excision_df['valence']

In [None]:
joji_df_final = pd.merge(joji_df_final, joji_df[['song_uri', 'valence', 'Prop_of_neg_lines','lyric']], 
                         on = 'song_uri', how = 'left' )

In [467]:
joji_df_final['artist'] = "JOJI"
billie_df_final['artist'] = "Billie Eilish"
kkb_df_final['artist'] = "Kero Kero Bonito"
jeremy_df_final['artist'] = "Jeremy Zucker"
jj_df_final['artist'] = "Jack Johnson"

In [472]:
final_df = pd.concat([joji_df_final, kkb_df_final, billie_df_final, jeremy_df_final,jj_df_final])
final_df['Prop_of_neg_lines'] = final_df['Prop_of_neg_lines'].astype('float64')
final_df['album_name'] = final_df['album_name'].astype('str')
final_df['lyric'] = final_df['lyric'].astype('str')
final_df['sad_score'] = final_df['sad_score'].astype('float64')
final_df['song_name'] = final_df['song_name'].astype('str')
final_df['song_uri'] = final_df['song_uri'].astype('str')
final_df['valence'] = final_df['valence'].astype('float64')

In [473]:
final_df.to_csv("MusicData.csv", index = False)