In [None]:
import requests
from bs4 import BeautifulSoup
import os
import re
import lyricsgenius
import pandas as pd
from tqdm import tqdm

In [None]:
# Import the token by name so its origin is visible and nothing else from the
# secrets module leaks into the notebook namespace.
from secrets import GENIUS_ACCESS_TOKEN

# Genius API client shared by the scraping cells below.
genius = lyricsgenius.Genius(GENIUS_ACCESS_TOKEN)

genius.verbose = False # Turn off status messages
genius.remove_section_headers = True # Remove section headers (e.g. [Chorus]) from lyrics when searching
genius.skip_non_songs = False # Include hits thought to be non-songs (e.g. track lists)
genius.excluded_terms = ["(Remix)", "(Live)", "Remix"] # Exclude songs with these words in their title

### Preprocess

In [None]:
import os

# Path of the cached lyrics CSV, resolved against the current working directory.
file_name = "data/MacMiller_AD.csv"
cwd = os.getcwd()
file_path = os.path.join(cwd, file_name)

In [None]:
# Scrape every album once and cache the result as a CSV; the whole cell is
# skipped when the cache file already exists.
if not os.path.isfile(file_path):
    artists = ['Mac Miller']
    albums = ['Blue Slide Park', 'Watching Movies with the Sound Off', 'Faces',
             'GO:OD AM', 'The Divine Feminine', 'Swimming', 'Circles',
             'K.I.D.S.', 'Best Day Ever', 'I Love Life, Thank You', 'Macadelic', 'Delusional Thomas',
             'You'] #'Live from Space',

    # Albums that have to be looked up under a different artist name than artists[0].
    album_artists = {
        'You': 'Larry Lovestein & The Velvet Revival',
        'Delusional Thomas': 'Delusional Thomas',
    }

    # Text stripped from the scraped lyrics, applied in this order.
    lyric_noise_patterns = [
        r'\[.*?\]',     # bracketed section headers such as [Chorus]
        r'^.*?Lyrics',  # everything up to the first "Lyrics" on the opening line
        r'\d*Embed$',   # trailing "Embed" marker with any number of leading digits
    ]

    album_frames = []
    for album_name in tqdm(albums):
        album = genius.search_album(album_name, album_artists.get(album_name, artists[0]))
        # Guard against a failed lookup (None) as well as an album without tracks.
        assert album is not None and len(album.tracks) != 0, f'Empty album: {album_name}'

        # Album-level values are the same for every track, so compute them once.
        release_date = album.release_date_components.strftime('%Y-%m-%d')

        rows = []
        for track in album.tracks:
            song = track.song
            if "remix" in song.title.lower():
                # Skip only the remix itself; later tracks are still collected.
                continue
            rows.append(
                {
                    'artist': song.artist,
                    'album': album.name,
                    'release_date': release_date,
                    'track_no': track.number,
                    'title': song.title,
                    'lyrics': song.lyrics,
                    'art': album.cover_art_url,
                    'url': song.url,  # each row links to its own track
                }
            )

        if not rows:
            # Nothing but remixes: there is no 'lyrics' column to clean.
            continue

        album_df = pd.DataFrame(rows)
        for pattern in lyric_noise_patterns:
            album_df['lyrics'] = album_df['lyrics'].replace(to_replace=pattern, value='', regex=True)
        album_df['track_no'] = album_df['track_no'].astype(int)
        assert not album_df['lyrics'].isna().any(), f'Missing lyrics in album: {album_name}'

        album_frames.append(album_df)

    # Concatenate once instead of growing the frame inside the loop.
    artist_df = pd.concat(album_frames, ignore_index=True)
    artist_df = artist_df[~artist_df['title'].str.contains('live|remix', case=False)].reset_index(drop=True)
    artist_df = artist_df.sort_values(by=['release_date','track_no']).reset_index(drop=True)

    # Write to the same path that was checked above, creating the folder if needed.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    artist_df.to_csv(file_path, index=False)

### Init Analysis

In [None]:
import pandas as pd
import os
from nltk.corpus import stopwords
from textblob import TextBlob
from textblob import Word
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# VADER analyzer instance reused by the scoring cell.
analyzer = SentimentIntensityAnalyzer()

# Truncate displayed column text to 17 characters.
pd.options.display.max_colwidth = 17

In [None]:
# Load the cached lyrics and work on a copy so the frame read from disk stays untouched.
artist_df = pd.read_csv("data/MacMiller_AD.csv")
mac = artist_df.copy()
mac['lyrics'] = mac['lyrics'].astype(str)
# Series.replace('\n', ' ') only swaps cells whose whole value is '\n';
# .str.replace substitutes every newline inside each lyric.
mac['lyrics'] = mac['lyrics'].str.replace('\n', ' ', regex=False)

In [None]:
# Sentiment scoring: TextBlob polarity/subjectivity and VADER pos/neg/neu/compound,
# each multiplied by 100. Every lyric is analysed once per library and the
# result is reused for all derived columns.
textblob_scores = pd.DataFrame(
    [
        (sentiment.polarity, sentiment.subjectivity)
        for sentiment in (TextBlob(text).sentiment for text in mac['lyrics'])
    ],
    index=mac.index,
    columns=['polarity', 'subjectivity'],
)
mac['polarity'] = (textblob_scores['polarity'] * 100).round(3)
mac['subjectivity'] = (textblob_scores['subjectivity'] * 100).round(3)

vader_scores = pd.DataFrame(
    [analyzer.polarity_scores(text) for text in mac['lyrics']],
    index=mac.index,
    columns=['pos', 'neg', 'neu', 'compound'],
)
mac['Positive_Score'] = vader_scores['pos'] * 100
mac['Negative_Score'] = vader_scores['neg'] * 100
mac['Neutral_Score'] = vader_scores['neu'] * 100
mac['Compound_Score'] = vader_scores['compound'] * 100

# Order songs from highest to lowest compound score.
mac = mac.sort_values('Compound_Score', ascending=False)

In [None]:
score_cols = ['Positive_Score', 'Negative_Score', 'Neutral_Score', 'Compound_Score']

# Mean of each VADER score per album (index: album name).
album_metrics = mac.groupby(['album'])[score_cols].mean()

# Mean of each VADER score across all songs.
overall_average = mac[score_cols].mean()

# Each album's deviation from the overall mean, as a fraction of that mean.
relative_difference = album_metrics.sub(overall_average).div(overall_average)

In [None]:
# Album rankings, highest first, one view per score. They are built before
# 'Variability' is added, so the views do not carry that column.
compound_sort, positive_sort, negative_sort, neutral_sort = (
    album_metrics.sort_values(by=[score_col], ascending=False)
    for score_col in ('Compound_Score', 'Positive_Score', 'Negative_Score', 'Neutral_Score')
)

# Absolute gap between the mean positive and mean negative score of each album.
album_metrics['Variability'] = (
    album_metrics['Positive_Score'] - album_metrics['Negative_Score']
).abs()

In [None]:
# Persist the per-song scores.
mac.to_csv("data/MM_AllSongs_sentiment.csv", index=False)

# Turn the album index into a regular column, then persist the per-album aggregates.
album_metrics = album_metrics.reset_index()
album_metrics.to_csv("data/MM_Albums_sentiment.csv", index=False)

### Appendix

In [None]:
# mac = genius.search_artist('Mac Miller', max_songs = 0)
# artist = genius.search_artist("Mac Miller", max_songs=3, sort="title", include_features=False)

# mac = genius.search_artist('Mac Miller')
# mac.save_lyrics()
# artist.songs[1].title_with_featured

# song = artist.song("So it Goes")
# # song = genius.search_song("To You", artist.name)
# print(song.lyrics)

# artist.add_song(song)
# # artist.save_lyrics()

# print(artist)

# artist.name

# album = genius.search_album("Swimming", artist.name)
# album.save_lyrics()

# dir(album)

# x = album.tracks[1]
# x.save_lyrics()