In [None]:
##### Testing techniques between TextBlob and vaderSentiment and Afinn
##### Also using the lyricsgenius API vs AZ lyrics/other lyrics source (will validate for ease of use)
##### Currently testing only one song at a time
##### Expected output: pos/neg/neutral for Vader, polarity/subjectivity for TextBlob

#####

###### Stretch goals: Use the Canada Lexicon compilation study of word-to-emotion associations, and possibly score each song to produce an associated emotion: anger, disgust, fear, joy, anticipation, sadness, surprise or trust.

In [43]:
# Dependencies set up:

# Dataframe building, analysis
import pandas as pd
import numpy as np
import json

# Scraping data/lyrics
import lyricsgenius

# Data Preprocessing
from textblob import TextBlob
from nltk.corpus import stopwords
import contractions
import re
import string
import nltk
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer
from afinn import Afinn

### Preprocessing Steps ###

In [50]:
# API Test 
# lyrics_url = "https://www.azlyrics.com/lyrics/pharrellwilliams/happy.html"
# response = requests.get(lyrics_url)
# soup = BeautifulSoup(response.text, 'lxml')
# lyrics = soup.find('div', class_=None).text
# lyrics = lyrics.replace('\n', ' ').replace('\r', ' ')
# lyrics

import os

import lyricsgenius

# The Genius API token is read from the environment rather than written into
# the notebook, so the credential is never saved or shared with the file.
# A missing variable raises KeyError here, before any network call is made.
genius = lyricsgenius.Genius(os.environ["GENIUS_ACCESS_TOKEN"])
artist = 'Eminem'

# Look the artist up first (fetching a single song keeps this cheap) so the
# song search below uses the artist name exactly as Genius spells it.
search_artist = genius.search_artist(artist, max_songs=1, sort="title")
if search_artist is None:
    raise LookupError(f'Genius returned no artist for "{artist}"')

song = genius.search_song("Lose Yourself", search_artist.name)
if song is None:
    # Without this check a failed search surfaces as an opaque
    # AttributeError on `song.lyrics`.
    raise LookupError(f'Genius returned no song "Lose Yourself" for {search_artist.name}')

lyrics = song.lyrics
lyrics

Searching for songs by Eminem...

Song 1: "12 Days of Diss-Mas"

Reached user-specified song limit (1).
Done. Found 1 songs.
Searching for "Lose Yourself" by Eminem...
Done.


"[Intro]\nLook, if you had one shot or one opportunity\nTo seize everything you ever wanted in one moment\nWould you capture it, or just let it slip? Yo\n\n[Verse 1]\nHis palms are sweaty, knees weak, arms are heavy\nThere's vomit on his sweater already, mom's spaghetti\nHe's nervous, but on the surface he looks calm and ready\nTo drop bombs, but he keeps on forgetting\nWhat he wrote down, the whole crowd goes so loud\nHe opens his mouth, but the words won't come out\nHe's choking, how? Everybody's joking now\nThe clock's run out, time's up, over—blaow!\nSnap back to reality, ope there goes gravity, ope\nThere goes Rabbit, he choked, he's so mad but he won't\nGive up that easy, no, he won't have it, he knows\nHis whole back's to these ropes, it don't matter, he's dope\nHe knows that but he's broke, he's so stagnant, he knows\nWhen he goes back to this mobile home, that's when it's\nBack to the lab again yo, this old rap shit, he\nBetter go capture this moment and hope it don't pass him

In [4]:
#1 Strip the markers that label parts of the song, such as [Chorus:], [Explicit:], [Verse 2:]: any text wrapped in square brackets is removed together with the brackets.
section_marker = re.compile('\\[[^\\]]*\\]')
lyrics = section_marker.sub('', lyrics)

In [None]:
#2. Expand contractions with contractions.fix. The result is stored under a new name (exp_lyrics), so this cell does not overwrite `lyrics`.
exp_lyrics = contractions.fix(lyrics)
# Show the expanded text for inspection
exp_lyrics

In [None]:
#2. Normalize the expanded lyrics: lowercase everything in case capitalization interferes with the weight of the words, drop every ( and ) while keeping the words inside them, then collapse each run of whitespace (spaces, newlines, carriage returns) into one space.
# split()/join also trims leading and trailing whitespace. The earlier chain replaced triple spaces *before* turning newlines into spaces, so blank lines were left behind as double spaces.
lyrics = ' '.join(exp_lyrics.lower().replace('(', '').replace(')', '').split())
lyrics

In [None]:
#3. Remove punctuation: keep only the characters that are not listed in string.punctuation
punc_lyrics = ''.join(char for char in lyrics if char not in string.punctuation)
punc_lyrics

In [None]:
#3. Tokenizing? Split the punctuation-free text into a list of word tokens (English is word_tokenize's default language, spelled out here)
tokenized_lyrics = nltk.word_tokenize(punc_lyrics, language='english')
tokenized_lyrics

In [None]:
#4. Remove stopwords?
from nltk.corpus import stopwords

# English stop words held in a set, so each membership check is a hash lookup
stop_words = set(stopwords.words('english'))

# Keep the remaining tokens in their original order
filtered_lyrics = [token for token in tokenized_lyrics if token not in stop_words]
filtered_lyrics

In [None]:
#5. Lemmatize? Each remaining token is lemmatized, then the list is converted back to one string.
from nltk.stem import WordNetLemmatizer
lemm = WordNetLemmatizer()

# Apply the lemmatizer token by token and join the results with single spaces
lemmatized = ' '.join(map(lemm.lemmatize, filtered_lyrics))
lemmatized

In [64]:
# VADER sentiment scoring, plus a listing of the AFINN words found in the lyrics
# (AFINN-111.txt supplies the words that may have influenced the sentiment score)

def vader_sentiment(lyrics):
    """Print VADER sentiment scores for a song and the AFINN words it contains.

    The lyrics are cleaned (bracketed section markers removed, contractions
    expanded, lowercased, parentheses and ``string.punctuation`` characters
    stripped), tokenized, filtered of English stop words and lemmatized. The
    lemmatized text is then scored with ``SentimentIntensityAnalyzer``.

    Parameters
    ----------
    lyrics : str
        Lyrics of a single song.

    Returns
    -------
    None
        Everything is printed: the AFINN words present in the processed
        lyrics (alphabetically sorted, so the output is the same on every
        run), the remaining polarity-score dict, and the compound score.

    Notes
    -----
    ``AFINN-111.txt`` (tab-separated word/score pairs) is read from the
    current working directory on every call.
    """
    # Drop bracketed section markers such as [Intro] or [Verse 1]
    untagged = re.sub(r'\[[^\]]*\]', '', lyrics)
    expanded = contractions.fix(untagged)
    normalized = (
        expanded.lower().strip()
        .replace('(', '').replace(')', '')
        .replace('\n', ' ').replace('\r', ' ')
    )
    no_punct = normalized.translate(str.maketrans('', '', string.punctuation))

    # Tokenize
    tokens = nltk.word_tokenize(no_punct)

    # Stop words have to be stored and loaded first
    # NOTE(review): the NLTK English stop-word list appears to include
    # negations ("not", "no"), which VADER relies on for negation handling --
    # confirm that removing them before scoring is intended.
    stop_words = set(stopwords.words('english'))
    content_tokens = [token for token in tokens if token not in stop_words]

    # Lemmatize each token, then join back into one string for the analyzer
    lemmatizer = WordNetLemmatizer()
    lemmatized = ' '.join(lemmatizer.lemmatize(token) for token in content_tokens)

    # Sentiment analysis
    analyzer = SentimentIntensityAnalyzer()
    text_sentiment = analyzer.polarity_scores(lemmatized)

    # Split the compound score off the dictionary for graphing
    compound_score = text_sentiment.pop('compound')

    # AFINN words that also occur in the processed lyrics
    afinn_text = pd.read_csv('AFINN-111.txt', names=["word", "score"], sep='\t')
    afinn_words = set(afinn_text['word'])
    lemmatized_tokens = nltk.word_tokenize(lemmatized)
    # Sorted, because plain set iteration order changes between kernel
    # sessions and made the printed list differ from run to run.
    common = sorted(afinn_words.intersection(lemmatized_tokens))
    common_string = ', '.join(common)

    print(f'Words that can affect the sentiment scores: {common_string}')
    print('---'*30)
    print(text_sentiment)
    print('---'*30)
    print(f'Compound Score: {compound_score}')

In [65]:
# Score the song held in `lyrics`. vader_sentiment applies its own cleaning steps, so the text presumably may be raw or already preprocessed -- whichever the last executed cell bound to `lyrics`.
vader_sentiment(lyrics)

Words that can affect the sentiment scores: choked, pain, shit, like, boring, goddamn, opportunity, mad, easy, better, love, calm, want, rage, hope, miss, lonely, nervous, god, escaping, drop, chance, bomb, fail, weak, matter, hard, best, choking, broke, success
------------------------------------------------------------------------------------------
{'neg': 0.141, 'neu': 0.645, 'pos': 0.214}
------------------------------------------------------------------------------------------
Compound Score: 0.9864
