In [None]:
##### Testing techniques between TextBlob and vaderSentiment
##### Also using the lyricsgenius API vs AZ lyrics/other lyrics source (will validate for ease of use)
##### Testing only one song corrently at a time
##### Expected output: pos/neg/neutral for Vader, polarity/subjectivity for TextBlob

#####

###### Stretch goals: To be able to use the Canada Lexicon compilation study of word associations to emotions. Maybe score the song to produce an emotion associated with: anger, disgust, fear, joy, anticipation, sadness, surprise or trust. 

In [26]:
# Dependencies set up:

# Dataframe building, analysis
import pandas as pd
import numpy as np

# Scraping data/lyrics
import lyricsgenius

# Data Preprocessing
from textblob import TextBlob
from nltk.corpus import stopwords
import contractions
import re
import string
import nltk
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer
from afinn import Afinn

### Preprocessing Steps ###

In [29]:
# API Test 
# lyrics_url = "https://www.azlyrics.com/lyrics/pharrellwilliams/happy.html"
# response = requests.get(lyrics_url)
# soup = BeautifulSoup(response.text, 'lxml')
# lyrics = soup.find('div', class_=None).text
# lyrics = lyrics.replace('\n', ' ').replace('\r', ' ')
# lyrics

import lyricsgenius
genius = lyricsgenius.Genius("9TKmoq_foRHodVBBuM-5doQJNej6I5gqgPk6Kkde_Qhxx3ZW6nOBZWGfHMnwCmuL")
artist = 'Taylor Swift'
search_artist = genius.search_artist(artist, max_songs=1, sort="title")
song = genius.search_song("Shake It Off", search_artist.name)
lyrics = song.lyrics
lyrics

Searching for songs by Taylor Swift...

"1989 [Booklet]" is not valid. Skipping.
"1989 Interview with Paul McGuire" is not valid. Skipping.
"1989 [Liner Notes]" is not valid. Skipping.
"1989 Tour Setlist" is not valid. Skipping.
Song 1: "1989 World Tour Dates"

Reached user-specified song limit (1).
Done. Found 1 songs.
Searching for "Shake It Off" by Taylor Swift...
Done.


"[Verse 1]\nI stay out too late\nGot nothing in my brain\nThat's what people say, mmm, mmm\nThat's what people say, mmm, mmm\nI go on too many dates\nBut I can't make them stay\nAt least that's what people say, mmm, mmm\nThat's what people say, mmm, mmm\n\n[Pre-Chorus 1]\nBut I keep cruisin'\nCan't stop, won't stop movin'\nIt's like I got this music in my mind\nSaying it's gonna be alright\n\n[Chorus]\n'Cause the players gonna play, play, play, play, play\nAnd the haters gonna hate, hate, hate, hate, hate\nBaby, I'm just gonna shake, shake, shake, shake, shake\nI shake it off, I shake it off\nHeartbreakers gonna break, break, break, break, break\nAnd the fakers gonna fake, fake, fake, fake, fake\nBaby, I'm just gonna shake, shake, shake, shake, shake\nI shake it off, I shake it off\n\n[Verse 2]\nI never miss a beat\nI'm lightning on my feet\nAnd that's what they don’t see, mmm, mmm\nThat's what they don’t see, mmm, mmm\nI'm dancing on my own (Dancing on my own)\nI make the moves up as 

In [47]:
#1 Cleaning the lyrics of words that describe parts of the song, such as [Chorus:], [Explicit:], [Verse 2:] etc. 
lyrics = re.sub('\\[[^\\]]*\\]', '', lyrics)

In [48]:
#2. Expand contractions
exp_lyrics = contractions.fix(lyrics)
exp_lyrics

"\nI stay out too late\nGot nothing in my brain\nthat is what people say, mmm, mmm\nthat is what people say, mmm, mmm\nI go on too many dates\nBut I can not make them stay\nAt least that is what people say, mmm, mmm\nthat is what people say, mmm, mmm\n\n\nBut I keep cruisin'\ncan not stop, will not stop movin'\nit is like I got this music in my mind\nSaying it is going to be alright\n\n\nbecause the players going to play, play, play, play, play\nAnd the haters going to hate, hate, hate, hate, hate\nBaby, I am just going to shake, shake, shake, shake, shake\nI shake it off, I shake it off\nHeartbreakers going to break, break, break, break, break\nAnd the fakers going to fake, fake, fake, fake, fake\nBaby, I am just going to shake, shake, shake, shake, shake\nI shake it off, I shake it off\n\n\nI never miss a beat\nI am lightning on my feet\nAnd that is what they do not see, mmm, mmm\nthat is what they do not see, mmm, mmm\nI am dancing on my own (Dancing on my own)\nI make the moves up 

In [49]:
#2. Converted all the words into lowercase in case capitalization interferes with the weight of the words, removed leading and trailing spaces. Also removes instance of () where it occurs, but keeps the words.
lyrics = exp_lyrics.lower().strip().replace('   ', ' ').replace('(', '').replace(')', '').replace('\n', ' ').replace('\r', ' ')
lyrics

"i stay out too late got nothing in my brain that is what people say, mmm, mmm that is what people say, mmm, mmm i go on too many dates but i can not make them stay at least that is what people say, mmm, mmm that is what people say, mmm, mmm   but i keep cruisin' can not stop, will not stop movin' it is like i got this music in my mind saying it is going to be alright   because the players going to play, play, play, play, play and the haters going to hate, hate, hate, hate, hate baby, i am just going to shake, shake, shake, shake, shake i shake it off, i shake it off heartbreakers going to break, break, break, break, break and the fakers going to fake, fake, fake, fake, fake baby, i am just going to shake, shake, shake, shake, shake i shake it off, i shake it off   i never miss a beat i am lightning on my feet and that is what they do not see, mmm, mmm that is what they do not see, mmm, mmm i am dancing on my own dancing on my own i make the moves up as i go moves up as i go and that i

In [50]:
#3. Remove punctuations
punc_lyrics = lyrics.translate(str.maketrans('','', string.punctuation))
punc_lyrics

'i stay out too late got nothing in my brain that is what people say mmm mmm that is what people say mmm mmm i go on too many dates but i can not make them stay at least that is what people say mmm mmm that is what people say mmm mmm   but i keep cruisin can not stop will not stop movin it is like i got this music in my mind saying it is going to be alright   because the players going to play play play play play and the haters going to hate hate hate hate hate baby i am just going to shake shake shake shake shake i shake it off i shake it off heartbreakers going to break break break break break and the fakers going to fake fake fake fake fake baby i am just going to shake shake shake shake shake i shake it off i shake it off   i never miss a beat i am lightning on my feet and that is what they do not see mmm mmm that is what they do not see mmm mmm i am dancing on my own dancing on my own i make the moves up as i go moves up as i go and that is what they do not know mmm mmm that is wha

In [51]:
#3. Tokenizing? 
tokenized_lyrics = nltk.word_tokenize(punc_lyrics)
tokenized_lyrics

['i',
 'stay',
 'out',
 'too',
 'late',
 'got',
 'nothing',
 'in',
 'my',
 'brain',
 'that',
 'is',
 'what',
 'people',
 'say',
 'mmm',
 'mmm',
 'that',
 'is',
 'what',
 'people',
 'say',
 'mmm',
 'mmm',
 'i',
 'go',
 'on',
 'too',
 'many',
 'dates',
 'but',
 'i',
 'can',
 'not',
 'make',
 'them',
 'stay',
 'at',
 'least',
 'that',
 'is',
 'what',
 'people',
 'say',
 'mmm',
 'mmm',
 'that',
 'is',
 'what',
 'people',
 'say',
 'mmm',
 'mmm',
 'but',
 'i',
 'keep',
 'cruisin',
 'can',
 'not',
 'stop',
 'will',
 'not',
 'stop',
 'movin',
 'it',
 'is',
 'like',
 'i',
 'got',
 'this',
 'music',
 'in',
 'my',
 'mind',
 'saying',
 'it',
 'is',
 'going',
 'to',
 'be',
 'alright',
 'because',
 'the',
 'players',
 'going',
 'to',
 'play',
 'play',
 'play',
 'play',
 'play',
 'and',
 'the',
 'haters',
 'going',
 'to',
 'hate',
 'hate',
 'hate',
 'hate',
 'hate',
 'baby',
 'i',
 'am',
 'just',
 'going',
 'to',
 'shake',
 'shake',
 'shake',
 'shake',
 'shake',
 'i',
 'shake',
 'it',
 'off',
 'i',
 

In [52]:
#4. Remove stopwords?
from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))

filtered_lyrics = [lyrics for lyrics in tokenized_lyrics if not lyrics in stop_words]
filtered_lyrics

['stay',
 'late',
 'got',
 'nothing',
 'brain',
 'people',
 'say',
 'mmm',
 'mmm',
 'people',
 'say',
 'mmm',
 'mmm',
 'go',
 'many',
 'dates',
 'make',
 'stay',
 'least',
 'people',
 'say',
 'mmm',
 'mmm',
 'people',
 'say',
 'mmm',
 'mmm',
 'keep',
 'cruisin',
 'stop',
 'stop',
 'movin',
 'like',
 'got',
 'music',
 'mind',
 'saying',
 'going',
 'alright',
 'players',
 'going',
 'play',
 'play',
 'play',
 'play',
 'play',
 'haters',
 'going',
 'hate',
 'hate',
 'hate',
 'hate',
 'hate',
 'baby',
 'going',
 'shake',
 'shake',
 'shake',
 'shake',
 'shake',
 'shake',
 'shake',
 'heartbreakers',
 'going',
 'break',
 'break',
 'break',
 'break',
 'break',
 'fakers',
 'going',
 'fake',
 'fake',
 'fake',
 'fake',
 'fake',
 'baby',
 'going',
 'shake',
 'shake',
 'shake',
 'shake',
 'shake',
 'shake',
 'shake',
 'never',
 'miss',
 'beat',
 'lightning',
 'feet',
 'see',
 'mmm',
 'mmm',
 'see',
 'mmm',
 'mmm',
 'dancing',
 'dancing',
 'make',
 'moves',
 'go',
 'moves',
 'go',
 'know',
 'mmm',
 '

In [53]:
#5. Convert list to string. Lemmatize?
from nltk.stem import WordNetLemmatizer
lemm = WordNetLemmatizer()

# Stringing the words together
lemmatized = ' '.join([lemm.lemmatize(words) for words in filtered_lyrics])

# [lemmatizer.lemmatize(w) for w in word_list]

# lemmatized = lemm.lemmatize(filtered_string)
lemmatized

'stay late got nothing brain people say mmm mmm people say mmm mmm go many date make stay least people say mmm mmm people say mmm mmm keep cruisin stop stop movin like got music mind saying going alright player going play play play play play hater going hate hate hate hate hate baby going shake shake shake shake shake shake shake heartbreaker going break break break break break faker going fake fake fake fake fake baby going shake shake shake shake shake shake shake never miss beat lightning foot see mmm mmm see mmm mmm dancing dancing make move go move go know mmm mmm know mmm mmm keep cruisin stop stop groovin like got music mind saying going alright player going play play play play play hater going hate hate hate hate hate baby going shake shake shake shake shake shake shake heartbreaker going break break break break break faker going fake fake fake fake fake baby going shake shake shake shake shake shake shake shake shake shake shake shake shake shake shake hey hey hey think gettin

### TextBlob Section ###

In [54]:
# 6. TextBlob Sentiment?
from textblob import TextBlob
text_sentiment = TextBlob(lemmatized).sentiment
text_sentiment

Sentiment(polarity=-0.5470933828076686, subjectivity=0.8907544836116263)

### VaderSentiment Section ###

In [55]:
lemmatized
analyzer = SentimentIntensityAnalyzer()

In [56]:
vader = analyzer.polarity_scores(lemmatized)
vader

{'neg': 0.597, 'neu': 0.284, 'pos': 0.12, 'compound': -0.9996}

### Testing Afinn Sentiment ###

In [57]:
from afinn import Afinn

In [58]:
afinn = Afinn()
afinn_test = afinn.score(lyrics)

In [334]:
afinn_test

-120.0

## Working Functions ##

In [60]:
# Function with TextBlob

def text_blob_sentiment(lyrics):
    lyrics = re.sub('\\[[^\\]]*\\]', '', lyrics)
    lyrics_cont = contractions.fix(lyrics)
    char_lyrics = lyrics_cont.lower().strip().replace('(', '').replace(')', '').replace('\n', ' ').replace('\r', ' ')
    punc_lyrics = char_lyrics.translate(str.maketrans('','', string.punctuation))
    
    #Tokenize
    tokenized_lyrics = nltk.word_tokenize(punc_lyrics)
    
    #Stop words have to be stored and loaded first
    stop_words = set(stopwords.words('english'))
    stop_lyrics = [lyrics for lyrics in tokenized_lyrics if not lyrics in stop_words]
    
    #Join for lemmatization
    lemm = WordNetLemmatizer()
    
    lemmatized = ' '.join([lemm.lemmatize(words) for words in stop_lyrics])
    text_sentiment = TextBlob(lemmatized).sentiment
    
    print(text_sentiment)

In [46]:
text_blob_sentiment(lyrics)

Sentiment(polarity=-0.5470933828076686, subjectivity=0.8907544836116263)


In [332]:
# Function with VaderSentiment

def vader_sentiment(lyrics):
    lyrics = re.sub('\\[[^\\]]*\\]', '', lyrics)
    lyrics_cont = contractions.fix(lyrics)
    char_lyrics = lyrics_cont.lower().strip().replace('(', '').replace(')', '').replace('\n', ' ').replace('\r', ' ')
    punc_lyrics = char_lyrics.translate(str.maketrans('','', string.punctuation))
    
    #Tokenize
    tokenized_lyrics = nltk.word_tokenize(punc_lyrics)
    
    #Stop words have to be stored and loaded first
    stop_words = set(stopwords.words('english'))
    stop_lyrics = [lyrics for lyrics in tokenized_lyrics if not lyrics in stop_words]
    
    #Join for lemmatization
    lemm = WordNetLemmatizer()
    
    lemmatized = ' '.join([lemm.lemmatize(words) for words in stop_lyrics])
    
    #Sentiment Analysis
    analyzer = SentimentIntensityAnalyzer()
    text_sentiment = analyzer.polarity_scores(lemmatized)
    
    print(text_sentiment)

In [333]:
vader_sentiment(lyrics)

{'neg': 0.597, 'neu': 0.284, 'pos': 0.12, 'compound': -0.9996}


In [27]:
# Function with Afinn

def afinn_sentiment(lyrics):
    lyrics = re.sub('\\[[^\\]]*\\]', '', lyrics)
    lyrics_cont = contractions.fix(lyrics)
    char_lyrics = lyrics_cont.lower().strip().replace('(', '').replace(')', '').replace('\n', ' ').replace('\r', ' ')
    punc_lyrics = char_lyrics.translate(str.maketrans('','', string.punctuation))
    
    #Tokenize
    tokenized_lyrics = nltk.word_tokenize(punc_lyrics)
    
    #Stop words have to be stored and loaded first
    stop_words = set(stopwords.words('english'))
    stop_lyrics = [lyrics for lyrics in tokenized_lyrics if not lyrics in stop_words]
    
    #Join for lemmatization
    lemm = WordNetLemmatizer()
    
    lemmatized = ' '.join([lemm.lemmatize(words) for words in stop_lyrics])
    
    afinn = Afinn()
    text_sentiment = afinn.score(lemmatized)
    
    print(text_sentiment)

In [28]:
afinn_sentiment(lyrics)

NameError: name 'lyrics' is not defined

## Testing For NRC ## -- not working

In [None]:
# attempt found on root copy of jupyter notebook

### Afinn Words Testing ###
##### I've entered another hell in word looping, just like NRC

In [19]:
afinn_text = pd.read_csv('AFINN-111.txt', names=["word", "score"], sep='\t')
afinn_text

Unnamed: 0,word,score
0,abandon,-2
1,abandoned,-2
2,abandons,-2
3,abducted,-2
4,abduction,-2
...,...,...
2472,yucky,-2
2473,yummy,3
2474,zealot,-2
2475,zealots,-2


In [25]:
def afinn_test(phrase):

0      -2
1      -2
2      -2
3      -2
4      -2
       ..
2472   -2
2473    3
2474   -2
2475   -2
2476    2
Name: score, Length: 2477, dtype: int64

In [23]:
afinn_drop

Unnamed: 0,word
0,abandon
1,abandoned
2,abandons
3,abducted
4,abduction
...,...
2472,yucky
2473,yummy
2474,zealot
2475,zealots
