In [None]:
##### Testing techniques between TextBlob and vaderSentiment
##### Also using the lyricsgenius API vs AZ lyrics/other lyrics source (will validate for ease of use)
##### Currently testing only one song at a time
##### Expected output: pos/neg/neutral for Vader, polarity/subjectivity for TextBlob

#####

###### Stretch goals: (secret po)

In [10]:
# Dependencies set up:

# Standard library
import os
import re
import string

# Dataframe building, analysis
import pandas as pd
import numpy as np

# Scraping data/lyrics
import lyricsgenius

# Data Preprocessing
from textblob import TextBlob
from nltk.corpus import stopwords
import contractions
import nltk
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer

### TextBlob Section ###

In [11]:
# Fetch the lyrics of a single song from the Genius API.
# The access token is read from the GENIUS_ACCESS_TOKEN environment variable so that
# no credential is stored in the notebook. Any token that was previously committed
# in this cell should be treated as leaked and regenerated.
genius_token = os.environ.get("GENIUS_ACCESS_TOKEN")
if not genius_token:
    raise RuntimeError(
        "Set the GENIUS_ACCESS_TOKEN environment variable to your Genius API token."
    )

genius = lyricsgenius.Genius(genius_token)
artist = "Adele"

# Resolve the artist first; the song search below is given the name from this result.
search_artist = genius.search_artist(artist, max_songs=1, sort="title")
if search_artist is None:
    raise LookupError("Genius returned no artist for {!r}.".format(artist))

song = genius.search_song("All I Ask", search_artist.name)
if song is None:
    # Fail here with a clear message instead of an AttributeError on `song.lyrics`.
    raise LookupError(
        "Genius returned no song 'All I Ask' for {!r}.".format(search_artist.name)
    )

lyrics = song.lyrics
lyrics

Searching for songs by Adele...

"19 [Booklet]" is not valid. Skipping.
Song 1: "2017 Grammy’s Song of the Year Speech"

Reached user-specified song limit (1).
Done. Found 1 songs.
Searching for "All I Ask" by Adele...
Done.


"[Verse 1]\nI will leave my heart at the door\nI won't say a word\nThey've all been said before, you know\nSo why don't we just play pretend\nLike we're not scared of what is coming next\nOr scared of having nothing left?\n\n[Pre-Chorus]\nLook, don't get me wrong\nI know there is no tomorrow\nAll I ask is\n\n[Chorus]\nIf this is my last night with you\nHold me like I'm more than just a friend\nGive me a memory I can use\nTake me by the hand while we do\nWhat lovers do\nIt matters how this ends\n'Cause what if I never love again?\n\n[Verse 2]\nI don't need your honesty\nIt's already in your eyes\nAnd I'm sure my eyes, they speak for me\nNo one knows me like you do\nAnd since you're the only one that mattered\nTell me, who do I run to?\n\n[Pre-Chorus]\nLook, don't get me wrong\nI know there is no tomorrow\nAll I ask is\n\n[Chorus]\nIf this is my last night with you\nHold me like I'm more than just a friend\nGive me a memory I can use\nTake me by the hand while we do\nWhat lovers do\nIt m

In [None]:
# API Test 
# lyrics_url = "https://www.azlyrics.com/lyrics/pharrellwilliams/happy.html"
# response = requests.get(lyrics_url)
# soup = BeautifulSoup(response.text, 'lxml')
# lyrics = soup.find('div', class_=None).text
# lyrics = lyrics.replace('\n', ' ').replace('\r', ' ')
# lyrics

In [None]:
# Strip the bracketed labels that mark parts of the song, such as [Chorus:],
# [Explicit:] or [Verse 2:], leaving only the lyric text.
lyrics = re.compile('\\[[^\\]]*\\]').sub('', lyrics)

In [None]:
# Expand contractions with the `contractions` package before punctuation is removed,
# then display the expanded text.
# (Presumably turns e.g. "don't" into "do not" - confirm against the package docs.)
exp_lyrics = contractions.fix(lyrics)
exp_lyrics

In [None]:
# Normalise the expanded lyrics: lowercase everything (so capitalisation cannot affect
# word weights), trim leading/trailing whitespace, collapse triple spaces, drop
# parentheses while keeping the words inside them, and turn line breaks into spaces.
lyrics = (
    exp_lyrics
    .lower()
    .strip()
    .replace('   ', ' ')
    .replace('(', '')
    .replace(')', '')
    .replace('\n', ' ')
    .replace('\r', ' ')
)
lyrics

In [None]:
# Delete every character listed in string.punctuation from the normalised lyrics.
# A translation table that maps each punctuation character to None removes it.
punc_lyrics = lyrics.translate(str.maketrans(dict.fromkeys(string.punctuation)))
punc_lyrics

In [None]:
# Split the punctuation-free lyrics into a list of word tokens with NLTK's tokenizer,
# then display the token list.
tokenized_lyrics = nltk.word_tokenize(punc_lyrics)
tokenized_lyrics

In [None]:
# Drop every token that appears in NLTK's English stop-word list.
# `stopwords` is imported in the dependency cell at the top of the notebook.
stop_words = set(stopwords.words('english'))

# The loop variable is named `token` so that it does not shadow the global `lyrics`.
filtered_lyrics = [token for token in tokenized_lyrics if token not in stop_words]
filtered_lyrics

In [None]:
# Lemmatize each remaining token with WordNet and join the results into a single
# space-separated string, which is what the sentiment scorers take as input.
# `WordNetLemmatizer` is imported in the dependency cell at the top of the notebook.
lemm = WordNetLemmatizer()

lemmatized = ' '.join(lemm.lemmatize(token) for token in filtered_lyrics)
lemmatized

In [None]:
# Score the lemmatized lyrics with TextBlob and display the resulting
# Sentiment(polarity, subjectivity) value.
# `TextBlob` is imported in the dependency cell at the top of the notebook.
text_sentiment = TextBlob(lemmatized).sentiment
text_sentiment

### VaderSentiment Section ###

In [None]:
# Re-display the lemmatized lyrics string built in the TextBlob section, for reference.
lemmatized

In [None]:
# Create the VADER analyzer instance used to score the lyrics in the next cell.
analyzer = SentimentIntensityAnalyzer()

In [None]:
# Score the lyrics with VADER and display the resulting score dictionary.
# NOTE(review): this scores `punc_lyrics` (punctuation removed only), whereas the
# vader_sentiment() function further down scores the stop-word-filtered, lemmatized
# text - confirm which input is intended.
polarity_v = analyzer.polarity_scores(punc_lyrics)
polarity_v

In [12]:
# Function with TextBlob

def text_blob_sentiment(lyrics, verbose=True):
    """Clean raw song lyrics and score them with TextBlob.

    The text is cleaned in this order: bracketed section labels are removed,
    contractions are expanded, the text is lowercased and flattened to one line,
    punctuation is stripped, stop words are dropped and the remaining tokens are
    lemmatized. The cleaned text is then passed to TextBlob.

    Parameters
    ----------
    lyrics : str
        Raw lyrics text, e.g. the `lyrics` attribute of a Genius song.
    verbose : bool, optional
        When True (the default) the result is also printed, as the function
        always did before it returned a value.

    Returns
    -------
    Sentiment
        TextBlob's sentiment result, with `polarity` and `subjectivity` fields.

    Notes
    -----
    Needs the NLTK data used by `stopwords`, `word_tokenize` and
    `WordNetLemmatizer` to be installed.
    """
    # Remove bracketed section labels such as [Chorus] or [Verse 2].
    untagged = re.sub('\\[[^\\]]*\\]', '', lyrics)
    expanded = contractions.fix(untagged)

    # Lowercase, trim, drop parentheses (keeping their contents) and flatten line breaks.
    flattened = (
        expanded.lower().strip()
        .replace('(', '').replace(')', '')
        .replace('\n', ' ').replace('\r', ' ')
    )
    no_punct = flattened.translate(str.maketrans('', '', string.punctuation))

    # Tokenize
    tokens = nltk.word_tokenize(no_punct)

    # Stop words are loaded into a set for fast membership checks.
    stop_words = set(stopwords.words('english'))
    content_tokens = [token for token in tokens if token not in stop_words]

    # Lemmatize token by token, then join back into a single string for TextBlob.
    lemm = WordNetLemmatizer()
    lemmatized = ' '.join(lemm.lemmatize(token) for token in content_tokens)

    text_sentiment = TextBlob(lemmatized).sentiment

    if verbose:
        print(text_sentiment)
    # Return the score so callers can collect it instead of reading it off stdout.
    return text_sentiment

In [13]:
# Run the TextBlob pipeline on the fetched lyrics; the function prints the Sentiment result.
text_blob_sentiment(lyrics)

Sentiment(polarity=-0.19642857142857142, subjectivity=0.5206349206349206)


In [14]:
# Function with VaderSentiment

def vader_sentiment(lyrics, verbose=True):
    """Clean raw song lyrics and score them with VADER.

    The text is cleaned in this order: bracketed section labels are removed,
    contractions are expanded, the text is lowercased and flattened to one line,
    punctuation is stripped, stop words are dropped and the remaining tokens are
    lemmatized. The cleaned text is then passed to
    `SentimentIntensityAnalyzer.polarity_scores`.

    Parameters
    ----------
    lyrics : str
        Raw lyrics text, e.g. the `lyrics` attribute of a Genius song.
    verbose : bool, optional
        When True (the default) the result is also printed, as the function
        always did before it returned a value.

    Returns
    -------
    dict
        VADER's score dictionary with the keys 'neg', 'neu', 'pos' and 'compound'.

    Notes
    -----
    Needs the NLTK data used by `stopwords`, `word_tokenize` and
    `WordNetLemmatizer` to be installed.

    NOTE(review): VADER's documentation describes it as using capitalisation,
    punctuation and negation words as cues, and this preprocessing removes them
    before scoring. Consider comparing against a score of the raw text.
    """
    # Remove bracketed section labels such as [Chorus] or [Verse 2].
    untagged = re.sub('\\[[^\\]]*\\]', '', lyrics)
    expanded = contractions.fix(untagged)

    # Lowercase, trim, drop parentheses (keeping their contents) and flatten line breaks.
    flattened = (
        expanded.lower().strip()
        .replace('(', '').replace(')', '')
        .replace('\n', ' ').replace('\r', ' ')
    )
    no_punct = flattened.translate(str.maketrans('', '', string.punctuation))

    # Tokenize
    tokens = nltk.word_tokenize(no_punct)

    # Stop words are loaded into a set for fast membership checks.
    stop_words = set(stopwords.words('english'))
    content_tokens = [token for token in tokens if token not in stop_words]

    # Lemmatize token by token, then join back into a single string for the analyzer.
    lemm = WordNetLemmatizer()
    lemmatized = ' '.join(lemm.lemmatize(token) for token in content_tokens)

    # Sentiment Analysis
    analyzer = SentimentIntensityAnalyzer()
    text_sentiment = analyzer.polarity_scores(lemmatized)

    if verbose:
        print(text_sentiment)
    # Return the scores so callers can collect them instead of reading them off stdout.
    return text_sentiment

In [15]:
# Run the VADER pipeline on the fetched lyrics; the function prints the score dictionary.
vader_sentiment(lyrics)

{'neg': 0.211, 'neu': 0.421, 'pos': 0.368, 'compound': 0.9685}
