In [1]:
# importing libraries
import pandas as pd
import sqlalchemy as db

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer

from api_keys import *

In [2]:
# connecting to SQL database
engine = db.create_engine(f"postgresql+psycopg2://{SQL_USER}:{SQL_PASS}@{SQL_HOST}/{SQL_DB}")
connection = engine.connect()

In [6]:
# extracting data from database
lyrics_query = '''select * from lyrics'''
SQL_Query = pd.read_sql_query(query, connection)
lyrics_df = pd.DataFrame(SQL_Query)

In [135]:
# adding columns to the dataframe
lyrics_len = len(lyrics_df)
summary = {'neg': [0.0]*lyrics_len, 'neu': [0.0]*lyrics_len, 'pos': [0.0]*lyrics_len, 'compound': [0.0]*lyrics_len}
scores = pd.DataFrame.from_dict(summary)
lyrics_scores = pd.concat([lyrics_df, scores], axis=1)

In [128]:
# list of stopwords from nltk
stop_words = set(stopwords.words("english"))

# initializing lemmatizer
lem = WordNetLemmatizer()

# initializing sentiment analyzer
sid = SentimentIntensityAnalyzer()

In [129]:
# text-processing functions

# removing stopwords from tokenized lyrics
def filter_words(tokenized_words: list) -> list:
    filtered_words=[]
    for w in tokenized_words:
        if w not in stop_words:
            filtered_words.append(w)
    return filtered_words

# getting part of speech of a word
def get_wordnet_pos(word):
    """Map POS tag to first character lemmatize() accepts"""
    tag = nltk.pos_tag([word])[0][1][0].upper()
    tag_dict = {"J": wordnet.ADJ,
                "N": wordnet.NOUN,
                "V": wordnet.VERB,
                "R": wordnet.ADV}

    return tag_dict.get(tag, wordnet.NOUN)

# lemmatizing words - reducing words to their base word
def lemmatize_words(filtered_words: list) -> list:
    lemmatized_words=[]
    for w in filtered_words:
        lemmatized_words.append(lem.lemmatize(w, get_wordnet_pos(w)))
    return lemmatized_words

In [167]:
# iterating through the dataframe and calculating scores for the lyrics of each song

for index, row in lyrics_scores.iterrows():
    lyrics = row.lyrics
    
    if lyrics:
        
        #tokenizing words
        tokenized_words = word_tokenize(lyrics)
        
        # filtering out stopwords
        filtered_words = filter_words(tokenized_words)
        
        # lemmatizing words
        lemmatized_words = lemmatize_words(filtered_words)
    
        # combining list of words back into a single string
        words_text = ' '.join(lemmatized_words)
    
        # scoring the lyrics
        text_scores = sid.polarity_scores(words_text)
    
        # updating scores in dataframe
        lyrics_scores.loc[lyrics_scores['index'] == index, ['neg','neu', 'pos', 'compound']] = text_scores.values()

In [168]:
lyrics_scores

Unnamed: 0,index,title,album,year,lyrics,image,neg,neu,pos,compound
0,0,"​thank u, next","thank u, next",2018-11-03,Thought I'd end up with Sean\nBut he wasn't a ...,https://images.genius.com/5b04050228012fd94051...,0.078,0.458,0.463,0.9994
1,1,Without Me,Manic (Target Exclusive),2018-10-04,Found you when your heart was broke\nI filled ...,https://images.genius.com/3ba32b2a382530392682...,0.127,0.799,0.074,-0.8355
2,2,All I Want for Christmas Is You,Merry Christmas,1994-11-01,I don't want a lot for Christmas\nThere is jus...,https://images.genius.com/c34735d6253d3e85a386...,0.121,0.744,0.136,0.9594
3,3,SICKO MODE,ASTROWORLD,2018-08-03,"Astro, yeah\nSun is down, freezin' cold\nThat'...",https://images.genius.com/9c8508d3056b146aee2a...,0.107,0.596,0.298,0.9986
4,4,VIRTUAL REALITY REVOLUTIONIZES CONCERTS: NEXT ...,,2019-10-07,"Author: Jorge Ramos\nYou did read it, and it w...",https://images.genius.com/d751211ca0e16eae2d8b...,0.041,0.781,0.178,0.9939
...,...,...,...,...,...,...,...,...,...,...
446,446,Slide,HER3*,2019-09-27,You always wearin' them glasses\nYou don't wan...,https://images.genius.com/a1df7845d51d3305c9e0...,0.078,0.835,0.087,-0.3891
447,447,October 2019 Singles Release Calendar,2019 Singles Release Calendar,,"10/1\nAnt Clemons - ""4 Letter Word"" ft. Timbal...",https://images.genius.com/cd5f415d2040c8939d0e...,0.088,0.821,0.091,0.8711
448,448,Tip Toe,Please Excuse Me for Being Antisocial,2019-11-25,"Rose gold Rollie, I'm the nigga put the whole ...",https://images.genius.com/e40bbb5187632692795d...,0.213,0.715,0.072,-0.9969
449,449,BEST ON EARTH,SHAKE THE SNOW GLOBE,2019-10-18,"Like the way she work, ayy\nShe always puts me...",https://images.genius.com/9c15820cb1809f83b342...,0.132,0.646,0.222,0.9825


In [169]:
# sending table to SQL
lyrics_scores.to_sql("lyrics_scores", connection, if_exists = "replace")