# Edited from 12-NLP Day 2, Ex 09 (Ins_Vader_Sentiment) - Intro to VADER Sentiment


In [1]:
# Initial imports
import os
from path import Path
import pandas as pd
from newsapi import NewsApiClient
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer



In [2]:
# Download/Update the VADER Lexicon
# (nltk stores it in the local nltk_data directory and reports when it is already up to date)
nltk.download('vader_lexicon')

# Initialize the VADER sentiment analyzer
# `analyzer` is the single module-level instance that the scoring code in this notebook calls
analyzer = SentimentIntensityAnalyzer()



[nltk_data] Downloading package vader_lexicon to
[nltk_data]     D:\Users\StealthHindu\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [51]:
def retVComp(text):
    """
        Score a piece of text with the shared VADER analyzer and hand back
        only the compound value.

        text: the string to score.
        Returns: the 'compound' entry of the polarity scores, as a FLOAT.
    """
    scores = analyzer.polarity_scores(text)
    compound = scores['compound']
    return compound


In [81]:
## Hand-written sample sentences, used to see exactly how COMPOUND SCORES behave

# Each entry is a [text, date] pair; the sentiment score is attached to the DataFrame in a later cell
toAnlyzList = [
    ["I love bitcoin.", '01/01/2010'],
    ["I like bitcoin.", '01/02/2010'],
    ["bitcoin is the way to go.", '01/03/2010'],
    ["can bitcoin go higher?", '01/04/2010'],
    ["Hate bitcoin?", '01/05/2010'],
    ["Like bitcoin?", '01/06/2010'],
    ["can bitcoin succeed?", '01/07/2010'],
    ["Millenials love bitcoin.", '01/08/2010'],
    ["Why do Millenials love bitcoin?", '01/09/2010'],
]

# Build the frame in one chain: name the two columns, then use the date as the index
sent_df = pd.DataFrame(toAnlyzList, columns=['text', 'Date']).set_index('Date')
sent_df


Unnamed: 0_level_0,text
Date,Unnamed: 1_level_1
01/01/2010,I love bitcoin.
01/02/2010,I like bitcoin.
01/03/2010,bitcoin is the way to go.
01/04/2010,can bitcoin go higher?
01/05/2010,Hate bitcoin?
01/06/2010,Like bitcoin?
01/07/2010,can bitcoin succeed?
01/08/2010,Millenials love bitcoin.
01/09/2010,Why do Millenials love bitcoin?


In [82]:
### Just a quick example of how to get the polarity scores

# Print every sample sentence next to the full score dict VADER returns for it
for entry in toAnlyzList:
    sentence = entry[0]
    scores = analyzer.polarity_scores(sentence)
    print(f"{sentence} - {scores}")

# Disabled experiment: a hand-rolled value derived from the neg/neu/pos fields,
# kept only as a reference for comparison against the compound score.
#     if scores['neu'] == 1:
#         calc = 0
#     elif scores['neg'] == 0:
#         calc = round(scores['pos'] - scores['neu'],4)
#     else:
#         calc = round(-1*(scores['neg'] - scores['neu']),4)
#print (f"{entry} - {scores} - myCalc:{calc}")

I love bitcoin. - {'neg': 0.0, 'neu': 0.192, 'pos': 0.808, 'compound': 0.6369}
I like bitcoin. - {'neg': 0.0, 'neu': 0.286, 'pos': 0.714, 'compound': 0.3612}
bitcoin is the way to go. - {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
can bitcoin go higher? - {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Hate bitcoin? - {'neg': 0.787, 'neu': 0.213, 'pos': 0.0, 'compound': -0.5719}
Like bitcoin? - {'neg': 0.0, 'neu': 0.286, 'pos': 0.714, 'compound': 0.3612}
can bitcoin succeed? - {'neg': 0.0, 'neu': 0.385, 'pos': 0.615, 'compound': 0.4939}
Millenials love bitcoin. - {'neg': 0.0, 'neu': 0.323, 'pos': 0.677, 'compound': 0.6369}
Why do Millenials love bitcoin? - {'neg': 0.0, 'neu': 0.488, 'pos': 0.512, 'compound': 0.6369}


In [90]:
# Feed each row of the 'text' column through retVComp and keep the compound scores,
# then attach them to the same DataFrame as a new 'sentiment' column.
sentList = [retVComp(row) for row in sent_df['text']]

sent_df['sentiment'] = sentList
sent_df


Unnamed: 0_level_0,text,sentiment
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
01/01/2010,I love bitcoin.,0.6369
01/02/2010,I like bitcoin.,0.3612
01/03/2010,bitcoin is the way to go.,0.0
01/04/2010,can bitcoin go higher?,0.0
01/05/2010,Hate bitcoin?,-0.5719
01/06/2010,Like bitcoin?,0.3612
01/07/2010,can bitcoin succeed?,0.4939
01/08/2010,Millenials love bitcoin.,0.6369
01/09/2010,Why do Millenials love bitcoin?,0.6369


## The code below comes from the class activity. It shows how to perform sentiment analysis (SA) with VADER on a DataFrame and store the results in that same DataFrame. It gets the text from newsapi.org.

In [6]:
# Read the News API key from the NEWSAPI environment variable (None when the variable is not set)
api_key = os.environ.get("NEWSAPI")



In [7]:
# Create a newsapi client
# (authenticated with the key held in `api_key`)
newsapi = NewsApiClient(api_key=api_key)



In [8]:
# Query NewsAPI for English-language articles matching "facebook AND libra",
# with page_size=100 and results sorted by relevancy
libra_headlines = newsapi.get_everything(
    q="facebook AND libra", language="en", page_size=100, sort_by="relevancy"
)

# Report the total number of matching articles returned in the response
total_results = libra_headlines['totalResults']
print(f"Total articles about Facebook Libra: {total_results}")

# Display the first article of the response as a sample
first_article = libra_headlines["articles"][0]
first_article



Total articles about Facebook Libra: 79


{'source': {'id': 'the-next-web', 'name': 'The Next Web'},
 'author': 'Sunil Kumar Jha',
 'title': 'Why every online store needs a customer service chatbot',
 'description': 'In recent times, organizations have been competing with one another to implement chatbots for various reasons, including enhancing customer experience, streamlining processes, and fueling the demand for digital and innovative technologies. Cognitive technolog…',
 'url': 'https://thenextweb.com/syndication/2020/03/22/why-every-online-store-needs-a-customer-service-chatbot/',
 'urlToImage': 'https://img-cdn.tnwcdn.com/image/tnw?filter_last=1&fit=1280%2C640&url=https%3A%2F%2Fcdn0.tnwcdn.com%2Fwp-content%2Fblogs.dir%2F1%2Ffiles%2F2020%2F03%2F1-16.jpg&signature=a40902ffd96a01d7bb3338c2bafa5eff',
 'publishedAt': '2020-03-22T14:00:11Z',
 'content': 'In recent times, organizations have been competing with one another to implement chatbots for various reasons, including enhancing customer experience, streamlining processes

In [9]:
# Create the Facebook Libra sentiment scores DataFrame
#
# For every fetched article, score its "content" text with VADER and record the
# text, the publication date (first 10 characters of "publishedAt", i.e. YYYY-MM-DD)
# and the compound / positive / negative / neutral scores.

# Column order of the resulting DataFrame
cols = ["date", "text", "compound", "positive", "negative", "neutral"]

libra_sentiments = []

for article in libra_headlines["articles"]:
    text = article["content"]

    # Some articles come back without usable content. Skip those explicitly
    # instead of relying on the analyzer raising AttributeError on non-string
    # input and swallowing it, which could also hide unrelated errors.
    if not isinstance(text, str):
        continue

    sentiment = analyzer.polarity_scores(text)

    libra_sentiments.append({
        "text": text,
        "date": article["publishedAt"][:10],
        "compound": sentiment["compound"],
        "positive": sentiment["pos"],
        "negative": sentiment["neg"],
        "neutral": sentiment["neu"],
    })

# Create DataFrame with the columns already in display order.
# Passing `columns` also keeps this working when no article was scored:
# the result is an empty frame with the expected columns rather than a KeyError.
libra_df = pd.DataFrame(libra_sentiments, columns=cols)

libra_df.head()



Unnamed: 0,date,text,compound,positive,negative,neutral
0,2020-03-22,"In recent times, organizations have been compe...",0.34,0.078,0.04,0.882
1,2020-03-23,Ive written many posts on the global public cl...,0.4576,0.089,0.0,0.911
2,2020-04-01,Facebook(NASDAQ:FB) and Twitter(NYSE:TWTR) est...,0.0,0.0,0.0,1.0
3,2020-03-28,A source in the Capitol has confirmed that U.S...,-0.6801,0.0,0.136,0.864
4,2020-03-24,Marcelo M. Prates is a lawyer at the Central B...,0.0,0.0,0.0,1.0


In [10]:
# Get descriptive stats from the DataFrame
# (count, mean, std, min, quartiles and max of the numeric score columns)
libra_df.describe()



Unnamed: 0,compound,positive,negative,neutral
count,78.0,78.0,78.0,78.0
mean,0.180428,0.076333,0.037321,0.886397
std,0.472545,0.076174,0.052339,0.086958
min,-0.8718,0.0,0.0,0.657
25%,-0.1712,0.0,0.0,0.833
50%,0.1767,0.057,0.0,0.8905
75%,0.605775,0.11575,0.06525,0.962
max,0.9595,0.309,0.191,1.0
