# Text Sentiment Analysis
The NLTK libraries include a few packages to help solve the issues we experienced in the gender classifier model. 

* First is the SentimentAnalyzer module, which allows you to include additional features using built-in functions.
* The second is VADER (Valence Aware Dictionary and sEntiment Reasoner), a lexicon- and rule-based analyzer; it is the one used in this notebook, through `SentimentIntensityAnalyzer`.

In [1]:
# Suppress all warnings for the rest of the session.
import warnings
warnings.filterwarnings('ignore')

# BEGIN: fix Python or Notebook SSL CERTIFICATE_VERIFY_FAILED
# Workaround: unless the PYTHONHTTPSVERIFY environment variable is set to a
# non-empty value, replace the ssl module's default HTTPS context factory with
# the unverified one (only when this Python build exposes it).
# SECURITY NOTE(review): this switches off certificate verification for any
# code in the process that relies on that default factory. Acceptable for a
# demo notebook; do not copy into production code -- fix the local CA bundle
# instead.
import os, ssl
if (not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
    ssl._create_default_https_context = ssl._create_unverified_context
# END: fix Python or Notebook SSL CERTIFICATE_VERIFY_FAILED

## Read website content

In [7]:
import urllib as url
import bs4 as bs
import html2text
import spacy
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from IPython.display import HTML, display

def _sentiment_label(compound):
    """Map a VADER compound score to the HTML label shown in the notebook.

    Args:
        compound: The ``compound`` value returned by ``polarity_scores``.

    Returns:
        The POSITIVE label for scores >= 0.3, the NEUTRAL label for scores in
        [0, 0.3), the NEGATIVE label for scores below 0, and an empty string
        when the score compares false against every threshold (e.g. NaN).
    """
    if compound >= 0.3:
        return "<font color='green'><b>POSITIVE</b></font>"
    if compound >= 0:
        # NOTE(review): NEUTRAL is rendered in red, the same colour as
        # NEGATIVE -- confirm this is intended.
        return "<font color='red'><b>NEUTRAL</b></font>"
    if compound < 0:
        return "<font color='red'><b>NEGATIVE</b></font>"
    return ''


def sentiment_analysis(content):
    """Download a web page and classify the sentiment of its text.

    The page is fetched, converted from HTML to plain text with links
    ignored, and scored with VADER's ``SentimentIntensityAnalyzer``. The
    resulting ``compound`` score selects the label.

    Args:
        content: URL of the page to analyse (anything ``urlopen`` accepts).

    Returns:
        An HTML snippet naming the predicted sentiment (POSITIVE, NEUTRAL or
        NEGATIVE), suitable for ``IPython.display.HTML``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.request`` has been loaded.
    import urllib.request

    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(content) as response:
        raw_html = response.read()
        # Honour the charset declared by the server; fall back to UTF-8.
        charset = response.headers.get_content_charset() or 'utf-8'

    try:
        page_html = raw_html.decode(charset, errors='replace')
    except LookupError:
        # The server declared a charset Python does not know.
        page_html = raw_html.decode('utf-8', errors='replace')

    converter = html2text.HTML2Text()
    converter.ignore_links = True
    article_txt = converter.handle(page_html)

    scores = SentimentIntensityAnalyzer().polarity_scores(article_txt)
    return _sentiment_label(scores['compound'])

### Predict sentiment on the content

In [8]:
# URL of the page to analyse.
content = "https://www.scu.edu/ethics-in-technology-practice/ethical-toolkit/"

# Fetch the page and get its sentiment label (an HTML snippet).
ethical_toolkit = sentiment_analysis(content)

# Render the label together with the source URL as HTML in the cell output.
display(HTML(f"\nPredicted sentiment is {ethical_toolkit} for the content url: {content} "))

In [9]:
# URL of the page to analyse (rebinds the notebook-global `content`).
content = "https://www.theatlantic.com/technology/archive/2018/03/got-99-problems-but-a-trolley-aint-one/556805/"

# Fetch the page and get its sentiment label (an HTML snippet).
ethics = sentiment_analysis(content)

# Render the label together with the source URL as HTML in the cell output.
display(HTML(f"\nPredicted sentiment is {ethics} for the content url: {content} "))

In [11]:
# URL of the page to analyse (rebinds the notebook-global `content`).
content = "https://www.eastern.edu/data"

# Fetch the page and get its sentiment label (an HTML snippet).
datascience = sentiment_analysis(content)

# Render the label together with the source URL as HTML in the cell output.
display(HTML(f"\nPredicted sentiment is {datascience} for the content url: {content} "))

In [14]:
# URL of the page to analyse (rebinds the notebook-global `content`).
content = "https://www.npr.org/sections/health-shots/2021/07/09/1014512213/covid-is-surging-in-new-hotspots-driven-by-low-vaccination-rates"

# Fetch the page and get its sentiment label (an HTML snippet).
# NOTE(review): the variable name does not describe this article; it is left
# unchanged because other cells may read it.
summeronline = sentiment_analysis(content)

# Render the label together with the source URL as HTML in the cell output.
display(HTML(f"\nPredicted sentiment is {summeronline} for the content url: {content} "))