In [1]:
# Initial imports
import os
from pathlib import Path
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Script-export form of the `%matplotlib inline` line magic; get_ipython()
# is only available when this runs inside an IPython/Jupyter session.
get_ipython().run_line_magic("matplotlib", "inline")


In [2]:
# Make sure the VADER lexicon is available locally (the download reports
# "already up-to-date" when it is), then build the single analyzer instance
# that create_sentiment_df reads as a module-level global.
nltk.download("vader_lexicon")
analyzer = SentimentIntensityAnalyzer()


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\annmi\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [3]:
# Every scraped headline CSV sits in the same folder, resolved relative to
# the current working directory.
headlines_dir = Path('Web-Scraping-APP')

aapl_file = headlines_dir / 'AAPL_HEADLINES.csv'
btc_file = headlines_dir / 'BTCUSA_HEADLINES.csv'
spy_file = headlines_dir / 'SPY_HEADLINES.csv'
tsla_file = headlines_dir / 'TSLA_HEADLINES.csv'

# One raw headlines DataFrame per ticker.
aapl_headlines = pd.read_csv(aapl_file)
btc_headlines = pd.read_csv(btc_file)
spy_headlines = pd.read_csv(spy_file)
tsla_headlines = pd.read_csv(tsla_file)

# Preview the AAPL headlines as the cell output.
aapl_headlines.head()

Unnamed: 0,Headline,Date
0,"Apple Inc. stock falls Monday, underperforms m...","Nov. 9, 2020 at 4:30 p.m. ET"
1,Big Tech Stocks Are Lagging Today. Why They’ll...,"Nov. 9, 2020 at 1:45 p.m. ET"
2,"As Apple releases its new line of Macs, the bi...","Nov. 9, 2020 at 1:18 p.m. ET"
3,"In the Midst of Election Uncertainty, Younger ...","Nov. 6, 2020 at 9:21 p.m. ET"
4,Berkshire Buybacks Hit Record $9 Billion in Th...,"Nov. 7, 2020 at 8:49 a.m. ET"


In [4]:
def get_sentiment(score):
    """
    Maps a compound score onto a discrete sentiment label.

    Returns 1 (positive) when the score is at least 0.05, -1 (negative)
    when it is at most -0.05, and 0 (neutral) for anything else.
    """
    if score >= 0.05:
        return 1  # Positive
    if score <= -0.05:
        return -1  # Negative
    return 0  # Neutral: neither threshold was reached


In [5]:
def create_sentiment_df(df, max_rows=5):
    """
    Takes headlines DataFrame & creates DataFrame with Sentiment columns.
    Moves Date to Index.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "Headline" column (the text to score) and a "Date"
        column (becomes the index of the result). `df` itself is not modified.
    max_rows : int or None, default 5
        Number of leading rows to return. The default keeps the original
        preview behaviour; pass None to get every scored row.

    Returns
    -------
    pandas.DataFrame
        `df` indexed by "Date" with the added columns "compound", "positive",
        "neutral", "negative" and "sentiment". A row whose headline could not
        be scored holds NaN in all five columns.
    """
    title_sent = {
        "compound": [],
        "positive": [],
        "neutral": [],
        "negative": [],
        "sentiment": [],
    }
    missing = float("nan")

    for headline in df["Headline"]:
        try:
            # Sentiment scoring with VADER
            scores = analyzer.polarity_scores(headline)
        except AttributeError:
            # NOTE(review): presumably raised for non-string headlines such as
            # missing values - confirm. Record a placeholder instead of
            # skipping the row, otherwise every later score would be attached
            # to the wrong headline.
            for values in title_sent.values():
                values.append(missing)
        else:
            title_sent["compound"].append(scores["compound"])
            title_sent["positive"].append(scores["pos"])
            title_sent["neutral"].append(scores["neu"])
            title_sent["negative"].append(scores["neg"])
            title_sent["sentiment"].append(get_sentiment(scores["compound"]))

    # Reuse df's own index so the join lines scores up with their headlines
    # even when df does not carry the default 0..n-1 index.
    title_sent_df = pd.DataFrame(title_sent, index=df.index)

    headline_sent_df = df.join(title_sent_df).set_index("Date")
    if max_rows is None:
        return headline_sent_df
    return headline_sent_df.head(max_rows)

In [6]:
# Score the AAPL headlines and display the resulting preview rows.
create_sentiment_df(aapl_headlines)

Unnamed: 0_level_0,Headline,compound,positive,neutral,negative,sentiment
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Nov. 9, 2020 at 4:30 p.m. ET","Apple Inc. stock falls Monday, underperforms m...",0.0,0.0,1.0,0.0,0
"Nov. 9, 2020 at 1:45 p.m. ET",Big Tech Stocks Are Lagging Today. Why They’ll...,-0.0772,0.121,0.738,0.141,-1
"Nov. 9, 2020 at 1:18 p.m. ET","As Apple releases its new line of Macs, the bi...",0.4767,0.193,0.807,0.0,1
"Nov. 6, 2020 at 9:21 p.m. ET","In the Midst of Election Uncertainty, Younger ...",-0.34,0.0,0.806,0.194,-1
"Nov. 7, 2020 at 8:49 a.m. ET",Berkshire Buybacks Hit Record $9 Billion in Th...,-0.1531,0.0,0.882,0.118,-1
