In [1]:
# Initial imports
import os
from pathlib import Path
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Programmatic form of the `%matplotlib inline` magic. get_ipython() is only
# defined inside an IPython/Jupyter session, so this line fails under plain Python.
get_ipython().run_line_magic("matplotlib", "inline")


In [2]:
# Make sure the VADER lexicon is available locally; nltk only logs
# "already up-to-date" when the package has been downloaded before.
nltk.download("vader_lexicon")
# Module-level analyzer instance; create_sentiment_df reads this global when
# it scores headlines, so this cell must run before that function is called.
analyzer = SentimentIntensityAnalyzer()


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\14694\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [23]:
# Locations of the scraped headline CSVs, one per ticker
# (relative paths, resolved against the notebook's working directory).
aapl_file, btc_file, spy_file, tsla_file = (
    Path(csv_path)
    for csv_path in (
        'Web-Scraping-APP/AAPL_HEADLINES.csv',
        'Web-Scraping-APP/BTCUSA_HEADLINES.csv',
        'Web-Scraping-APP/SPY_HEADLINES.csv',
        'Web-Scraping-APP/TSLA_HEADLINES.csv',
    )
)

# Read each CSV into its own DataFrame, in the same order as the paths above.
aapl_headlines, btc_headlines, spy_headlines, tsla_headlines = (
    pd.read_csv(csv_file)
    for csv_file in (aapl_file, btc_file, spy_file, tsla_file)
)

# Preview the first ten AAPL rows as the cell output.
aapl_headlines.head(10)

Unnamed: 0,Headline,Date
0,"Apple Inc. stock falls Monday, underperforms m...","Nov. 9, 2020 at 4:30 p.m. ET"
1,Big Tech Stocks Are Lagging Today. Why They’ll...,"Nov. 9, 2020 at 1:45 p.m. ET"
2,"As Apple releases its new line of Macs, the bi...","Nov. 9, 2020 at 1:18 p.m. ET"
3,"In the Midst of Election Uncertainty, Younger ...","Nov. 6, 2020 at 9:21 p.m. ET"
4,Berkshire Buybacks Hit Record $9 Billion in Th...,"Nov. 7, 2020 at 8:49 a.m. ET"
5,This single-country stock picker has beaten th...,"Nov. 3, 2020 at 7:12 a.m. ET"
6,"Apple Inc. stock falls Friday, underperforms m...","Nov. 6, 2020 at 4:30 p.m. ET"
7,T-Mobile Stock Is at a Record High After Earni...,"Nov. 6, 2020 at 2:16 p.m. ET"
8,Dow's 25-point fall led by losses in UnitedHea...,"Nov. 6, 2020 at 10:53 a.m. ET"
9,"Dow falls 110 points on losses for Apple Inc.,...","Nov. 6, 2020 at 9:45 a.m. ET"


In [4]:
def get_sentiment(score):
    """
    Map a compound score onto a discrete sentiment label.

    Returns 1 (positive) when ``score >= 0.05``, -1 (negative) when
    ``score <= -0.05``, and 0 (neutral) for everything in between.
    """
    if score >= 0.05:
        return 1  # Positive
    if score <= -0.05:
        return -1  # Negative
    return 0  # Neutral: neither threshold was reached


In [24]:
def create_sentiment_df(df):
    """
    Score every headline with VADER and return a sentiment DataFrame indexed by Date.

    Uses the module-level ``analyzer`` to score each headline and
    ``get_sentiment`` to turn the compound score into a -1/0/1 label.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "Headline" column and a "Date" column whose values
        combine date and time around the word "at", e.g.
        "Nov. 9, 2020 at 4:30 p.m. ET". The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by the date part of "Date" (still a string), with columns
        "Time", "Headline", "compound", "positive", "neutral", "negative"
        and "sentiment". Rows whose headline could not be scored (for
        example a missing value) are dropped.
    """
    score_columns = ["compound", "positive", "neutral", "negative", "sentiment"]
    output_columns = ["Date", "Time", "Headline"] + score_columns
    missing = float("nan")

    # One entry per input row, in row order. Unscorable rows get NaN
    # placeholders so the scores stay lined up with their own headline.
    title_sent = {name: [] for name in score_columns}
    for headline in df["Headline"]:
        try:
            # Sentiment scoring with VADER
            title_sentiment = analyzer.polarity_scores(headline)
        except AttributeError:
            # Raised for non-text headlines such as NaN; keep the slot so
            # later rows are not shifted onto the wrong headline.
            for name in score_columns:
                title_sent[name].append(missing)
            continue
        title_sent["compound"].append(title_sentiment["compound"])
        title_sent["positive"].append(title_sentiment["pos"])
        title_sent["neutral"].append(title_sentiment["neu"])
        title_sent["negative"].append(title_sentiment["neg"])
        title_sent["sentiment"].append(get_sentiment(title_sentiment["compound"]))

    # Split only on the standalone word "at" (surrounded by whitespace), once,
    # so text such as "Sat." or a "-" inside the date is left untouched and
    # neither part keeps padding whitespace.
    date_and_time = df["Date"].str.strip().str.split(r"\s+at\s+", n=1)

    headline_sentiment_df = (
        # Lists are assigned by position, so this works for any index on df.
        df.assign(
            Date=date_and_time.str[0],
            Time=date_and_time.str[1],
            **title_sent,
        )
        # Drop the rows that could not be scored (result must be kept:
        # dropna returns a new frame).
        .dropna(subset=score_columns)
        # NaN placeholders turn the label column into floats; restore ints.
        .astype({"sentiment": int})
        .reindex(columns=output_columns)
        .set_index("Date")
    )
    return headline_sentiment_df

In [25]:
# NOTE(review): this rebinds aapl_headlines from the raw CSV frame to the
# processed frame, so the cell is not idempotent. Running it a second time
# passes in a frame whose Date is already the index, which create_sentiment_df
# cannot process; re-run the CSV loading cell first.
aapl_headlines = create_sentiment_df(aapl_headlines)

In [26]:
# Display only: shows Date as a regular column next to a fresh integer index.
# The result is not assigned, so aapl_headlines itself keeps Date as its index.
aapl_headlines.reset_index()

Unnamed: 0,Date,Time,Headline,compound,positive,neutral,negative,sentiment
0,"Nov. 9, 2020",4:30 p.m. ET,"Apple Inc. stock falls Monday, underperforms m...",0.0000,0.000,1.000,0.000,0
1,"Nov. 9, 2020",1:45 p.m. ET,Big Tech Stocks Are Lagging Today. Why They’ll...,-0.0772,0.121,0.738,0.141,-1
2,"Nov. 9, 2020",1:18 p.m. ET,"As Apple releases its new line of Macs, the bi...",0.4767,0.193,0.807,0.000,1
3,"Nov. 6, 2020",9:21 p.m. ET,"In the Midst of Election Uncertainty, Younger ...",-0.3400,0.000,0.806,0.194,-1
4,"Nov. 7, 2020",8:49 a.m. ET,Berkshire Buybacks Hit Record $9 Billion in Th...,-0.1531,0.000,0.882,0.118,-1
...,...,...,...,...,...,...,...,...
9868,"Aug. 29, 2016",11:47 a.m. ET,Respect for America has climbed during the Oba...,0.4767,0.279,0.721,0.000,1
9869,"Aug. 29, 2016",9:41 a.m. ET,"Fitbit upgrades now track yoga, weightlifting ...",0.0000,0.000,1.000,0.000,0
9870,"Aug. 28, 2016",9:15 p.m. ET,5 things Tim Cook has done better at Apple tha...,0.4404,0.209,0.791,0.000,1
9871,"Aug. 27, 2016",11:02 a.m. ET,Want to invest in self-driving cars? Check out...,0.0772,0.126,0.874,0.000,1


In [29]:
# Average the sentiment scores per date. numeric_only=True keeps the text
# columns (Time, Headline) out of the mean; without it pandas >= 2.0 raises
# a TypeError instead of silently dropping them.
# Date labels are strings here, so the groups come out in lexical order
# ("Apr. 1, 2017", "Apr. 1, 2019", ...), not chronological order.
aapl_headlines.groupby(['Date']).mean(numeric_only=True)

Unnamed: 0_level_0,compound,positive,neutral,negative,sentiment
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Apr. 1, 2017",0.113150,0.106500,0.893500,0.000000,0.500000
"Apr. 1, 2019",0.468340,0.297400,0.702600,0.000000,1.000000
"Apr. 1, 2020",-0.087443,0.045571,0.861714,0.092857,-0.428571
"Apr. 10, 2017",-0.127650,0.000000,0.920000,0.080000,-0.250000
"Apr. 10, 2018",0.105260,0.096000,0.904000,0.000000,0.600000
...,...,...,...,...,...
"Sep. 9, 2016",-0.059275,0.049500,0.872750,0.077750,-0.250000
"Sep. 9, 2017",-0.243400,0.053600,0.762600,0.183800,-0.400000
"Sep. 9, 2018",0.324300,0.145000,0.855000,0.000000,0.500000
"Sep. 9, 2019",0.065883,0.108667,0.828833,0.062500,0.166667
