In [1]:
# Sentiment Analysis Working Notebook

In [2]:
# Initial imports
import pandas as pd
from pathlib import Path
import os
from dotenv import load_dotenv
import nltk as nltk
# Make sure the VADER lexicon is available before the analyzer is built.
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Single shared analyzer instance used by the scoring code in later cells.
analyzer = SentimentIntensityAnalyzer()

# Render matplotlib figures inline (issued once; it was previously repeated).
get_ipython().run_line_magic("matplotlib", "inline")

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/bkamalnivas/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [3]:
def get_ticker_news(ticker, news_dir="Resources/news"):
    """
    Loads the news CSV for a ticker and merges its articles into one row per day.

    Parameters
    ----------
    ticker : str
        Ticker symbol; the file read is ``<news_dir>/<ticker>.csv``.
    news_dir : str or Path, optional
        Directory holding the per-ticker CSV files. Defaults to
        "Resources/news", the location this function previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Date`` (converted to ``datetime.date`` values), with the
        ``Title`` and ``Description`` columns of each day joined with ". ".
        Missing entries are left out of the joined text.
    """
    data_csv_path = Path(news_dir) / (ticker + ".csv")
    data_df = pd.read_csv(data_csv_path)
    # Drop the time of day so that all articles of one day share a group key.
    data_df['Date'] = pd.to_datetime(data_df['Date']).dt.date

    def join_texts(column):
        # str.join raises TypeError on NaN, so missing entries are skipped.
        return ". ".join(column.dropna().astype(str))

    # Columns are selected with a list: indexing a groupby with a bare tuple
    # of names (['Title', 'Description']) is rejected by pandas >= 2.0.
    data_df = data_df.groupby('Date')[['Title', 'Description']].agg(join_texts)
    return data_df
    

def get_ticker_price():
    """
    Stub: prints a status message and returns nothing.

    NOTE(review): presumably meant to load ticker prices later -- not
    implemented yet.
    """
    status_message = 'in progress...'
    print(status_message)

In [13]:
# Sentiment calculation based on compound score
def get_sentiment(score, threshold=0.05):
    """
    Calculates the sentiment based on the compound score.

    Parameters
    ----------
    score : float
        Compound score to classify.
    threshold : float, optional
        Distance from zero at which a score stops being neutral. Defaults to
        0.05, the cut-off this function previously hard-coded. Expected to be
        non-negative.

    Returns
    -------
    int
        1 when ``score >= threshold`` (positive), -1 when
        ``score <= -threshold`` (negative), otherwise 0 (neutral). A NaN score
        fails both comparisons and is therefore reported as neutral.
    """
    if score >= threshold:  # Positive
        return 1
    if score <= -threshold:  # Negative
        return -1
    return 0  # Neutral

In [18]:
def _score_text_series(texts, prefix):
    """Scores every entry of ``texts`` with VADER, producing one row per entry."""
    score_keys = ("compound", "pos", "neu", "neg")
    columns = [prefix + "_" + key for key in score_keys] + [prefix + "_sent"]

    records = []
    for text in texts:
        try:
            # Sentiment scoring with VADER
            scores = analyzer.polarity_scores(text)
        except AttributeError:
            # The entry could not be scored (presumably a non-string value).
            # Record NaN placeholders instead of skipping it, so the scores
            # stay aligned with the rows they belong to.
            records.append(dict.fromkeys(columns, float("nan")))
            continue
        record = {prefix + "_" + key: scores[key] for key in score_keys}
        record[prefix + "_sent"] = get_sentiment(scores["compound"])
        records.append(record)

    # Reuse the input index so the later join lines up row for row.
    return pd.DataFrame(records, columns=columns, index=texts.index)


def get_sentiment_score(ticker):
    """
    Builds the daily news frame for a ticker with VADER sentiment columns.

    Parameters
    ----------
    ticker : str
        Ticker symbol, passed on to ``get_ticker_news``.

    Returns
    -------
    pandas.DataFrame
        The frame from ``get_ticker_news`` with its index reset, plus
        ``title_compound``, ``title_pos``, ``title_neu``, ``title_neg``,
        ``title_sent`` (scores of ``Title``) and the matching ``text_*``
        columns (scores of ``Description``). A row whose text raises
        ``AttributeError`` during scoring gets NaN in the affected columns;
        previously such rows were skipped in the score lists only, which
        shifted every later score onto the wrong row.
    """
    data_df = get_ticker_news(ticker).reset_index()

    # Attaching sentiment columns to the News DataFrame
    title_sentiment_df = _score_text_series(data_df["Title"], "title")
    text_sentiment_df = _score_text_series(data_df["Description"], "text")
    return data_df.join(title_sentiment_df).join(text_sentiment_df)
    
    

In [24]:
ticker_list = ['AMZN', 'ARKK', 'JNJ', 'NVDA', 'TSLA', 'XOM']
ticker_sent = {}

# Score the news of every ticker and keep each frame under its symbol.
# `data_df` stays bound to the most recently scored frame; the cells further
# down read that name.
for ticker in ticker_list:
    data_df = ticker_sent[ticker] = get_sentiment_score(ticker)

# Preview the last rows of the ARKK frame.
ticker_sent['ARKK'].tail()


  """


Unnamed: 0,Date,Title,Description,title_compound,title_pos,title_neu,title_neg,title_sent,text_compound,text_pos,text_neu,text_neg,text_sent
127,2022-05-27,Long Leading Forecast Update: Recession Watch ...,There are 7 long leading indicators from the f...,-0.4215,0.0,0.906,0.094,-1,-0.2263,0.057,0.887,0.056,-1
128,2022-05-28,Week Ahead - Another Action-Packed Week. Optim...,It’s likely to be another action-packed week d...,0.1779,0.119,0.785,0.096,1,-0.1531,0.041,0.895,0.063,-1
129,2022-05-30,"Bear Markets And Recessions, Not Inevitable - ...",If investors start selling before the declarat...,0.0,0.0,1.0,0.0,0,0.4215,0.094,0.906,0.0,1
130,2022-05-31,Billionaire Ray Dalio Says Cash Is Trash And E...,"Ray Dalio cautioned investors away from cash, ...",-0.7089,0.0,0.803,0.197,-1,-0.0516,0.086,0.844,0.07,-1
131,2022-06-01,Cathie Wood Scoops Up $1.3M In Tesla As Stock ...,Cathie Wood-led Ark Investment Management adde...,0.0,0.0,1.0,0.0,0,0.4767,0.101,0.867,0.032,1


In [None]:
# drop=True keeps this cell safe to re-run: a plain reset_index() inserts the
# old index as an extra column on every run and eventually fails on the
# column-name clash.
data_df = data_df.reset_index(drop=True)
data_df

In [None]:
# Sentiment scores dictionaries
# NOTE(review): this cell repeats the scoring logic of get_sentiment_score and
# reads `ticker_df`, which no cell shown in this notebook defines -- confirm
# where it is meant to come from before running.
title_sent = {
    "title_compound": [],
    "title_pos": [],
    "title_neu": [],
    "title_neg": [],
    "title_sent": [],
}
text_sent = {
    "text_compound": [],
    "text_pos": [],
    "text_neu": [],
    "text_neg": [],
    "text_sent": [],
}

# Get sentiment for the text and the title
for index, row in ticker_df.iterrows():
    for prefix, column, store in (
        ("title", "Title", title_sent),
        ("text", "Description", text_sent),
    ):
        try:
            # Sentiment scoring with VADER
            scores = analyzer.polarity_scores(row[column])
        except AttributeError:
            # The entry could not be scored: record NaN placeholders so every
            # list keeps exactly one value per row. Skipping the row instead
            # would shift later scores onto the wrong rows in the join below.
            for values in store.values():
                values.append(float("nan"))
            continue
        for key in ("compound", "pos", "neu", "neg"):
            store[prefix + "_" + key].append(scores[key])
        store[prefix + "_sent"].append(get_sentiment(scores["compound"]))

# Attaching sentiment columns to the News DataFrame
# The score frames reuse ticker_df's index so the join lines up row for row.
title_sentiment_df = pd.DataFrame(title_sent, index=ticker_df.index)
text_sentiment_df = pd.DataFrame(text_sent, index=ticker_df.index)
ticker_df = ticker_df.join(title_sentiment_df).join(text_sentiment_df)



In [None]:
# Display the frame currently bound to data_df.
data_df

In [None]:
# Number of rows for each distinct value in the text_sent column.
data_df['text_sent'].value_counts()

In [None]:
# Number of rows for each distinct value in the title_sent column.
data_df['title_sent'].value_counts()

In [None]:
# Summary statistics of data_df as produced by DataFrame.describe().
data_df.describe()