In [1]:
# Sentiment Analysis Working Notebook

In [2]:
# Tickers to analyse, and the dict that get_clean_df() fills with one
# cleaned DataFrame per ticker (keyed by ticker symbol).
ticker_list = ["TSLA", "ARKK", "JNJ", "NVDA", "XOM"]
ticker_sent = dict()

In [3]:
# Initial imports
import pandas as pd
from pathlib import Path
import os
from dotenv import load_dotenv
import nltk as nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

get_ipython().run_line_magic("matplotlib", "inline")
analyzer = SentimentIntensityAnalyzer()

%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/bkamalnivas/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [4]:
def get_ticker_news(ticker):
    """
    Load the news CSV for one ticker and merge it into one row per day.

    Reads ``Resources/news/<ticker>.csv``, which must contain ``Date``,
    ``Title`` and ``Description`` columns. Timestamps are truncated to
    calendar dates, and all titles (and, separately, all descriptions)
    published on the same date are concatenated with ``". "``.

    Parameters
    ----------
    ticker : str
        Ticker symbol; used as the CSV file name.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Date`` (``datetime.date`` objects) with string columns
        ``Title`` and ``Description``. A date whose entries are all missing
        for a column yields an empty string in that column.
    """
    data_csv_path = Path("Resources/news/" + ticker + ".csv")
    data_df = pd.read_csv(data_csv_path)
    data_df['Date'] = pd.to_datetime(data_df['Date']).dt.date

    def _join_texts(column):
        # Missing values are read as float NaN, which str.join() rejects;
        # drop them and coerce the remainder to str before joining.
        return ". ".join(column.dropna().astype(str))

    # Columns must be selected with a list: the tuple form
    # groupby(...)['Title', 'Description'] is deprecated and raises in
    # recent pandas releases.
    data_df = data_df.groupby('Date')[['Title', 'Description']].agg(_join_texts)
    return data_df
    

In [5]:
def get_ticker_price():
    """
    Load the closing-price table shared by all tickers.

    Reads ``Resources/prices/close.csv``, converts its ``Date`` column to
    ``datetime.date`` values and uses it as the index.

    Returns
    -------
    pandas.DataFrame
        The remaining CSV columns, indexed by ``Date``.
    """
    raw_prices = pd.read_csv(Path("Resources/prices/close.csv"))
    return raw_prices.assign(
        Date=lambda frame: pd.to_datetime(frame['Date']).dt.date
    ).set_index('Date')

In [6]:
# Sentiment calculation based on compound score
def get_sentiment(score):
    """
    Map a compound score to a sentiment label.

    Returns 1 when ``score`` is at least 0.05, -1 when it is at most
    -0.05, and 0 for anything in between.
    """
    if score >= 0.05:
        return 1  # Positive
    if score <= -0.05:
        return -1  # Negative
    return 0  # Neutral

In [7]:
def _append_vader_scores(scores, prefix, text):
    """Append one row of VADER scores for ``text`` to the ``prefix``-keyed lists in ``scores``."""
    try:
        polarity = analyzer.polarity_scores(text)
    except AttributeError:
        # Text could not be scored (presumably a non-string value such as
        # NaN). Append NaN placeholders rather than skipping the row, so
        # every list keeps exactly one entry per news row.
        for suffix in ("compound", "pos", "neu", "neg", "sent"):
            scores[prefix + "_" + suffix].append(float("nan"))
        return

    for suffix in ("compound", "pos", "neu", "neg"):
        scores[prefix + "_" + suffix].append(polarity[suffix])
    scores[prefix + "_sent"].append(get_sentiment(polarity["compound"]))


def get_sentiment_score(ticker):
    """
    Score the daily news of one ticker with VADER.

    Loads the per-day news via ``get_ticker_news(ticker)`` and scores the
    ``Title`` and ``Description`` of every row with the module-level
    ``analyzer``.

    Parameters
    ----------
    ticker : str
        Ticker symbol passed through to ``get_ticker_news``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Date``; the news columns followed by
        ``title_compound/pos/neu/neg/sent`` and
        ``text_compound/pos/neu/neg/sent``. Rows whose text could not be
        scored hold NaN in the affected score columns.
    """
    data_df = get_ticker_news(ticker).reset_index()

    title_sent = {
        "title_compound": [],
        "title_pos": [],
        "title_neu": [],
        "title_neg": [],
        "title_sent": [],
    }
    text_sent = {
        "text_compound": [],
        "text_pos": [],
        "text_neu": [],
        "text_neg": [],
        "text_sent": [],
    }

    # Title and description are scored independently, so a failure in one
    # does not discard or shift the scores of the other.
    for _, row in data_df.iterrows():
        _append_vader_scores(title_sent, "title", row["Title"])
        _append_vader_scores(text_sent, "text", row["Description"])

    # The join is positional (both sides carry a 0..n-1 index), which is
    # only correct because each list has one entry per news row.
    title_sentiment_df = pd.DataFrame(title_sent)
    text_sentiment_df = pd.DataFrame(text_sent)
    data_df = data_df.join(title_sentiment_df).join(text_sentiment_df)
    data_df = data_df.set_index('Date')
    return data_df
    
    

In [8]:
def get_clean_df():
    """
    Fill the module-level ``ticker_sent`` dict with one frame per ticker.

    For every symbol in ``ticker_list`` the sentiment scores from
    ``get_sentiment_score`` are combined with that ticker's column of the
    shared closing-price table (aligned on the ``Date`` index). Only
    ``title_compound``, ``text_compound`` and ``Close`` are kept, and rows
    with any missing value are dropped.

    Returns nothing; results are stored in ``ticker_sent[ticker]``.

    Raises
    ------
    KeyError
        If a ticker has no column in the closing-price table.
    """
    # All the closing prices are in a single csv; read it once up front.
    data_df_close = get_ticker_price()

    # Get the sentiment of all the tickers
    for ticker in ticker_list:
        data_df = get_sentiment_score(ticker)
        data_df['Close'] = data_df_close[ticker]
        data_df = data_df[['title_compound', 'text_compound', 'Close']]
        ticker_sent[ticker] = data_df.dropna()



In [9]:
# Populate ticker_sent with the final per-ticker DataFrames used by the ML step
get_clean_df()

  """


In [10]:
# Spot-check one ticker: title/text compound scores alongside the close price
ticker_sent['NVDA']

Unnamed: 0_level_0,title_compound,text_compound,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-01-03,0.9393,0.9928,301.209991
2022-01-04,0.6174,0.9922,292.899994
2022-01-05,0.8970,0.9979,276.040009
2022-01-06,0.7003,0.9304,281.779999
2022-01-07,0.6692,0.9851,272.470001
...,...,...,...
2022-05-24,0.8316,0.9538,161.539993
2022-05-25,0.8979,0.9334,169.750000
2022-05-26,0.8466,0.9726,178.509995
2022-05-27,0.5519,0.9718,188.110001
