# NLP Stock Sentiment Analysis

In [3]:
# Importing libraries
# NOTE(review): numpy, re, CountVectorizer and csr_matrix are not referenced in
# the cells visible in this notebook -- confirm before relying on them.
import pandas as pd
import numpy as np
import re
from sklearn.feature_extraction.text import CountVectorizer
from scipy.sparse import csr_matrix
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
# Fetch the 'vader_lexicon' NLTK resource (presumably the lexicon used by
# SentimentIntensityAnalyzer); the output below reports it is already up to date.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/kaustubhsingh/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [4]:
# Read the daily comments CSV (path is relative to the notebook's working
# directory) into a dataframe
df = pd.read_csv(filepath_or_buffer='DailyComments.csv')
# Show the loaded dataframe
df

Unnamed: 0,Day of Week,comments
0,Monday,"Hello, how are you?"
1,Tuesday,Today is a good day!
2,Wednesday,It's my birthday so it's a really special day!
3,Thursday,Today is neither a good day or a bad day!
4,Friday,I'm having a bad day.
5,Saturday,There' s nothing special happening today.
6,Sunday,Today is a SUPER good day!


In [5]:
# Build a lower-cased version of the raw comments and store it in a new
# column, leaving the original 'comments' column untouched
lowered_comments = df['comments'].str.lower()
df['cleaned_comments'] = lowered_comments
# Show the dataframe with the new column
df

Unnamed: 0,Day of Week,comments,cleaned_comments
0,Monday,"Hello, how are you?","hello, how are you?"
1,Tuesday,Today is a good day!,today is a good day!
2,Wednesday,It's my birthday so it's a really special day!,it's my birthday so it's a really special day!
3,Thursday,Today is neither a good day or a bad day!,today is neither a good day or a bad day!
4,Friday,I'm having a bad day.,i'm having a bad day.
5,Saturday,There' s nothing special happening today.,there' s nothing special happening today.
6,Sunday,Today is a SUPER good day!,today is a super good day!


In [6]:
# Removing punctuation: drop every character that is neither a word character
# nor whitespace.
# `regex=True` is passed explicitly because the default for `str.replace`
# changed: without it, newer pandas treats the pattern as a literal string and
# no punctuation would be removed (older versions emit a FutureWarning).
# The raw string keeps the `\w` / `\s` escapes from being interpreted by Python.
df['cleaned_comments'] = df['cleaned_comments'].str.replace(r'[^\w\s]', '', regex=True)
# Displaying dataframe
df

  df['cleaned_comments'] = df['cleaned_comments'].str.replace('[^\w\s]','')


Unnamed: 0,Day of Week,comments,cleaned_comments
0,Monday,"Hello, how are you?",hello how are you
1,Tuesday,Today is a good day!,today is a good day
2,Wednesday,It's my birthday so it's a really special day!,its my birthday so its a really special day
3,Thursday,Today is neither a good day or a bad day!,today is neither a good day or a bad day
4,Friday,I'm having a bad day.,im having a bad day
5,Saturday,There' s nothing special happening today.,there s nothing special happening today
6,Sunday,Today is a SUPER good day!,today is a super good day


VADER (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon- and rule-based model for text sentiment analysis that is sensitive to both the polarity (positive/negative) and the intensity (strength) of emotion. It is available in the NLTK package and can be applied directly to unlabeled text data.

# Sentiment Analysis using Vader 

In [7]:
# Creating an instance of NLTK's VADER SentimentIntensityAnalyzer;
# its polarity_scores method is used below to score the cleaned comments
sent = SentimentIntensityAnalyzer()

In [8]:
# Running polarity score on our first record of cleaned comments.
# The column is selected by name instead of by position (`df.iloc[0][2]`):
# integer keys on a label-indexed row are a deprecated positional fallback in
# recent pandas, and a name lookup keeps working if columns are added or
# reordered.
first_rec = sent.polarity_scores(df['cleaned_comments'].iloc[0])

In [9]:
# Checking the type of the object returned by polarity_scores
# (the output below shows it is a dict)
type(first_rec)

dict

In [10]:
# Compound score of the first record, read from the 'compound' key
first_rec['compound']

0.0

In [11]:
# Positive score of the first record, read from the 'pos' key
first_rec['pos']

0.0

In [12]:
# Negative score of the first record, read from the 'neg' key
first_rec['neg']

0.0

In [13]:
# Neutral score of the first record, read from the 'neu' key
first_rec['neu']

1.0

The first cleaned comment is neutral: its neutral score is 1.0, while its compound, positive, and negative scores are all 0.0.

In [14]:
# Now checking polarity score on all the cleaned comments

# Score every cleaned comment once, in row order. Each result is a dict from
# which the 'compound', 'pos', 'neg' and 'neu' entries are read below.
# The column is addressed by name instead of by position (`df.iloc[i][2]`):
# integer keys on a label-indexed row are a deprecated positional fallback in
# recent pandas, and a name lookup does not depend on column order.
comment_scores = [sent.polarity_scores(comment) for comment in df['cleaned_comments']]

# One list per score type; element k belongs to row k of the data frame
score_com = [scores['compound'] for scores in comment_scores]
score_pos = [scores['pos'] for scores in comment_scores]
score_neg = [scores['neg'] for scores in comment_scores]
score_neu = [scores['neu'] for scores in comment_scores]

In [15]:
# Attach each score list to the data frame as its own column
# (Comp = compound, Pos = positive, Neg = negative, Neu = neutral).
# The mapping preserves insertion order, so columns are added in this order.
comment_score_columns = {
    'Comp_Score': score_com,
    'Pos_Score': score_pos,
    'Neg_Score': score_neg,
    'Neu_Score': score_neu,
}
for column_name, column_values in comment_score_columns.items():
    df[column_name] = column_values

In [16]:
# Displaying the data frame, which now includes the Comp_Score, Pos_Score,
# Neg_Score and Neu_Score columns
df

Unnamed: 0,Day of Week,comments,cleaned_comments,Comp_Score,Pos_Score,Neg_Score,Neu_Score
0,Monday,"Hello, how are you?",hello how are you,0.0,0.0,0.0,1.0
1,Tuesday,Today is a good day!,today is a good day,0.4404,0.492,0.0,0.508
2,Wednesday,It's my birthday so it's a really special day!,its my birthday so its a really special day,0.5034,0.318,0.0,0.682
3,Thursday,Today is neither a good day or a bad day!,today is neither a good day or a bad day,-0.7101,0.0,0.496,0.504
4,Friday,I'm having a bad day.,im having a bad day,-0.5423,0.0,0.538,0.462
5,Saturday,There' s nothing special happening today.,there s nothing special happening today,-0.3089,0.0,0.361,0.639
6,Sunday,Today is a SUPER good day!,today is a super good day,0.7783,0.694,0.0,0.306


In [17]:
# Median of the compound scores across all comments
df['Comp_Score'].median()

0.0

In [18]:
# Mean of the compound scores across all comments
df['Comp_Score'].mean()

0.02297142857142858

Since the median compound score is zero and the mean is close to zero, we can say that the comments are neutral overall.

# Stock Sentiment

We use Finviz.com as the source of financial news for stock sentiment analysis. Finviz lists recent headlines for each ticker, which can be looked up at finviz.com.

In [19]:
# importing libraries
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup

In [52]:
# finviz url
url = 'https://finviz.com/quote.ashx?t=NKLA'
# Requesting url (an explicit user-agent header is sent with the request)
req = Request(url=url, headers={'user-agent':'my-app'})
# Open the connection in a context manager so it is always closed,
# even if parsing raises
with urlopen(req) as response:
    # Using BeautifulSoup parsing html code
    html = BeautifulSoup(response, 'html')

# Locating the news table by its element ID
news_table = html.find(id='news-table')
if news_table is None:
    # Fail with a clear message instead of an AttributeError on None below
    raise ValueError(f"No element with id 'news-table' found at {url}")

# Finding all the table rows (find_all is the current name of findAll)
news_rows = news_table.find_all('tr')

# Headline text of every row, in page order. Rows that contain no <a> tag
# carry no headline and are skipped rather than crashing on `row.a.text`.
parsed_data = [row.a.text for row in news_rows if row.a is not None]
    

In [53]:
# Build a one-column data frame ('Title') from the scraped headline strings
df1 = pd.DataFrame(data=parsed_data, columns=['Title'])
# Show the headlines
df1

Unnamed: 0,Title
0,Is Nikola A Buy Or A Sell Now? Here's What Fun...
1,Heniff Transportation Signs LOI with the Inten...
2,Implied Volatility Surging for Nikola (NKLA) S...
3,Nikola (NKLA) to Pay $125M SEC Penalty Fee Ove...
4,Did an SEC Investigation Put This Hot EV Stock...
...,...
95,Nikola stock jumps after narrower-than-expecte...
96,UPDATE 2-Nikola to pay $125 mln penalty in SEC...
97,Nikola to pay $125 million penalty in SEC sett...
98,Nikola reserves $125 mln to fund potential set...


In [54]:
# Creating a SentimentIntensityAnalyzer instance used below to score the
# scraped headlines
sia = SentimentIntensityAnalyzer()

In [55]:
# Checking polarity score on all text

# Score every headline once, in row order. Each result is a dict from which
# the 'compound', 'pos', 'neg' and 'neu' entries are read below.
# The column is addressed by name instead of by position (`df1.iloc[i][0]`):
# integer keys on a label-indexed row are a deprecated positional fallback in
# recent pandas, and a name lookup does not depend on column order.
headline_scores = [sia.polarity_scores(headline) for headline in df1['Title']]

# One list per score type; element k belongs to row k of the data frame
score_com = [scores['compound'] for scores in headline_scores]
score_pos = [scores['pos'] for scores in headline_scores]
score_neg = [scores['neg'] for scores in headline_scores]
score_neu = [scores['neu'] for scores in headline_scores]

In [56]:
# Attach each score list to the headline data frame as its own column
# (Comp = compound, Pos = positive, Neg = negative, Neu = neutral).
# The mapping preserves insertion order, so columns are added in this order.
headline_score_columns = {
    'Comp_Score': score_com,
    'Pos_Score': score_pos,
    'Neg_Score': score_neg,
    'Neu_Score': score_neu,
}
for column_name, column_values in headline_score_columns.items():
    df1[column_name] = column_values

In [57]:
# Displaying the headline data frame, which now includes the Comp_Score,
# Pos_Score, Neg_Score and Neu_Score columns
df1

Unnamed: 0,Title,Comp_Score,Pos_Score,Neg_Score,Neu_Score
0,Is Nikola A Buy Or A Sell Now? Here's What Fun...,0.0000,0.0,0.000,1.000
1,Heniff Transportation Signs LOI with the Inten...,0.0000,0.0,0.000,1.000
2,Implied Volatility Surging for Nikola (NKLA) S...,0.0000,0.0,0.000,1.000
3,Nikola (NKLA) to Pay $125M SEC Penalty Fee Ove...,-0.5267,0.0,0.328,0.672
4,Did an SEC Investigation Put This Hot EV Stock...,0.0000,0.0,0.000,1.000
...,...,...,...,...,...
95,Nikola stock jumps after narrower-than-expecte...,-0.6486,0.0,0.325,0.675
96,UPDATE 2-Nikola to pay $125 mln penalty in SEC...,-0.5267,0.0,0.355,0.645
97,Nikola to pay $125 million penalty in SEC sett...,-0.5267,0.0,0.386,0.614
98,Nikola reserves $125 mln to fund potential set...,0.0000,0.0,0.000,1.000


In [58]:
# Median of the compound scores across all headlines
df1['Comp_Score'].median()

0.0

In [59]:
# Mean of the compound scores across all headlines
df1['Comp_Score'].mean()

-0.026978999999999996

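With a median compound score of 0.0 and a mean of about -0.027, the news for the NKLA ticker is very slightly negative but close to neutral. Based on this sentiment analysis we might expect some bad news, although the signal is weak.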