In [28]:
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# NLTK VADER for sentiment analysis
from nltk.sentiment.vader import SentimentIntensityAnalyzer

finwiz_url = 'https://finviz.com/quote.ashx?t='

In [29]:
news_tables = {}
tickers = ['AMZN', 'TSLA', 'GOOG']

for ticker in tickers:
    url = finwiz_url + ticker
    req = Request(url=url,headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}) 
    response = urlopen(req)    
    # Read the contents of the file into 'html'
    html = BeautifulSoup(response)
    # Find 'news-table' in the Soup and load it into 'news_table'
    news_table = html.find(id='news-table')
    # Add the table to our dictionary
    news_tables[ticker] = news_table

In [30]:
# Read one single day of headlines for 'AMZN' 
amzn = news_tables['AMZN']
# Get all the table rows tagged in HTML with <tr> into 'amzn_tr'
amzn_tr = amzn.findAll('tr')

for i, table_row in enumerate(amzn_tr):
    # Read the text of the element 'a' into 'link_text'
    a_text = table_row.a.text
    # Read the text of the element 'td' into 'data_text'
    td_text = table_row.td.text
    # Print the contents of 'link_text' and 'data_text' 
    print(a_text)
    print(td_text)
    # Exit after printing 4 rows of data
    if i == 3:
        break

Why Virgin Galactic Stock Sank Today
Jul-27-21 02:34PM  
Why Crypto Stocks Plunged Today
02:30PM  
Is Kroger Stock A Buy Right Now? Here's What Earnings, Charts Show
02:28PM  
These Growth Stocks Are Bargains Because of 1 Unreasonable Fear
02:24PM  


In [31]:
parsed_news = []

# Iterate through the news
for file_name, news_table in news_tables.items():
    # Iterate through all tr tags in 'news_table'
    for x in news_table.findAll('tr'):
        # read the text from each tr tag into text
        # get text from a only
        text = x.a.get_text() 
        # splite text in the td tag into a list 
        date_scrape = x.td.text.split()
        # if the length of 'date_scrape' is 1, load 'time' as the only element

        if len(date_scrape) == 1:
            time = date_scrape[0]
            
        # else load 'date' as the 1st element and 'time' as the second    
        else:
            date = date_scrape[0]
            time = date_scrape[1]
        # Extract the ticker from the file name, get the string up to the 1st '_'  
        ticker = file_name.split('_')[0]
        
        # Append ticker, date, time and headline as a list to the 'parsed_news' list
        parsed_news.append([ticker, date, time, text])
        
parsed_news

[['AMZN', 'Jul-27-21', '02:34PM', 'Why Virgin Galactic Stock Sank Today'], ['AMZN', 'Jul-27-21', '02:30PM', 'Why Crypto Stocks Plunged Today'], ['AMZN', 'Jul-27-21', '02:28PM', "Is Kroger Stock A Buy Right Now? Here's What Earnings, Charts Show"], ['AMZN', 'Jul-27-21', '02:24PM', 'These Growth Stocks Are Bargains Because of 1 Unreasonable Fear'], ['AMZN', 'Jul-27-21', '02:00PM', 'Amazon Starts Trading in Canada With Depositary-Receipt Launch'], ['AMZN', 'Jul-27-21', '01:58PM', "Intel CEO on the chip industry: You can't be a little player"], ['AMZN', 'Jul-27-21', '12:54PM', 'We will be a leader in chips again: Intel CEO'], ['AMZN', 'Jul-27-21', '12:19PM', 'Amazon responds to bitcoin rumour that sent crypto market surging'], ['AMZN', 'Jul-27-21', '12:07PM', 'Bitcoin hovers around $38K as Amazon denies plans to accept the currency'], ['AMZN', 'Jul-27-21', '11:46AM', 'Better Buy: Sea Limited vs. Coupang'], ['AMZN', 'Jul-27-21', '11:45AM', 'Amazon Has Big News on Gaming and Digital Currency

In [52]:
# Instantiate the sentiment intensity analyzer
vader = SentimentIntensityAnalyzer()

# Set column names
columns = ['ticker', 'date', 'time', 'headline']

# Convert the parsed_news list into a DataFrame called 'parsed_and_scored_news'
parsed_and_scored_news = pd.DataFrame(parsed_news, columns=columns)

# Iterate through the headlines and get the polarity scores using vader
scores = parsed_and_scored_news['headline'].apply(vader.polarity_scores).tolist()

# # Convert the 'scores' list of dicts into a DataFrame
scores_df = pd.DataFrame(scores)

# # Join the DataFrames of the news and the list of dicts
parsed_and_scored_news = parsed_and_scored_news.join(scores_df, rsuffix='_right')

# # Convert the date column from string to datetime
parsed_and_scored_news['date'] = pd.to_datetime(parsed_and_scored_news.date).dt.date

parsed_and_scored_news.head()

Unnamed: 0,ticker,date,time,headline,neg,neu,pos,compound
0,AMZN,2021-07-27,02:34PM,Why Virgin Galactic Stock Sank Today,0.0,1.0,0.0,0.0
1,AMZN,2021-07-27,02:30PM,Why Crypto Stocks Plunged Today,0.0,1.0,0.0,0.0
2,AMZN,2021-07-27,02:28PM,Is Kroger Stock A Buy Right Now? Here's What E...,0.0,1.0,0.0,0.0
3,AMZN,2021-07-27,02:24PM,These Growth Stocks Are Bargains Because of 1 ...,0.25,0.547,0.203,-0.1531
4,AMZN,2021-07-27,02:00PM,Amazon Starts Trading in Canada With Depositar...,0.0,0.805,0.195,0.1779


In [53]:
parsed_and_scored_news

Unnamed: 0,ticker,date,time,headline,neg,neu,pos,compound
0,AMZN,2021-07-27,02:34PM,Why Virgin Galactic Stock Sank Today,0.000,1.000,0.000,0.0000
1,AMZN,2021-07-27,02:30PM,Why Crypto Stocks Plunged Today,0.000,1.000,0.000,0.0000
2,AMZN,2021-07-27,02:28PM,Is Kroger Stock A Buy Right Now? Here's What E...,0.000,1.000,0.000,0.0000
3,AMZN,2021-07-27,02:24PM,These Growth Stocks Are Bargains Because of 1 ...,0.250,0.547,0.203,-0.1531
4,AMZN,2021-07-27,02:00PM,Amazon Starts Trading in Canada With Depositar...,0.000,0.805,0.195,0.1779
...,...,...,...,...,...,...,...,...
295,GOOG,2021-07-22,04:51AM,Billionaire Who Missed Out on TikTok Is Trying...,0.180,0.820,0.000,-0.2960
296,GOOG,2021-07-21,07:19PM,What Duolingo Has to Offer Investors,0.000,1.000,0.000,0.0000
297,GOOG,2021-07-21,06:50PM,Google Reveals Commerce-Related Updates,0.000,1.000,0.000,0.0000
298,GOOG,2021-07-21,05:45PM,Alphabet Inc. (GOOG) Outpaces Stock Market Gai...,0.000,0.806,0.194,0.3400


In [67]:
import plotly.express as px

# Group by date and ticker columns from scored_news and calculate the mean
mean_scores = parsed_and_scored_news.groupby(['ticker','date']).mean()

# Unstack the column ticker
mean_scores1 = mean_scores.unstack()

# Get the cross-section of compound in the 'columns' axis
mean_scores11 = mean_scores1.xs('compound', axis="columns").transpose()

# Plot a bar chart with pandas
fig = px.bar(mean_scores11,barmode='group')
fig

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [69]:
import plotly.graph_objects as go # or plotly.express as px
# fig = go.Figure() # or any Plotly Express function e.g. px.bar(...)
# fig.add_trace( ... )
# fig.update_layout( ... )

import dash
import dash_core_components as dcc
import dash_html_components as html

app = dash.Dash()
app.layout = html.Div([
    dcc.Graph(figure=fig)
])

app.run_server(debug=False, use_reloader=False) 

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off
