In [1]:
# Imports

from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# NLTK VADER for sentiment analysis
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Base URL of the Finviz quote page; a ticker symbol is appended to it to
# build the address of each page that gets scraped.
finwiz_url = 'https://finviz.com/quote.ashx?t='

In [2]:
# Scraped 'news-table' elements, keyed by ticker symbol.
news_tables = {}
tickers = ['amzn']

for ticker in tickers:
    url = finwiz_url + ticker
    # A browser-like User-Agent is sent with the request
    # (presumably the site refuses urllib's default agent -- TODO confirm).
    req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
    # Use the response as a context manager so the connection is closed as
    # soon as the page has been parsed instead of being left open.
    with urlopen(req) as response:
        # Name the parser explicitly: without it BeautifulSoup picks whichever
        # parser happens to be installed and emits a warning, so the parsed
        # tree could differ from one environment to another.
        html = BeautifulSoup(response, 'html.parser')
    # Find the element with id 'news-table' in the soup (None when absent)
    news_table = html.find(id='news-table')
    # Add the table to our dictionary
    news_tables[ticker] = news_table

In [3]:
# One [ticker, date, time, headline] list per scraped news row.
parsed_news = []

# Iterate through the scraped tables
for file_name, news_table in news_tables.items():
    # Extract the ticker from the key: the string up to the 1st '_'.
    # This does not depend on the row, so it is computed once per table.
    ticker = file_name.split('_')[0]

    # The page may not have contained a 'news-table' element at all.
    if news_table is None:
        continue

    # Rows that show only a time reuse the date of the closest preceding row
    # that carried one. Start each table at None so a date is never borrowed
    # from another ticker and a date-less first row cannot raise a NameError.
    date = None

    # Iterate through all tr tags in 'news_table'
    for row in news_table.find_all('tr'):
        # Skip rows that have no headline link or no timestamp cell;
        # dereferencing them would raise an AttributeError.
        if row.a is None or row.td is None:
            continue

        # Headline text comes from the anchor tag only
        text = row.a.get_text()
        # Split the text of the td tag into whitespace-separated tokens
        date_scrape = row.td.text.split()
        if not date_scrape:
            # Empty timestamp cell: there is nothing to index into.
            continue

        if len(date_scrape) == 1:
            # Only a time is present; 'date' keeps its previous value
            time = date_scrape[0]
        else:
            # First token is the date, second token is the time
            date = date_scrape[0]
            time = date_scrape[1]

        # Append ticker, date, time and headline as a list to 'parsed_news'
        parsed_news.append([ticker, date, time, text])
        


In [4]:
# parsed_news

In [5]:
# Build the VADER analyzer used to score each headline
vader = SentimentIntensityAnalyzer()

# Column labels, in the same order as the fields of every parsed_news row
columns = ['ticker', 'date', 'time', 'headline']

# Tabulate the scraped rows
parsed_and_scored_news = pd.DataFrame(parsed_news, columns=columns)

# Score the headlines one by one; each call returns a dict of polarity scores
scores = [
    vader.polarity_scores(headline)
    for headline in parsed_and_scored_news['headline']
]

# Turn the list of score dicts into its own frame (one column per dict key)
scores_df = pd.DataFrame(scores)

# Attach the score columns to the news rows by index; a score column whose
# name clashes with a news column would receive the '_right' suffix
parsed_and_scored_news = parsed_and_scored_news.join(scores_df, rsuffix='_right')

# Replace the date strings with datetime.date objects
parsed_and_scored_news['date'] = pd.to_datetime(parsed_and_scored_news['date']).dt.date

# Preview up to the first 50 scored rows
parsed_and_scored_news.head(50)

Unnamed: 0,ticker,date,time,headline,neg,neu,pos,compound
0,amzn,2021-07-30,05:23PM,"Tech Earnings Sparkled, but Theres Reason to W...",0.259,0.741,0.0,-0.5927
1,amzn,2021-07-30,04:47PM,Survey Reveals Which Tech Companies Consumers ...,0.0,0.708,0.292,0.5106
2,amzn,2021-07-30,04:42PM,Its going to be an exceptional back to school ...,0.0,1.0,0.0,0.0
3,amzn,2021-07-30,04:39PM,US STOCKS-Wall Street falls with Amazon; S&P 5...,0.0,0.647,0.353,0.6124
4,amzn,2021-07-30,04:39PM,Tech Stocks In Focus: Here Are The Most Truste...,0.0,0.747,0.253,0.5256
5,amzn,2021-07-30,04:32PM,"Market Recap: Friday, July 30",0.0,1.0,0.0,0.0
6,amzn,2021-07-30,04:30PM,Stock Market Rally Pauses Near Highs Amid Earn...,0.0,0.892,0.108,0.1779
7,amzn,2021-07-30,04:18PM,"Dow Jones Dips, Amazon Stock Plunges Amid Reco...",0.0,0.788,0.212,0.3612
8,amzn,2021-07-30,04:12PM,"Amazon Second-Quarter Revenue, Outlook, Below ...",0.0,0.805,0.195,0.1779
9,amzn,2021-07-30,04:03PM,"Stocks end lower after Amazon disappointment, ...",0.178,0.428,0.394,0.6757


In [6]:
import plotly.express as px
import plotly.graph_objects as go
import yfinance as yf

# Daily price history for the last five days. The symbol is read from the
# 'tickers' list rather than from a loop variable left over from an earlier
# cell, so this cell does not depend on which loop happened to run last.
data = yf.Ticker(tickers[-1]).history(period="5d", interval="1d").reset_index()
fig = px.line(data, x='Date', y="Open")

# Average the sentiment scores per ticker and day. numeric_only=True keeps the
# string columns ('time', 'headline') out of the aggregation; without it,
# pandas >= 2.0 raises a TypeError when it tries to average them.
mean_scores = parsed_and_scored_news.groupby(['ticker','date']).mean(numeric_only=True)
# Move the 'date' index level into the columns: (score name, date) per ticker
mean_scores1 = mean_scores.unstack()
# Keep only the 'compound' score and flip so rows are dates, columns tickers
mean_scores11 = mean_scores1.xs('compound', axis="columns").transpose()
fig2=px.bar(mean_scores11,barmode='group',title= 'Compound sentiment scores of news aticles')
# fig.add_trace(fig2)
fig.show()

In [7]:
# Display the price-history frame (the cell's last expression is rendered)
data

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2021-07-26,3673.169922,3712.080078,3647.25,3699.820068,2896300,0,0
1,2021-07-27,3698.5,3698.5,3586.149902,3626.389893,4121700,0,0
2,2021-07-28,3633.780029,3658.419922,3601.0,3630.320068,2999400,0,0
3,2021-07-29,3627.75,3637.949951,3580.01001,3599.919922,5356600,0,0
4,2021-07-30,3347.949951,3368.0,3306.97998,3327.590088,9793333,0,0


In [8]:
# Display the per-date mean compound scores (one column per ticker)
mean_scores11


ticker,amzn
date,Unnamed: 1_level_1
2021-07-28,0.18334
2021-07-29,0.056908
2021-07-30,0.094713


In [10]:
# Bar chart of the mean compound score for the 'amzn' column.
# The frame is passed with its index intact: the bare reset_index() call that
# used to precede this line returned a new frame that was never assigned, so
# it had no effect and has been dropped.
fig2 = px.bar(mean_scores11, y='amzn', barmode='group', title='Compound sentiment scores of news aticles')
fig2

In [11]:
# Render the figure currently bound to 'fig'
fig

In [12]:
# Render the figure currently bound to 'fig2'
fig2

In [16]:
import plotly.express as px
import plotly.graph_objects as go
# df = px.data.iris()

# fig1 = px.line(data, x='Date', y='Change')
# Recolour the traces of 'fig' to a translucent dark grey (modifies 'fig' in place)
fig.update_traces(line={'color': 'rgba(50,50,50,0.2)'})

# Bars of the 'Open' column against the 'Date' column; rebinds 'fig2'
fig2 = px.bar(x=data['Date'], y=data['Open'])

# Combine the traces of both figures, line traces first, into one new figure
fig3 = go.Figure(data=(*fig.data, *fig2.data))
fig3.show()

import dash
import dash_core_components as dcc
import dash_html_components as html

# Minimal Dash application whose page contains only the combined figure
app = dash.Dash()
app.layout = html.Div(children=[dcc.Graph(figure=fig3)])

# Serve on port 8063 in debug mode.
# Turn off reloader if inside Jupyter
app.run_server(port=8063, debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8063/

Dash is running on http://127.0.0.1:8063/

Dash is running on http://127.0.0.1:8063/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on
