In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from urllib.request import urlopen, Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer



In [2]:
# Ticker symbols whose news pages are fetched and analysed by the later cells.
tickers = ['AAPL', 'TSLA', 'AMZN']

# Number of most-recent headlines printed per ticker in the preview cell.
n = 3

In [3]:
# Base quote URL; each ticker symbol is appended to build the page address.
finviz = 'https://finviz.com/quote.ashx?t='

# Maps ticker symbol -> the element with id 'news-table' parsed from its page.
news_tables = {}

for ticker in tickers:
    url = finviz + ticker
    # The request carries an explicit user-agent header.
    req = Request(url, headers={'user-agent': 'my-app/0.0.1'})

    # Use the response as a context manager so the underlying connection is
    # closed once the page has been parsed, instead of leaking one open
    # socket per ticker.
    with urlopen(req) as resp:
        html = BeautifulSoup(resp, 'lxml')

    # find() returns None when no element carries this id; the later cells
    # call methods on the stored value and therefore expect a real table.
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table

In [8]:
# Preview the first `n` headlines (with their timestamp cell) for each ticker.
for ticker in tickers:
    # Look the table up per ticker. A blanket `except KeyError: pass` around
    # the whole loop would silently stop the preview for every remaining
    # ticker as soon as one of them is missing.
    table = news_tables.get(ticker)
    if table is None:
        print('\n')
        print('No news table available for {}'.format(ticker))
        continue

    tr = table.find_all('tr')

    # Keep only rows that actually hold a link and a cell; calling
    # get_text() on a missing tag would raise AttributeError.
    headline_rows = [row for row in tr if row.a is not None and row.td is not None]

    print('\n')
    print('Recent News Headlines for {}'.format(ticker))

    for table_row in headline_rows[:n]:
        a_text = table_row.a.get_text()
        td_text = table_row.td.get_text().strip()

        print(a_text, '(', td_text, ')')



Recent News Headlines for AAPL
Dow Jones Stocks To Buy And Watch In July 2020: Intel, Nike, Visa; Pfizer Breaks Out ( Jul-23-20 10:17AM )
NVIDIA Wants to Buy Arm, SoftBank's Semiconductor Subsidiary ( 09:19AM )
Apple-Backed Study Says Apple's 30% Cut Is Totally Worth It ( 08:00AM )


Recent News Headlines for TSLA
Dow Jones Falls 100 Points On Jobless Data; Tesla, Twitter Jump On Earnings, But Microsoft Tumbles ( Jul-23-20 10:16AM )
Tesla's Profitable Quarter and Earnings Beat: What Wall Street Is Saying ( 10:10AM )
Dow Jones Today, Stocks Mixed After Jobs Data; Tesla's Unexpected Profit, Twitter's 186-Million Surprise ( 09:56AM )


Recent News Headlines for AMZN
Walmart Sells Indian Wholesale Business to E-Commerce Giant Flipkart ( Jul-23-20 09:09AM )
Apple-Backed Study Says Apple's 30% Cut Is Totally Worth It ( 08:00AM )
4 Cloud Stocks to Win Big on Microsoft's Stellar Earnings Show ( 07:36AM )


In [10]:
# Flatten every scraped news table into rows of [ticker, date, time, headline].
parsed_news = []

for file_name, news_table in news_tables.items():
    # Rows that show only a time reuse the date of the closest preceding row
    # that showed one. Reset it for every table so a date is never carried
    # over from the previous ticker, and so the name is always bound even if
    # the first row holds only a time.
    date = None

    for x in news_table.find_all('tr'):
        # Skip rows without a headline link or timestamp cell.
        if x.a is None or x.td is None:
            continue

        text = x.a.get_text()
        date_scrape = x.td.text.split()

        # An empty timestamp cell gives nothing to index into.
        if not date_scrape:
            continue

        if len(date_scrape) == 1:
            # Only a time is present; keep the date seen on an earlier row.
            time = date_scrape[0]
        else:
            date = date_scrape[0]
            time = date_scrape[1]

        # Take the part of the key before the first underscore as the ticker.
        ticker = file_name.split('_')[0]
        parsed_news.append([ticker, date, time, text])

In [15]:
# Tabulate the parsed rows, one headline per row.
columns = ['Ticker', 'Date', 'Time', 'Headline']
news = pd.DataFrame(parsed_news, columns=columns)

# Score each headline; polarity_scores() yields one dict of scores per text.
analyzer = SentimentIntensityAnalyzer()
scores = [analyzer.polarity_scores(headline) for headline in news['Headline']]

# Turn the score dicts into columns and attach them to the headline rows by
# index position.
df_scores = pd.DataFrame(scores)
news = news.join(df_scores, rsuffix='_right')

In [16]:
# Display the headline table together with its joined sentiment score columns.
news

Unnamed: 0,Ticker,Date,Time,Headline,neg,neu,pos,compound
0,AAPL,Jul-23-20,10:17AM,Dow Jones Stocks To Buy And Watch In July 2020...,0.000,1.000,0.000,0.0000
1,AAPL,Jul-23-20,09:19AM,"NVIDIA Wants to Buy Arm, SoftBank's Semiconduc...",0.000,1.000,0.000,0.0000
2,AAPL,Jul-23-20,08:00AM,Apple-Backed Study Says Apple's 30% Cut Is Tot...,0.171,0.651,0.178,0.0240
3,AAPL,Jul-23-20,07:52AM,Nvidia Eyes Biggest-Ever Chip Deal in Pursuit ...,0.000,1.000,0.000,0.0000
4,AAPL,Jul-23-20,06:22AM,Combative TikTok Founder Races to Save App Bef...,0.243,0.541,0.216,-0.1027
...,...,...,...,...,...,...,...,...
295,AMZN,Jul-20-20,04:24PM,"Stock Market Today: Amazon.com Goes Gonzo, Nas...",0.000,1.000,0.000,0.0000
296,AMZN,Jul-20-20,04:22PM,"US STOCKS-Wall Street closes higher, Nasdaq se...",0.000,0.839,0.161,0.3182
297,AMZN,Jul-20-20,04:13PM,Nasdaq Momentum Is Hottest in 20 Years With Am...,0.000,0.855,0.145,0.1779
298,AMZN,Jul-20-20,04:12PM,Amazon Stock Jumps As E-Commerce Growth Due To...,0.000,0.650,0.350,0.5106


In [21]:
# NOTE(review): the recorded output of this cell already shows ISO-formatted
# dates, yet the only visible Date conversion lives in the following cell.
# The cells appear to have been executed out of order; confirm the notebook
# reproduces with Restart Kernel -> Run All.
news

Unnamed: 0,Ticker,Date,Time,Headline,neg,neu,pos,compound
0,AAPL,2020-07-23,10:17AM,Dow Jones Stocks To Buy And Watch In July 2020...,0.000,1.000,0.000,0.0000
1,AAPL,2020-07-23,09:19AM,"NVIDIA Wants to Buy Arm, SoftBank's Semiconduc...",0.000,1.000,0.000,0.0000
2,AAPL,2020-07-23,08:00AM,Apple-Backed Study Says Apple's 30% Cut Is Tot...,0.171,0.651,0.178,0.0240
3,AAPL,2020-07-23,07:52AM,Nvidia Eyes Biggest-Ever Chip Deal in Pursuit ...,0.000,1.000,0.000,0.0000
4,AAPL,2020-07-23,06:22AM,Combative TikTok Founder Races to Save App Bef...,0.243,0.541,0.216,-0.1027
...,...,...,...,...,...,...,...,...
295,AMZN,2020-07-20,04:24PM,"Stock Market Today: Amazon.com Goes Gonzo, Nas...",0.000,1.000,0.000,0.0000
296,AMZN,2020-07-20,04:22PM,"US STOCKS-Wall Street closes higher, Nasdaq se...",0.000,0.839,0.161,0.3182
297,AMZN,2020-07-20,04:13PM,Nasdaq Momentum Is Hottest in 20 Years With Am...,0.000,0.855,0.145,0.1779
298,AMZN,2020-07-20,04:12PM,Amazon Stock Jumps As E-Commerce Growth Due To...,0.000,0.650,0.350,0.5106


In [22]:
# Convert the scraped date strings to datetime.date values. No explicit
# format is passed, so pandas infers it from the data.
news['Date'] = pd.to_datetime(news.Date).dt.date

# One sub-frame of headlines per ticker that actually appears in `news`.
unique_ticker = news['Ticker'].unique().tolist()
news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}

# `scored_tickers` and `values` are appended together so they stay aligned
# even when a configured ticker has to be skipped.
scored_tickers = []
values = []
for ticker in tickers:
    # A configured ticker with no parsed headlines has no entry in
    # `news_dict`; skip it rather than fail with KeyError.
    if ticker not in news_dict:
        print('\n')
        print('No headlines parsed for {}; skipping'.format(ticker))
        continue

    dataframe = news_dict[ticker]
    dataframe = dataframe.set_index('Ticker')
    dataframe = dataframe.drop(columns='Headline')
    print('\n')
    print(dataframe.head())

    # Average compound score for this ticker, rounded to two decimals.
    mean = round(dataframe['compound'].mean(), 2)
    scored_tickers.append(ticker)
    values.append(mean)

# Summary table: one row per scored ticker, highest mean sentiment first.
df = pd.DataFrame(list(zip(scored_tickers, values)), columns=['Ticker', 'Mean Sentiment'])
df = df.set_index('Ticker')
df = df.sort_values('Mean Sentiment', ascending=False)
print('\n')
print(df)



              Date     Time    neg    neu    pos  compound
Ticker                                                    
AAPL    2020-07-23  10:17AM  0.000  1.000  0.000    0.0000
AAPL    2020-07-23  09:19AM  0.000  1.000  0.000    0.0000
AAPL    2020-07-23  08:00AM  0.171  0.651  0.178    0.0240
AAPL    2020-07-23  07:52AM  0.000  1.000  0.000    0.0000
AAPL    2020-07-23  06:22AM  0.243  0.541  0.216   -0.1027


              Date     Time  neg    neu    pos  compound
Ticker                                                  
TSLA    2020-07-23  10:16AM  0.0  1.000  0.000    0.0000
TSLA    2020-07-23  10:10AM  0.0  0.775  0.225    0.4404
TSLA    2020-07-23  09:56AM  0.0  0.706  0.294    0.6124
TSLA    2020-07-23  09:52AM  0.0  0.720  0.280    0.5423
TSLA    2020-07-23  09:46AM  0.0  0.621  0.379    0.6705


              Date     Time    neg    neu    pos  compound
Ticker                                                    
AMZN    2020-07-23  09:09AM  0.000  1.000  0.000    0.0000
AMZN 