In [33]:
# Import libraries
import pandas as pd
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from urllib.request import urlopen
from urllib.request import Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import  datetime

# Parameters: notebook-wide settings read by the cells below.
n = 3 # number of article headlines printed per ticker in the preview
tickers = ['AAPL', 'TSLA', 'AMZN']  # symbols appended to the Finviz quote URL

In [34]:
# Get Data
# Download the Finviz quote page of every ticker and keep the element with
# id 'news-table' in `news_tables`, keyed by ticker symbol.
finviz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

# Browser-like headers sent with every request.
headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
            "X-Requested-With": "XMLHttpRequest"}

for ticker in tickers:
    url = finviz_url + ticker
    req = Request(url=url, headers=headers)
    # Parse inside the context manager so the HTTP response is always closed,
    # even if parsing raises.
    with urlopen(req) as resp:
        html = BeautifulSoup(resp, features="lxml")
    news_table = html.find(id='news-table')
    if news_table is None:
        # Storing None would only defer the failure to the cells that call
        # find_all() on it, so report it here and leave the ticker out.
        print('No news table found for {}'.format(ticker))
        continue
    news_tables[ticker] = news_table

# Preview: print the first n headlines (with their timestamp cell) per ticker.
for ticker in tickers:
    # .get() instead of [] so one missing ticker no longer silently aborts the
    # preview for every ticker that follows it.
    df = news_tables.get(ticker)

    print ('\n')
    print ('Recent News Headlines for {}: '.format(ticker))

    if df is None:
        print('(no news table available)')
        continue

    # Keep only rows that carry both a link and a cell; rows missing either
    # would raise AttributeError on `.text`.
    df_tr = [
        table_row for table_row in df.find_all('tr')
        if table_row.a is not None and table_row.td is not None
    ]

    # Slicing caps the preview at n printed headlines (and prints nothing
    # when n is 0).
    for table_row in df_tr[:n]:
        a_text = table_row.a.text
        td_text = table_row.td.text.strip()
        print(a_text,'(',td_text,')')



Recent News Headlines for AAPL: 
2 Under-the-Radar Gaming Stocks You Can Buy and Hold for the Next Decade ( Today 11:45AM )
iPhone 15: Release date, price specs and features revealed ( 09:49AM )
Analyst: Apple Stock Could Soar to $240 ( 09:16AM )


Recent News Headlines for TSLA: 
Tesla Vs. BYD 2023: TSLA Surges On Bullish Dojo Bet; BYD Hits Resistance ( Today 11:46AM )
Huge Positive News for Tesla Stock Investors ( 10:15AM )
10 Reddit Stocks with Biggest Upside ( 09:44AM )


Recent News Headlines for AMZN: 
Amazon, Walmart face a huge new competitor with a big advantage ( Today 10:17AM )
Up 80% This Year, Are Investors Overrating Shopify Stock? ( 08:30AM )
Better Buy: Amazon vs. Alibaba Stock ( 08:06AM )


In [35]:
# Iterate through the news
# Flatten every news table into rows of [ticker, date, time, headline].
parsed_news = []
for file_name, news_table in news_tables.items():
    if news_table is None:
        # Nothing was scraped for this ticker.
        continue

    ticker = file_name.split('_')[0]
    # Reset per table so a date never carries over from the previous ticker
    # and is never unbound when the first row has only a time.
    date = None

    for x in news_table.find_all('tr'):
        # Skip rows that have no link or no cell; they would raise
        # AttributeError below.
        if x.a is None or x.td is None:
            continue

        date_scrape = x.td.text.split()
        if not date_scrape:
            continue

        text = x.a.get_text()

        if len(date_scrape) == 1:
            # Only a time is present: the row reuses the date of the row above.
            time = date_scrape[0]
        else:
            date = date_scrape[0]
            time = date_scrape[1]

        parsed_news.append([ticker, date, time, text])

In [41]:
# Sentiment Analysis
# Build the headline table and attach one row of VADER scores per headline.
columns = ['Ticker', 'Date', 'Time', 'Headline']
analyzer = SentimentIntensityAnalyzer()

news = pd.DataFrame(parsed_news, columns=columns)

# One score dict per headline, in the same order as the rows of `news`.
scores = [analyzer.polarity_scores(headline) for headline in news['Headline']]
df_scores = pd.DataFrame(scores)

# Index-aligned join: row i of the scores belongs to headline i.
news = news.join(df_scores, rsuffix='_right')

In [49]:
# Display the headline table together with its sentiment score columns.
news

Unnamed: 0,Ticker,Date,Time,Headline,neg,neu,pos,compound
0,AAPL,2023-09-17,11:45AM,2 Under-the-Radar Gaming Stocks You Can Buy an...,0.0,1.000,0.000,0.0000
1,AAPL,2023-09-17,09:49AM,"iPhone 15: Release date, price specs and featu...",0.0,1.000,0.000,0.0000
2,AAPL,2023-09-17,09:16AM,Analyst: Apple Stock Could Soar to $240,0.0,1.000,0.000,0.0000
3,AAPL,2023-09-17,07:00AM,China's Real Aim In Move Against iPhones: Send...,0.0,1.000,0.000,0.0000
4,AAPL,2023-09-17,05:30AM,The Tech Trade Is Showing Cracks. Higher Rates...,0.2,0.800,0.000,-0.4576
...,...,...,...,...,...,...,...,...
295,AMZN,2023-09-12,08:51AM,These 5 Real Market Leaders Are Near Buy Points,0.0,1.000,0.000,0.0000
296,AMZN,2023-09-12,08:26AM,"Amazon (AMZN) Stock Quotes, Company News And C...",0.0,0.825,0.175,0.1779
297,AMZN,2023-09-12,08:06AM,S&P 500 Giants Among These 5 Real Market Leade...,0.0,1.000,0.000,0.0000
298,AMZN,2023-09-12,08:00AM,Amazon Stock Ready To Break Out As It Bolsters AI,0.0,0.656,0.344,0.4939


In [56]:
# Display the per-ticker mean sentiment table.
# NOTE(review): the output shown here matches the `df` built in the cell that
# computes 'Mean Sentiment' further down, so this cell presumably relies on
# out-of-order execution; confirm it survives Restart & Run All.
df

Unnamed: 0_level_0,Mean Sentiment
Ticker,Unnamed: 1_level_1
AMZN,0.24
TSLA,0.13
AAPL,0.04


In [63]:
# Today's news
# Rows scraped for the current day carry the label 'Today' instead of a date;
# swap in the real date so the whole column can be parsed.
news['Date'] = news['Date'].replace('Today', datetime.date.today())
news['Date'] = pd.to_datetime(news['Date']).dt.date

# One sub-frame per ticker that actually has rows in `news`.
unique_ticker = news['Ticker'].unique().tolist()
news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}

# Mean compound score per configured ticker, in the order of `tickers`.
values = []
for ticker in tickers:
    # A configured ticker with no scraped rows gets an empty frame (mean NaN)
    # instead of raising KeyError, which keeps `values` aligned with `tickers`.
    dataframe = news_dict.get(ticker, news.iloc[0:0])
    dataframe = dataframe.set_index('Ticker')
    dataframe = dataframe.drop(columns = ['Headline'])
    print ('\n')
    print (dataframe)

    mean = round(dataframe['compound'].mean(), 2)
    values.append(mean)

# Summary table: one row per ticker, highest mean sentiment first.
df = pd.DataFrame(list(zip(tickers, values)), columns =['Ticker', 'Mean Sentiment'])
df = df.set_index('Ticker')
df = df.sort_values('Mean Sentiment', ascending=False)
print ('\n')



              Date     Time  neg  neu  pos  compound
Ticker                                              
AAPL    2023-09-17  11:45AM  0.0  1.0  0.0    0.0000
AAPL    2023-09-17  09:49AM  0.0  1.0  0.0    0.0000
AAPL    2023-09-17  09:16AM  0.0  1.0  0.0    0.0000
AAPL    2023-09-17  07:00AM  0.0  1.0  0.0    0.0000
AAPL    2023-09-17  05:30AM  0.2  0.8  0.0   -0.4576
...            ...      ...  ...  ...  ...       ...
AAPL    2023-09-13  01:36PM  0.0  1.0  0.0    0.0000
AAPL    2023-09-13  01:21PM  0.0  1.0  0.0    0.0000
AAPL    2023-09-13  01:05PM  0.0  1.0  0.0    0.0000
AAPL    2023-09-13  01:05PM  0.0  1.0  0.0    0.0000
AAPL    2023-09-13  12:52PM  0.0  1.0  0.0    0.0000

[100 rows x 6 columns]


              Date     Time    neg    neu    pos  compound
Ticker                                                    
TSLA    2023-09-17  11:46AM  0.000  1.000  0.000    0.0000
TSLA    2023-09-17  10:15AM  0.000  0.459  0.541    0.7096
TSLA    2023-09-17  09:44AM  0.000  1.000  0.00