<a href="https://colab.research.google.com/github/laroccathebrux/-DS-SentimentalAnalisys/blob/main/Sentimental.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import libraries
import pandas as pd
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from urllib.request import urlopen
from urllib.request import Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer



In [2]:
# Parameters
# Stock symbols whose Finviz news tables are scraped and scored.
tickers = [
    'AAPL',
    'TSLA',
    'AMZN',
    'PBR',
    'VALE',
]
# Number of article headlines displayed per ticker in the preview.
n = 15

In [3]:
# Get Data
# Download the Finviz quote page of every ticker and keep the element whose
# id is 'news-table', keyed by ticker symbol.
finviz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

for ticker in tickers:
    url = finviz_url + ticker
    # An explicit user-agent header is sent with the request
    # (presumably the site refuses urllib's default one - TODO confirm).
    req = Request(url=url, headers={'user-agent': 'my-app/0.0.1'})
    # The context manager closes the HTTP response once the markup has been
    # parsed, instead of leaving one open connection per ticker.
    with urlopen(req) as resp:
        html = BeautifulSoup(resp, features="lxml")
    # find() returns None when the page has no element with this id; the
    # value is stored as-is so every requested ticker has an entry.
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table

# Preview: print at most n recent headlines (with their timestamp text) for
# every configured ticker.
for ticker in tickers:
    table = news_tables.get(ticker)

    print ('\n')
    if table is None:
        # Handle a missing table per ticker instead of wrapping the whole
        # loop in `except KeyError: pass`, which silently dropped every
        # remaining ticker.
        print ('No news table available for {}'.format(ticker))
        continue
    print ('Recent News Headlines for {}: '.format(ticker))

    shown = 0
    for table_row in table.find_all('tr'):
        # Checked before printing so that n <= 0 prints nothing.
        if shown >= n:
            break
        # Rows without a link or a cell carry no headline; skip them rather
        # than failing with AttributeError on None.
        if table_row.a is None or table_row.td is None:
            continue
        a_text = table_row.a.text
        td_text = table_row.td.text.strip()
        print(shown, ' - ', a_text,'(',td_text,')')
        shown += 1



Recent News Headlines for AAPL: 
0  -  Dow Jones Futures Rally As Biden, Republicans 'Hopeful' For Stimulus Deal; GME Stock Crashes Another 30% ( Feb-02-21 06:29AM )
1  -  The Zacks Analyst Blog Highlights: Apple, Ford, Citizens Financial Group, Ally Financial and Capital One Financial ( 05:44AM )
2  -  Why This Apple Analyst Thinks A Collaboration With GM, PSA On Electric Vehicles Is Likely ( 03:12AM )
3  -  War Of Labels: How Facebook Plans To Tackle Apple's New Privacy Rules ( 01:38AM )
4  -  Apple sees blizzard of demand for $14 billion debt deal ( Feb-01-21 07:56PM )
5  -  Tech Players Move to Promote Black-Owned Businesses ( 07:02PM )
6  -  Apple Sells $14 Billion of Bonds as Share Buybacks Seen Rising ( 05:23PM )
7  -  Dow Jones Stocks To Buy And Watch In February 2021: Apple, Microsoft Jump ( 04:34PM )
8  -  Dow Jones Rallies 230 Points Amid Biden, GOP Stimulus Talks; Is Silver The New GameStop? ( 04:25PM )
9  -  Apple (AAPL), Microsoft (MSFT), Disney (DIS) Are Top Admired Co

In [4]:
# Iterate through the news
# Flatten every scraped table into [ticker, date, time, headline] rows.
parsed_news = []
for file_name, news_table in news_tables.items():
    if news_table is None:
        # No table was found for this ticker, so there is nothing to parse.
        print('No news table available for {}; skipping'.format(file_name))
        continue

    # Kept from the original: the part of the key before the first
    # underscore is used as the ticker (keys here are plain symbols).
    ticker = file_name.split('_')[0]
    # Reset per table so a date never leaks from the previous ticker and the
    # name is always bound, even when the first row shows only a time.
    date = None

    for x in news_table.find_all('tr'):
        # Rows without a link or a cell carry no headline.
        if x.a is None or x.td is None:
            continue
        text = x.a.get_text()
        date_scrape = x.td.text.split()
        if not date_scrape:
            # Empty timestamp cell: nothing to index into.
            continue

        if len(date_scrape) == 1:
            # Only a time is given; the row keeps the most recently seen date.
            time = date_scrape[0]
        else:
            date = date_scrape[0]
            time = date_scrape[1]

        parsed_news.append([ticker, date, time, text])

In [5]:
# Fetch the 'vader_lexicon' package through NLTK's downloader. The recorded
# output of this cell shows it being saved under /root/nltk_data.
import nltk
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [6]:
# Sentiment Analysis
# Score every headline with the analyzer and attach the resulting score
# columns to the headline table.
columns = ['Ticker', 'Date', 'Time', 'Headline']
analyzer = SentimentIntensityAnalyzer()

headlines = pd.DataFrame(parsed_news, columns=columns)
# One polarity_scores() result per headline, in row order.
scores = [analyzer.polarity_scores(headline) for headline in headlines['Headline']]
df_scores = pd.DataFrame(scores)

# Positional join on the default index; a clashing score column name would
# receive the '_right' suffix.
news = headlines.join(df_scores, rsuffix='_right')

In [7]:
# View Data
# Show the first scored rows of each ticker, then rank the tickers by the
# mean of their 'compound' score.
news['Date'] = pd.to_datetime(news.Date).dt.date

unique_ticker = news['Ticker'].unique().tolist()
news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}

values = []
for ticker in tickers:
    dataframe = news_dict.get(ticker)
    print ('\n')
    if dataframe is None:
        # A configured ticker may have produced no parsed headlines; report
        # it and record NaN so `values` stays aligned with `tickers`
        # (indexing news_dict directly raised KeyError here).
        print ('No headlines available for {}'.format(ticker))
        values.append(float('nan'))
        continue

    dataframe = dataframe.set_index('Ticker')
    dataframe = dataframe.drop(columns = ['Headline'])
    print (dataframe.head())

    mean = round(dataframe['compound'].mean(), 2)
    values.append(mean)

df = pd.DataFrame(list(zip(tickers, values)), columns =['Ticker', 'Mean Sentiment'])
df = df.set_index('Ticker')
# sort_values places NaN entries (tickers without headlines) last by default.
df = df.sort_values('Mean Sentiment', ascending=False)
print ('\n')
print (df)



              Date     Time    neg    neu  pos  compound
Ticker                                                  
AAPL    2021-02-02  06:29AM  0.000  1.000  0.0    0.0000
AAPL    2021-02-02  05:44AM  0.000  1.000  0.0    0.0000
AAPL    2021-02-02  03:12AM  0.000  1.000  0.0    0.0000
AAPL    2021-02-02  01:38AM  0.262  0.738  0.0   -0.5994
AAPL    2021-02-01  07:56PM  0.333  0.667  0.0   -0.4588


              Date     Time  neg    neu    pos  compound
Ticker                                                  
TSLA    2021-02-02  06:29AM  0.0  1.000  0.000      0.00
TSLA    2021-02-02  04:14AM  0.0  1.000  0.000      0.00
TSLA    2021-02-02  02:21AM  0.0  1.000  0.000      0.00
TSLA    2021-02-01  07:22PM  0.0  0.862  0.138      0.34
TSLA    2021-02-01  06:30PM  0.0  1.000  0.000      0.00


              Date     Time  neg    neu    pos  compound
Ticker                                                  
AMZN    2021-02-02  06:00AM  0.0  0.885  0.115    0.0772
AMZN    2021-02-02  05:00