In [1]:
import pandas as pd 
import numpy as np

import yfinance as yf
from bs4 import BeautifulSoup
import requests

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import datetime

## Largest Companies by Market Cap Today (TOP 49 LIST)

In [2]:
# Download the market-cap ranking page and parse it for the table rows used below.
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() reports an HTTP error here instead of letting an error page
# surface later as an AttributeError while parsing.
response = requests.get('https://www.dogsofthedow.com/largest-companies-by-market-cap.htm',
                        timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

In [3]:
# Look up the <tr> elements with CSS classes row-2 through row-51, in order.
rows = [soup.find('tr', class_='row-' + str(row_number)) for row_number in range(2, 52)]

In [4]:
# Column-wise accumulators, filled in table order.
symbol, company, cap_rank, market_cap = [], [], [], []

for table_row in rows:
    # Read all four cells first; the ticker is the text of the link in column 1.
    ticker_text = table_row.find('td', class_='column-1').a.text
    name_text, rank_text, cap_text = (
        table_row.find('td', class_=f'column-{col}').text for col in (2, 3, 4)
    )

    # Every row except BRK.A is recorded.
    if ticker_text != 'BRK.A':
        symbol.append(ticker_text)
        company.append(name_text)
        cap_rank.append(rank_text)
        market_cap.append(cap_text)

In [5]:
# Assemble the scraped columns into one frame with a 1-based index.
# The index length is derived from the data instead of being hard-coded to 49,
# so the cell no longer raises ValueError when the scrape yields a different
# number of rows.
top_49_companies = pd.DataFrame(data=list(zip(symbol, company, cap_rank, market_cap)),
                                index=range(1, len(symbol) + 1),
                                columns=['Symbol', 'Company', 'Cap_Rank', 'Market_Cap'])

In [6]:
# Preview the first five rows of the market-cap table.
top_49_companies.head(5)

Unnamed: 0,Symbol,Company,Cap_Rank,Market_Cap
1,AAPL,Apple,1,2035.4
2,MSFT,Microsoft,2,1688.2
3,AMZN,Amazon,3,1666.8
4,GOOGL,Alphabet,4,1192.5
5,FB,Facebook,5,839.3


## Get Today's News for All Top 49 Companies

In [7]:
tickers = top_49_companies['Symbol']
finwiz_url = 'https://finviz.com/quote.ashx?t='
news_dict = dict()


def _parse_news_date(token, today):
    """Convert the date part of a news-table timestamp to a ``datetime.date``.

    Args:
        token: Date text such as ``'Nov-04-20'`` (format ``%b-%d-%y``) or ``'Today'``.
        today: Date returned for the ``'Today'`` label.

    Returns:
        The parsed ``datetime.date``.

    Raises:
        ValueError: If ``token`` is neither ``'Today'`` nor a ``%b-%d-%y`` date.
    """
    # NOTE(review): newer Finviz pages appear to label same-day rows "Today" — confirm
    # against the live markup.
    if token.lower() == 'today':
        return today
    # %y parses the actual two-digit year; the previous literal '-20' suffix only
    # matched dates from 2020 and raised ValueError for every other year.
    return datetime.datetime.strptime(token, '%b-%d-%y').date()


today_date = datetime.date.today()

for ticker in tickers:

    dates = list()
    times = list()
    newss = list()
    # Register the ticker up front so it has an entry even when no news is found.
    news_dict[ticker] = [dates, times, newss]

    # Local names differ from `source`, `soup` and `rows` on purpose: reusing them
    # would overwrite the market-cap page objects and break a re-run of those cells.
    page_html = requests.get(url=finwiz_url + ticker,
                             headers={'user-agent': 'my-app/0.0.1'},
                             timeout=30).text
    news_table = BeautifulSoup(page_html, 'lxml').find('table', id='news-table')
    if news_table is None:
        # No table with id 'news-table' in the response: keep the empty lists.
        print(f'No news table found for {ticker}; skipping.')
        continue

    # Reset per ticker so a date can never leak over from the previous ticker.
    current_date = None

    for news_row in news_table.find_all('tr'):
        link_cell = news_row.find('div', class_='news-link-left')
        if news_row.td is None or link_cell is None or link_cell.a is None:
            continue  # not a headline row

        stamp = news_row.td.text.strip()
        if not stamp:
            continue

        if stamp[0].isdigit():
            # Time-only row: it belongs to the most recent dated row.
            if current_date is None:
                continue  # no dated row seen yet, so the date is unknown
            clock = stamp
        else:
            # "<date> <time>" row: starts a new day.
            date_token, _sep, clock = stamp.partition(' ')
            current_date = _parse_news_date(date_token, today_date)

            # Only today's headlines are kept; stop at the first other date.
            if current_date != today_date:
                break

        dates.append(current_date)
        times.append(clock.strip())
        newss.append(link_cell.a.text)

## Sentiment Analysis

In [8]:
analyser = SentimentIntensityAnalyzer()

# Parallel lists: ticker symbol, concatenated headline text, sentiment label.
news = []
comp = []
compound = []

# The loop variable is `ticker`, not `company`: rebinding `company` would replace
# the list of company names that the market-cap DataFrame is built from.
for ticker in news_dict:
    # Each headline is prefixed with a space, so the text is '' when there are none.
    text = ''.join(' ' + headline for headline in news_dict[ticker][2])

    score = analyser.polarity_scores(text)['compound']

    # Conventional VADER cut-offs: >= 0.05 positive, <= -0.05 negative, otherwise
    # neutral. The earlier chain tested `<= 0.05` in its last branch, which labelled
    # a score of exactly 0.05 as 'neg'. The final `else` guarantees one label per
    # ticker so the three lists always stay the same length.
    if score >= 0.05:
        label = 'pos'
    elif score <= -0.05:
        label = 'neg'
    else:
        label = 'neu'

    compound.append(label)
    news.append(text)
    comp.append(ticker)

In [9]:
# One row per ticker: symbol, concatenated headlines, sentiment label.
# The 1-based index is sized from the data instead of being hard-coded to 49, so
# the cell does not raise ValueError when the ticker count differs.
top_49_company_news = pd.DataFrame(data=list(zip(comp, news, compound)),
                                   columns=['Symbol', 'News', 'Sentiment'],
                                   index=range(1, len(comp) + 1))

In [10]:
# Preview the first five rows of the news/sentiment table.
top_49_company_news.head(5)

Unnamed: 0,Symbol,News,Sentiment
1,AAPL,,neu
2,MSFT,,neu
3,AMZN,Amazon to invest $2.8 billion to build its se...,pos
4,GOOGL,Deutsche Bank Alumni Are Helping Son Remake J...,pos
5,FB,Facebook removes 'inauthentic' networks spann...,pos


## Merge the data sets

In [11]:
# Inner-join the market-cap table with the news/sentiment table.
# The key is named explicitly: with on=None pandas joins on every column the two
# frames share, which is only 'Symbol' today but would change silently if either
# frame gained another common column.
merged_df = top_49_companies.merge(right=top_49_company_news,
                                   how='inner',
                                   on='Symbol')

In [12]:
# Display the merged frame (market-cap columns plus News and Sentiment).
merged_df

Unnamed: 0,Symbol,Company,Cap_Rank,Market_Cap,News,Sentiment
0,AAPL,Apple,1,2035.4,,neu
1,MSFT,Microsoft,2,1688.2,,neu
2,AMZN,Amazon,3,1666.8,Amazon to invest $2.8 billion to build its se...,pos
3,GOOGL,Alphabet,4,1192.5,Deutsche Bank Alumni Are Helping Son Remake J...,pos
4,FB,Facebook,5,839.3,Facebook removes 'inauthentic' networks spann...,pos
5,TSM,Taiwan Semiconductor,7,469.0,,neu
6,V,Visa,8,420.6,,neu
7,TSLA,Tesla,9,408.2,"Tesla unveils 'Tesla Tequila' for $250, produ...",neu
8,WMT,Walmart,10,406.6,Facebook Racks Up India Wins With WhatsApp Pe...,pos
9,JNJ,Johnson & Johnson,11,367.9,,neu
