In [2]:
import pandas as pd
import numpy as np
import datetime
import requests
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from requests.exceptions import ConnectionError
from bs4 import BeautifulSoup

In [28]:
# Obtiene todos los textos dentro de spans para un web_content especificado

def web_content_div(web_content,class_path):
    web_content_div = web_content.find_all('div',{'class': class_path})
    try:
        spans = web_content_div[0].find_all('fin-streamer')
        texts = [span.get_text() for span in spans]
    except IndexError:
        texts = []
    
    return texts

In [30]:
# Extrae la información de precio, % cambio y market cap para un código de stock
def real_time_price(stock_code):
   
    url='https://finance.yahoo.com/quote/'+ stock_code + '?p=' + stock_code + '&.tsrc=fin-srch'
    
    try:
        r = requests.get(url)
        web_content = BeautifulSoup(r.text, 'lxml')
        
        texts = web_content_div(web_content, "My(6px) Pos(r) smartphone_Mt(6px) W(100%)")
       
        if texts != []:
            price, change = texts[0], texts[2]
        else:
            price, change = [1], [1]
            print('Nothing found')
        data=pd.read_html(url)     
        market_cap=df=data[1].loc[0,1]
        
    except ConnectionError:
        price, change, market_cap = [], [], []
        print('Connection Error')
    return price, change, market_cap



In [31]:
# Extrae tabla de compañías del SP500 de wikipedia
sp500url='https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
code_table=pd.read_html(sp500url)
symbols = code_table[0]
symbols.rename(columns={'Symbol': 'stock_code', 'GICS Sector':'sector', 'GICS Sub-Industry':'industry'}, inplace=True)

In [32]:
tickers=symbols['stock_code'].tolist()

In [20]:
# reemplaza tickers con '.'

for i in range(len(tickers)):
    if tickers[i]=='BRK.B':
        tickers[i]='BRK-B'
    elif tickers[i]=='BF.B':
        tickers[i]='BF-B'

In [23]:
# Aplica el webscrapping para una lista de tickers

tickers = ['AAPL', 'GOOGL','ABBV', 'AMZN', 'TSLA', 'MSFT', 'JNJ', 'HON', 'ETN', 'XOM', 'COP']
info = []

for stock_code in tickers:
    time_stamp = datetime.datetime.now()
    time_stamp = time_stamp.strftime('%d-%m-%Y %H:%M:%S')
    price, change, market_cap = real_time_price(stock_code)
    
    info.append(
        {
            'time_stamp': time_stamp,
            'stock_code': stock_code,
            'price': price,
            'change': change,
            'market_cap': market_cap
        }  
    )
prices_df=pd.DataFrame(info)

In [33]:
# Join con la tabla de symbols
df=pd.merge(prices_df, symbols[['stock_code','sector', 'industry']], on='stock_code', how='left')
df['change']=df['change'].str.replace('[()]','')




The default value of regex will change from True to False in a future version.



In [34]:
df['change']=(df['change'].str.rstrip('%').astype(float))
df['price']=df['price'].astype(float)

# convierte market_cap de str a int en billions
multipliers = {'K':1000, 'M':1000000, 'B':1000000000, 'T':1000000000000}
def string_to_billions(string):
    if string[-1].isdigit():
        return float(string)/1000000000
    mult =multipliers[string[-1]]
    return float(string[:-1])*mult/1000000000

df['market_cap'] = df['market_cap'].apply(string_to_billions)

In [36]:
fig = px.treemap(df, path=['sector', 'industry', 'stock_code'], values='market_cap',
                  color='change', hover_data=['change'], 
                  color_continuous_scale=[(0, "#f63538"), (0.5, "#414554"), (1, "#30cc5a")],
                  color_continuous_midpoint=0, range_color=[-3,3])

fig.data[0].customdata = fig.data[0].marker.colors
fig.data[0].texttemplate = "<b>%{label}</b><br>Market Cap: %{value:.0f} B<br>Change: %{customdata:.2f}%<br>"
fig.update_traces(hovertemplate='%{label}<br>Change=%{customdata:.2f}%')
fig.update_traces(marker=dict(line=dict(width=1, color='#262931')))
fig.update_layout({
    'plot_bgcolor': 'rgba(0,0,0,0)',
    'paper_bgcolor': 'rgba(0,0,0,0)'
})
fig.update_layout(margin = dict(t=50, l=25, r=25, b=25), autosize=True)
print('Treemap')
fig.show()

Treemap
