In [79]:
import csv
import random
import re
from urllib.parse import urljoin, urlparse
from urllib.request import Request, urlopen

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs

## Reference tutorial
This notebook follows the directions in:
https://blog.jovian.ai/web-scraping-yahoo-finance-using-python-7c4612fab70c

In [60]:
# Yahoo Finance Canada quote page for the HBM.TO ticker.
my_url = 'https://ca.finance.yahoo.com/quote/HBM.TO?p=HBM.TO&.tsrc=fin-srch'
# requests applies no timeout by default; without one a stalled connection
# would block the kernel indefinitely.
response = requests.get(my_url, timeout=10)

In [61]:
# Confirm the request succeeded (ok flag and HTTP status code) before parsing the body.
print("response.ok : {} , response.status_code : {}".format(response.ok , response.status_code))

response.ok : True , response.status_code : 200


In [62]:
# Peek at the first 500 characters of the raw HTML returned by the server.
print("Preview of response.text : ", response.text[:500])

Preview of response.text :  <!doctype html><html data-color-theme="light" id="atomic" class="NoJs chrome desktop failsafe" lang="en-CA"><head prefix="og: https://ogp.me/ns#"><script>window.performance && window.performance.mark && window.performance.mark('PageStart');</script><meta charset="utf-8"><title>Hudbay Minerals Inc. (HBM.TO) Stock Price, News, Quote &amp; History - Yahoo Finance</title><meta name="keywords" content="HBM.TO, Hudbay Minerals Inc., HBM.TO stock chart, Hudbay Minerals Inc. stock chart, stock chart, st


In [64]:
def get_page(url, timeout=10, headers=None):
    """Download a web page and return it parsed as a BeautifulSoup document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout on its own, so a stalled connection would
            otherwise hang the notebook.
        headers: Optional dict of HTTP headers to send with the request
            (for example a custom ``User-Agent``). ``None`` sends the
            library defaults.

    Returns:
        The response body parsed with the ``'html.parser'`` backend.

    Raises:
        Exception: If the response is not OK; the status code is printed
            first.
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` on connection failures or timeouts.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    if not response.ok:
        print('Status code:', response.status_code)
        raise Exception('Failed to load page {}'.format(url))
    return bs(response.text, 'html.parser')

In [65]:
# Fetch and parse the quote page with the helper, then confirm the type of the returned object.
doc = get_page(my_url)
print('Type of doc: ',type(doc))

Type of doc:  <class 'bs4.BeautifulSoup'>


In [66]:
# Look up the page's <title> element; as the cell's last expression it is displayed below.
doc.find('title')

<title>Hudbay Minerals Inc. (HBM.TO) Stock Price, News, Quote &amp; History - Yahoo Finance</title>

In [67]:
# Collect every <div> whose class attribute is this exact string -- the class
# carried by the news-item div printed further down.
# NOTE(review): presumably generated by Yahoo's CSS tooling and liable to
# change; confirm the selector if this list ever comes back empty.
div_tags = doc.find_all('div', {'class': "Ov(h) Pend(44px) Pstart(25px)"})

In [68]:
# Number of matching divs found on the page.
len(div_tags)

9

In [69]:
# Dump the raw HTML of the match at index 1 to see which child tags hold the fields of interest.
print(div_tags[1])

<div class="Ov(h) Pend(44px) Pstart(25px)"><div class="C(#959595) Fz(11px) D(ib) Mb(6px)">Zacks</div><h3 class="Mb(5px)"><a class="js-content-viewer wafer-caas Fw(b) Fz(18px) Lh(23px) LineClamp(2,46px) Fz(17px)--sm1024 Lh(19px)--sm1024 LineClamp(2,38px)--sm1024 mega-item-header-link Td(n) C(#0078ff):h C(#000) LineClamp(2,46px) LineClamp(2,38px)--sm1024 not-isInStreamVideoEnabled" data-uuid="bc5ae09d-2fbf-3c28-9520-7723ea6049a7" data-wf-caas-prefetch="1" data-wf-caas-uuid="bc5ae09d-2fbf-3c28-9520-7723ea6049a7" href="/news/hudbay-minerals-hbm-reports-q3-004512113.html"><u class="StretchedBox"></u>HudBay Minerals (HBM) Reports Q3 Loss, Tops Revenue Estimates</a></h3><p class="Fz(14px) Lh(19px) Fz(13px)--sm1024 Lh(17px)--sm1024 LineClamp(2,38px) LineClamp(2,34px)--sm1024 M(0)">HudBay Minerals (HBM) delivered earnings and revenue surprises of 16.67% and 19.33%, respectively, for the quarter ended September 2022. Do the numbers hold clues to what lies ahead for the stock?</p></div>


In [72]:
# In the tag printed above, the first nested <div> holds the publisher name
# and the first <a> holds the headline text.
print("Source: ", div_tags[1].find('div').text)
print("Headline : {}".format(div_tags[1].find('a').text))

Source:  Zacks
Headline : HudBay Minerals (HBM) Reports Q3 Loss, Tops Revenue Estimates


In [71]:
# The thumbnail <img> is not inside the news div itself, so step up to the
# enclosing element and search from there. `find_parent` is the bs4 name for
# the legacy BS3-style alias `findParent`.
print("Image URL: ",div_tags[1].find_parent().find('img')['src'])

Image URL:  https://s.yimg.com/uu/api/res/1.2/E0X8OtP1N.eOKR9Mkc84cw--~B/Zmk9c3RyaW07aD0xMjM7cT04MDt3PTIyMDthcHBpZD15dGFjaHlvbg--/https://media.zenfs.com/en/zacks.com/bdc2850a48a0f40806db817a9e24f36f


In [73]:
def get_news_tags(doc):
    """Return every news-item ``<div>`` found in the parsed page.

    Args:
        doc: Parsed document (anything exposing ``find_all``).

    Returns:
        Whatever ``doc.find_all`` yields for ``<div>`` elements whose class
        attribute equals the news-item class string below.
    """
    # Attribute filter identifying the news-item divs by their class string.
    selector = {'class': "Ov(h) Pend(44px) Pstart(25px)"}
    return doc.find_all('div', selector)

In [76]:
BASE_URL = 'https://ca.finance.yahoo.com'  # Site root used to resolve article links


def _text_or_none(tag):
    """Return ``tag.text``, or None when the lookup that produced ``tag`` found nothing."""
    return tag.text if tag is not None else None


def parse_news(news_tag):
    """Extract the data points of one news item and return them as a dict.

    Any element that is absent from ``news_tag`` yields ``None`` for its
    field instead of raising, so one oddly-shaped item cannot abort a whole
    scrape.

    Args:
        news_tag: A news-item tag as returned by ``get_news_tags``.

    Returns:
        Dict with keys ``'source'``, ``'headline'``, ``'url'``, ``'content'``
        and ``'image'``. ``'url'`` is the link's href resolved against
        ``BASE_URL``.
    """
    link_tag = news_tag.find('a')  # looked up once; supplies both headline and href
    href = link_tag.get('href') if link_tag is not None else None

    # The thumbnail <img> sits beside the news div, under the same parent.
    container = news_tag.find_parent()
    image_tag = container.find('img') if container is not None else None

    return {
        'source': _text_or_none(news_tag.find('div')),
        'headline': _text_or_none(link_tag),
        # urljoin leaves already-absolute hrefs untouched and inserts the
        # missing "/" for relative ones; plain concatenation breaks both.
        'url': urljoin(BASE_URL, href) if href else None,
        'content': _text_or_none(news_tag.find('p')),
        'image': image_tag.get('src') if image_tag is not None else None,
    }

In [77]:
def scrape_yahoo_news(url, path=None):
    """Scrape the news items from a Yahoo Finance page and write them to a CSV file.

    Args:
        url: Page to scrape.
        path: Destination CSV file. Defaults to ``'stock-market-news.csv'``
            when ``None``.

    Returns:
        DataFrame with one row per news item. Returning it is optional for
        the pipeline; it is handed back so the result can be inspected.
    """
    if path is None:
        path = 'stock-market-news.csv'

    print('Requesting html page')
    doc = get_page(url)

    print('Extracting news tags')
    news_list = get_news_tags(doc)

    print('Parsing news tags')
    news_data = [parse_news(news_tag) for news_tag in news_list]

    print('Save the data to a CSV')
    if news_data:
        news_df = pd.DataFrame(news_data)
    else:
        # No items matched: keep the expected columns so the CSV still gets a
        # header row instead of being written without one.
        news_df = pd.DataFrame(
            columns=['source', 'headline', 'url', 'content', 'image']
        )
    # `index` is a boolean flag; False omits the row index from the file.
    news_df.to_csv(path, index=False)

    return news_df

In [80]:
# Same HBM.TO quote page as `my_url`, rebuilt from BASE_URL.
YAHOO_NEWS_URL = BASE_URL+'/quote/HBM.TO?p=HBM.TO&.tsrc=fin-srch'
# Run the full pipeline; with no `path` given the CSV goes to the default
# 'stock-market-news.csv', and the resulting DataFrame is kept for inspection.
news_df = scrape_yahoo_news(YAHOO_NEWS_URL)

Requesting html page
Extracting news tags
Parsing news tags
Save the data to a CSV


In [82]:
# Display the scraped news table returned by scrape_yahoo_news.
news_df

Unnamed: 0,source,headline,url,content,image
0,Simply Wall St.,Hudbay Minerals Inc.'s (TSE:HBM) Financials Ar...,https://ca.finance.yahoo.com/news/hudbay-miner...,Most readers would already be aware that Hudba...,https://s.yimg.com/uu/api/res/1.2/6i6i2YmC9jlI...
1,Zacks,"HudBay Minerals (HBM) Reports Q3 Loss, Tops Re...",https://ca.finance.yahoo.com/news/hudbay-miner...,HudBay Minerals (HBM) delivered earnings and r...,https://s.yimg.com/uu/api/res/1.2/E0X8OtP1N.eO...
2,GlobeNewswire,Hudbay Provides Exploration Update and Announc...,https://ca.finance.yahoo.com/news/hudbay-provi...,Figure 1: General Location of the Llaguen Proj...,https://s.yimg.com/uu/api/res/1.2/x.zCs4auA5v4...
3,GlobeNewswire,Hudbay Announces Third Quarter 2022 Results,https://ca.finance.yahoo.com/news/hudbay-annou...,"TORONTO, Nov. 02, 2022 (GLOBE NEWSWIRE) -- Hud...",https://s.yimg.com/uu/api/res/1.2/lKO0k3O7IQeV...
4,Zacks,MP Materials Corp. (MP) Earnings Expected to G...,https://ca.finance.yahoo.com/news/mp-materials...,MP Materials Corp. (MP) doesn't possess the ri...,https://s.yimg.com/uu/api/res/1.2/kcRyCd4gFTHy...
5,Zacks,Earnings Preview: HudBay Minerals (HBM) Q3 Ear...,https://ca.finance.yahoo.com/news/earnings-pre...,HudBay Minerals (HBM) doesn't possess the righ...,https://s.yimg.com/uu/api/res/1.2/A6eDU7Slc_f2...
6,GlobeNewswire,Hudbay Announces Senior Management Appointments,https://ca.finance.yahoo.com/news/hudbay-annou...,"TORONTO, Oct. 13, 2022 (GLOBE NEWSWIRE) -- Hud...",https://s.yimg.com/uu/api/res/1.2/lKO0k3O7IQeV...
7,GlobeNewswire,Hudbay to Host Conference Call for Third Quart...,https://ca.finance.yahoo.com/news/hudbay-host-...,"TORONTO, Oct. 06, 2022 (GLOBE NEWSWIRE) -- Hud...",https://s.yimg.com/uu/api/res/1.2/lKO0k3O7IQeV...
8,Simply Wall St.,Institutional owners may take dramatic actions...,https://ca.finance.yahoo.com/news/institutiona...,If you want to know who really controls Hudbay...,https://s.yimg.com/uu/api/res/1.2/6i6i2YmC9jlI...
