In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [3]:
# Build a PyMongo client for the MongoDB server at the URI below
# (localhost, port 27017).
conn = "mongodb://localhost:27017"
Client = pymongo.MongoClient(host=conn)

In [4]:
# Select the NHL_DB database on the connected client.
db = Client["NHL_DB"]
# NOTE: `articles` is the cursor returned by find() on the `articles`
# collection, not the collection object itself.
articles = db["articles"].find()

In [5]:
# Address of the NHL news page that the cells below download and parse
url = "https://www.nhl.com/news"

In [6]:
# Retrieve page with the requests module.
# requests applies no timeout by default, so without one this cell can hang
# forever if the server never answers.
response = requests.get(url, timeout=10)
# Stop here on a 4xx/5xx reply instead of silently parsing an error page
# in the cells below.
response.raise_for_status()

In [7]:
# Parse the downloaded HTML text into a BeautifulSoup tree using the
# 'html.parser' backend.
soup = BeautifulSoup(markup=response.text, features="html.parser")

In [8]:
# Print the parsed page as indented HTML. Note that this writes the entire
# document to the cell output.
page_html = soup.prettify()
print(page_html)

<!DOCTYPE html>
<html lang="en_US">
 <head>
  <title>
   NHL Hockey News | NHL.com
  </title>
  <!-- meta meta tag -->
  <meta content="IE=Edge" http-equiv="X-UA-Compatible"/>
  <meta content="text/html; charset=utf-8" http-equiv="content-type"/>
  <meta content="no-cache" http-equiv="Cache-Control"/>
  <meta content="no-cache" http-equiv="Pragma"/>
  <meta content="-1" http-equiv="Expires"/>
  <meta content="en" http-equiv="content-language"/>
  <meta content="nhl, nhl.com, www.nhl.com, playoffs, scores, video, photos, standings, news, features, players, shop, auctions, tickets, mobile, game center live, stanley cup, winter classic, draft, free agency" name="keywords"/>
  <meta content="US" name="countryCode"/>
  <meta content="NHL Hockey News" property="og:title"/>
  <meta content="NHL Hockey News NHL.com" itemprop="name"/>
  <meta content="NHL.com" property="og:site_name"/>
  <meta content="website" property="og:type"/>
  <meta content="https://cms.nhl.bamgrid.com/images/photos/3260

In [26]:
# Retrieve the parent divs for all articles
results = soup.find_all("div", class_="article-item__top")

# Loop over results to get article data
for result in results:
    # Locate the tags first: find() returns None when a tag is absent, and
    # dereferencing None would abort the whole loop on one incomplete item.
    header_tag = result.find('h1', class_='article-item__headline')
    subheader_tag = result.find('h2', class_='article-item__subheader')
    date_tag = result.find('span', class_="article-item__date")

    # Skip containers that do not hold a complete article
    if header_tag is None or subheader_tag is None or date_tag is None:
        continue

    # Scrape the article header and subheader text
    header = header_tag.text
    subheader = subheader_tag.text

    # Scrape the full timestamp; .get() returns None instead of raising
    # KeyError when the attribute is missing. Named `timestamp` so the
    # stdlib module name `datetime` is not shadowed.
    timestamp = date_tag.get('data-date')

    # Skip placeholder entries with a blank headline or no timestamp so that
    # empty documents are not stored.
    if not header.strip() or not timestamp:
        continue

    # Get only the date from the timestamp (the part before the "T")
    date = timestamp.split("T")[0]

    # Print article data
    print('-----------------')
    print(header)
    print(subheader)
    print(date)

    # Dictionary to be stored in MongoDB as a document
    article_doc = {
        "Header": header,
        "Subheader": subheader,
        "Date": date
    }

    # Upsert keyed on headline + date: re-running this cell refreshes the
    # existing document instead of inserting a duplicate on every run.
    db.articles.replace_one(
        {"Header": header, "Date": date},
        article_doc,
        upsert=True
    )

-----------------
Maple Leafs add Ritchie, could fill vacancy on top line
Sign Mrazek to team with Campbell in goal, seek secondary scoring from Kampf, Bunting
2021-08-27
-----------------
Lundqvist 'was very close' to return with Capitals last season: report
Retired goalie's bid to play following open-heart surgery ended by inflammation
2021-08-27
-----------------
Tavares expects to be ready for Maple Leafs training camp
Center 'doing fantastic' after sustaining concussion, knee injury in playoff opener
2021-08-27
-----------------
Golden Knights add Dadonov, Patrick, Howden to bolster forward depth
Also sign goalie Brossoit to back up Lehner following Fleury trade
2021-08-27
-----------------
Svechnikov agrees to eight-year, $62 million contract with Hurricanes
Restricted free agent forward had 42 points last season, leads 2018 draft class in scoring
2021-08-26
-----------------
NHL Free Agent Tracker
Complete list of signings by team, available players
2021-08-27
-----------------


In [27]:
# Read every document back from the `articles` collection and print each one
# on its own line.
articles = db["articles"].find()
for article in articles:
    print(article)

{'_id': ObjectId('612a5b6cf06ff4d7b411fa10'), 'Header': 'Maple Leafs add Ritchie, could fill vacancy on top line', 'Subheader': 'Sign Mrazek to team with Campbell in goal, seek secondary scoring from Kampf, Bunting', 'Date': '2021-08-27'}
{'_id': ObjectId('612a5b6cf06ff4d7b411fa11'), 'Header': "Lundqvist 'was very close' to return with Capitals last season: report", 'Subheader': "Retired goalie's bid to play following open-heart surgery ended by inflammation", 'Date': '2021-08-27'}
{'_id': ObjectId('612a5b6cf06ff4d7b411fa12'), 'Header': 'Tavares expects to be ready for Maple Leafs training camp', 'Subheader': "Center 'doing fantastic' after sustaining concussion, knee injury in playoff opener", 'Date': '2021-08-27'}
{'_id': ObjectId('612a5b6cf06ff4d7b411fa13'), 'Header': 'Golden Knights add Dadonov, Patrick, Howden to bolster forward depth', 'Subheader': 'Also sign goalie Brossoit to back up Lehner following Fleury trade', 'Date': '2021-08-27'}
{'_id': ObjectId('612a5b6cf06ff4d7b411fa1