In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [2]:
# Open a PyMongo client against the MongoDB server on localhost (port 27017)
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
# Select the 'nhl_db' database and its 'articles' collection
# (subscript access is equivalent to attribute access in PyMongo)
db = client['nhl_db']
collection = db['articles']

In [4]:
# URL of page to be scraped
url = 'https://www.nhl.com/news'

# Retrieve page with the requests module.
# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx instead of silently parsing an error page
# (which would just yield zero articles further down).
response.raise_for_status()

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

In [5]:
# Retrieve the parent divs for all articles
results = soup.find_all('div', class_='article-item__top')

# Loop over results to get article data
for result in results:
    # Locate the elements first: find() returns None when an element is
    # absent, so the tags must be checked before reading .text / attributes.
    header_tag = result.find('h1', class_='article-item__headline')
    subheader_tag = result.find('h2', class_='article-item__subheader')
    date_tag = result.find('span', class_='article-item__date')

    # Skip articles that lack any expected element rather than letting a
    # single incomplete entry abort the whole scrape.
    if header_tag is None or subheader_tag is None or date_tag is None:
        continue

    # The datetime lives in the 'data-date' attribute; skip if it is missing
    timestamp = date_tag.get('data-date')
    if not timestamp:
        continue

    # Scrape the article header and subheader text
    header = header_tag.text
    subheader = subheader_tag.text

    # Get only the date from the datetime (the part before the 'T' separator)
    date = timestamp.split('T')[0]

    # Print article data
    print('-----------------')
    print(header)
    print(subheader)
    print(date)

    # Dictionary to be stored in MongoDB
    post = {
        'header': header,
        'subheader': subheader,
        'date': date,
    }

    # Upsert keyed on header + date so re-running this cell updates the
    # existing document instead of inserting a duplicate on every run.
    collection.update_one(
        {'header': header, 'date': date},
        {'$set': post},
        upsert=True,
    )

-----------------
Canucks defeat Maple Leafs in OT in return from COVID-19 pause
Horvat scores second goal to win it at 1:19 in first game since March 24
2021-04-18
-----------------
Leddy, Islanders defeat Flyers in OT
Scores at 2:23; Sorokin has 30-save shutout for New York; Elliott makes 27 for Philadelphia
2021-04-18
-----------------
Stone extends multipoint streak to four, Golden Knights defeat Ducks
Pacioretty, Roy each has goal, assist for Vegas, which sweeps Anaheim with sixth straight win
2021-04-18
-----------------
Marchand scores four points, Bruins top Capitals for fourth win in row
Has two goals, two assists to reach 700 NHL points; Ovechkin remains one behind Dionne
2021-04-18
-----------------
Zibanejad, Rangers recover, defeat Devils for four-game sweep
Breaks tie late on power play after New York loses three-goal lead
2021-04-18
-----------------
Reinhart scores twice, Sabres end Penguins point streak at five
Tokarski makes 34 saves for Buffalo, which is 3-1-1 in its

In [6]:
# Print every document currently stored in the articles collection
for record in db.articles.find():
    print(record)

{'_id': ObjectId('607ced5a0168866fc92a1a86'), 'header': 'Canucks defeat Maple Leafs in OT in return from COVID-19 pause', 'subheader': 'Horvat scores second goal to win it at 1:19 in first game since March 24', 'date': '2021-04-18'}
{'_id': ObjectId('607ced5b0168866fc92a1a87'), 'header': 'Leddy, Islanders defeat Flyers in OT', 'subheader': 'Scores at 2:23; Sorokin has 30-save shutout for New York; Elliott makes 27 for Philadelphia', 'date': '2021-04-18'}
{'_id': ObjectId('607ced5b0168866fc92a1a88'), 'header': 'Stone extends multipoint streak to four, Golden Knights defeat Ducks', 'subheader': 'Pacioretty, Roy each has goal, assist for Vegas, which sweeps Anaheim with sixth straight win', 'date': '2021-04-18'}
{'_id': ObjectId('607ced5b0168866fc92a1a89'), 'header': 'Marchand scores four points, Bruins top Capitals for fourth win in row', 'subheader': 'Has two goals, two assists to reach 700 NHL points; Ovechkin remains one behind Dionne', 'date': '2021-04-18'}
{'_id': ObjectId('607ced5b