In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [2]:
# Open a PyMongo client against the MongoDB server at the URI below
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
# Select the database and the collection that will hold the scraped articles
# (dictionary-style access; equivalent to client.nhl_db / db.articles)
db = client['nhl_db']
collection = db['articles']

In [4]:
# Address of the news listing page that the following cells download and parse
url = "https://www.nhl.com/news"

In [6]:
# Retrieve page with the requests module.
# The timeout keeps this cell from hanging indefinitely on an unresponsive
# server, and raise_for_status() stops the notebook on an HTTP error status
# instead of letting the later cells parse an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Echo the response object so its status code shows in the cell output
response

<Response [200]>

In [7]:
# Parse the downloaded HTML into a BeautifulSoup tree using the 'lxml' parser
soup = BeautifulSoup(markup=response.text, features='lxml')

In [13]:
# Retrieve the parent divs for all articles
results = soup.find_all("div", class_="article-item__top")

# Loop over results to get article data
for result in results:
    # Look up the three elements read from each article div.
    # find() returns None when an element is absent, so the tags are fetched
    # first and only dereferenced once all of them are known to exist.
    header_tag = result.find('h1', class_='article-item__headline')
    subheader_tag = result.find('h2', class_='article-item__subheader')
    date_tag = result.find('span', class_='article-item__date')
    if header_tag is None or subheader_tag is None or date_tag is None:
        # Skip incomplete entries rather than aborting the loop after some
        # documents have already been inserted.
        continue

    # .get() yields None instead of raising KeyError if the attribute is missing
    date = date_tag.get('data-date')
    if date is None:
        continue

    # Text of the article header and subheader
    header = header_tag.text
    subheader = subheader_tag.text

    # Print article data
    print('-----------------')
    print(header)
    print(subheader)
    print(date)

    # Dictionary to be inserted into MongoDB; the date is kept exactly as it
    # appears in the data-date attribute
    post = {
        'header': header,
        'subheader': subheader,
        'date': date
    }

    # Insert dictionary into MongoDB as a document
    collection.insert_one(post)

-----------------
Vasilevskiy sets Lightning point record in win against Penguins
Makes 29 saves for longest streak by goalie with 17; Kucherov has goal, assist for Tampa Bay
2020-02-06T22:00:40-0500
-----------------
Petry scores in overtime, Canadiens defeat Ducks
Defenseman's goal at 25 seconds produces sixth win in eight games
2020-02-06T21:46:46-0500
-----------------
Bellows scores first two NHL goals to lift Islanders past Kings
Second one breaks tie with 7:50 left in third; New York extends point streak to five
2020-02-06T21:46:12-0500
-----------------
Super 16: Lightning move past Penguins, Blues to take third in rankings
Blue Jackets climb four spots; Panthers fall five
2020-02-06T00:00:00-0500
-----------------
Fantasy goalie top 25 rankings for 2019-20
Bobrovsky among 10 best for Panthers; Merzlikins continues to trend up
2020-02-06T09:00:00-0500
-----------------
Blackwood makes 46 saves, Devils shut out Flyers
Gets first NHL assist; Wood scores twice for New Jersey
2020-

In [14]:
# Read back every document in the articles collection and print each one
articles = db['articles'].find()
for article in articles:
    print(article)

{'_id': ObjectId('5e3cd8a8edf633f6e323483f'), 'header': 'Vasilevskiy sets Lightning point record in win against Penguins', 'subheader': 'Makes 29 saves for longest streak by goalie with 17; Kucherov has goal, assist for Tampa Bay', 'date': '2020-02-06T22:00:40-0500'}
{'_id': ObjectId('5e3cd8a8edf633f6e3234840'), 'header': 'Petry scores in overtime, Canadiens defeat Ducks', 'subheader': "Defenseman's goal at 25 seconds produces sixth win in eight games", 'date': '2020-02-06T21:46:46-0500'}
{'_id': ObjectId('5e3cd8a8edf633f6e3234841'), 'header': 'Bellows scores first two NHL goals to lift Islanders past Kings', 'subheader': 'Second one breaks tie with 7:50 left in third; New York extends point streak to five', 'date': '2020-02-06T21:46:12-0500'}
{'_id': ObjectId('5e3cd8a8edf633f6e3234842'), 'header': 'Super 16: Lightning move past Penguins, Blues to take third in rankings', 'subheader': 'Blue Jackets climb four spots; Panthers fall five', 'date': '2020-02-06T00:00:00-0500'}
{'_id': Objec