In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [2]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [3]:
# Define database and collection
db = client.nhl_db
collection = db.articles

In [4]:
# URL of page to be scraped
url = 'https://www.nhl.com/news'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

In [5]:
# Retrieve the parent divs for all articles
results = soup.find_all('div', class_='article-item__top')

# loop over results to get article data
for result in results:
    # scrape the article header 
    header = result.find('h1', class_='article-item__headline').text
    
    # scrape the article subheader
    subheader = result.find('h2', class_='article-item__subheader').text
    
    # scrape the datetime
    datetime = result.find('span', class_='article-item__date')['data-date'] 
    
    # get only the date from the datetime
    date = datetime.split('T')[0]
    
    # print article data
    print('-----------------')
    print(header)
    print(subheader)
    print(date)

    # Dictionary to be inserted into MongoDB
    post = {
        'header': header,
        'subheader': subheader,
        'date': date,
    }

    # Insert dictionary into MongoDB as a document
    collection.insert_one(post)

-----------------
Canucks' return delayed, will not face Oilers on Friday
Vancouver has not played since March 24 due to COVID-19 outbreak
2021-04-15
-----------------
Matthews out for Maple Leafs against Jets with undisclosed injury
Center day to day, leads NHL in goals
2021-04-15
-----------------
Super 16: Avalanche remain No. 1, Blues return to NHL.com power rankings
Golden Knights, Capitals move up; Lightning, Panthers tumble
2021-04-15
-----------------
Marleau approaching NHL games played record with Sharks with grace
Center's perseverance doing 'something you love' has him three from passing Howe at 1,767
2021-04-15
-----------------
Grubauer out at least two weeks for Avalanche in NHL COVID-19 protocol
Dubnyk, acquired from Sharks, debuts, defeats Blues
2021-04-14
-----------------
NHL Buzz: Rask starts in return for Bruins against Islanders
Stamkos to miss next two games for Lightning; Seguin skates with Stars, could be back in two weeks
2021-04-15
-----------------
Eichel ou

AttributeError: 'NoneType' object has no attribute 'text'

In [6]:
# Display the MongoDB records created above
articles = db.articles.find()
for article in articles:
    print(article)

{'_id': ObjectId('6078dc7b3ce33741b3591c74'), 'header': "Canucks' return delayed, will not face Oilers on Friday", 'subheader': 'Vancouver has not played since March 24 due to COVID-19 outbreak', 'date': '2021-04-15'}
{'_id': ObjectId('6078dc7b3ce33741b3591c75'), 'header': 'Matthews out for Maple Leafs against Jets with undisclosed injury', 'subheader': 'Center day to day, leads NHL in goals', 'date': '2021-04-15'}
{'_id': ObjectId('6078dc7b3ce33741b3591c76'), 'header': 'Super 16: Avalanche remain No. 1, Blues return to NHL.com power rankings', 'subheader': 'Golden Knights, Capitals move up; Lightning, Panthers tumble', 'date': '2021-04-15'}
{'_id': ObjectId('6078dc7b3ce33741b3591c77'), 'header': 'Marleau approaching NHL games played record with Sharks with grace', 'subheader': "Center's perseverance doing 'something you love' has him three from passing Howe at 1,767", 'date': '2021-04-15'}
{'_id': ObjectId('6078dc7b3ce33741b3591c78'), 'header': 'Grubauer out at least two weeks for Ava