In [7]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo
import datetime as dt

In [8]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [9]:
# Define database and collection
db = client.hockey_db
collection = db.items

In [10]:
# URL of page to be scraped
url = 'https://www.nhl.com/'

In [11]:
# Retrieve page with the requests module
response = requests.get(url)

In [12]:
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [17]:
# Retrieve the parent divs for all articles
results = soup.find_all('li', class_='mixed-feed__item--article')

# Loop through results to retrieve article title, header, and timestamp of article
for result in results:
    title = result.find('h4', class_='mixed-feed__header').text

    lede = result.find('h5', class_='mixed-feed__subheader').text

    # The time and date of article publication
    date = result.find('time')['datetime']
    # Slice the datetime string for the date
    article_date = date[:10]
    # Slice the datetime string for the time
    time = date[11:16]
    
    # Determine whether article was published in AM or PM
    if (int(time[:2]) >= 13):
        meridiem = 'pm'
    else:
        meridiem = 'am'

    # Concatenate time string
    time = time + meridiem
    print('-----------------')
    print(title)
    print(lede)
    print(article_date)
    print(time)

    # Dictionary to be inserted into MongoDB
    post = {
        'title': title,
        'lede': lede,
        'date': article_date,
        'time published': time
    }

    # Insert dictionary into MongoDB as a document
    collection.insert_one(post)

-----------------
Matthews says being named Maple Leafs captain would be 'huge honor'
Role has been vacant since 2016; center hopes Marner contract situation gets settled soon
2019-08-23
11:20am
-----------------
San Jose Sharks fantasy preview for 2019-20
Karlsson, Burns most valuable defensemen; Meier could expand on breakout season with PP1 role after Pavelski's departure
2019-08-24
00:00am
-----------------
Guerin has 'full confidence' in Boudreau as Wild coach
GM also discusses new job, raising expectations, career influences with NHL.com
2019-08-23
15:40pm
-----------------
Subban engaged to skier Vonn
Devils defenseman known for flashiness pops question in low-key proposal
2019-08-23
17:31pm
-----------------
Fantasy goalie, team win projections for 2019-20
Vasilevskiy, Bobrovsky atop list; Binnington can carry over rookie success; standings outlook for each division
2019-08-23
12:00am
-----------------
Three questions facing San Jose Sharks
Thornton's status, play of Jones amon

In [None]:
# Display the MongoDB records created above
articles = db.articles.find()
for article in articles:
    print(article)