In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [2]:
# Open a PyMongo client against the MongoDB server at the URI below
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
# Select the database and the collection that scraped listings are written to
db = client['craigslist_db']
collection = db['items']

In [4]:
# URL of page to be scraped
url = 'https://newjersey.craigslist.org/search/sss?sort=rel&query=guitar'

# Retrieve page with the requests module. Without a timeout, requests waits
# indefinitely on an unresponsive server and the cell never finishes.
response = requests.get(url, timeout=30)
# Stop here on an HTTP error status (4xx/5xx) rather than parsing an error
# page, which would silently produce zero results further down.
response.raise_for_status()
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [5]:
# Examine the results, then determine element that contains sought info
# results are returned as an iterable list
results = soup.find_all('li', class_='result-row')

# Loop through returned results
for result in results:
    # Only the parsing step is guarded. A row that lacks one of the expected
    # elements raises AttributeError/TypeError (attribute or index access on
    # None) or KeyError (anchor without an href); such rows are reported and
    # skipped. Database errors are deliberately NOT caught here, so an
    # unreachable MongoDB fails the cell instead of printing once per row.
    try:
        # Identify and return title of listing
        title = result.find('a', class_='result-title').text
        # Identify and return price of listing (span inside the row's first anchor)
        price = result.a.span.text
        # Identify and return link to listing (href of the row's first anchor)
        link = result.a['href']
    except (AttributeError, KeyError, TypeError) as e:
        print(e)
        continue

    # Run only if title, price, and link are available
    if title and price and link:
        # Print results
        print('-------------')
        print(title)
        print(price)
        print(link)

        # Dictionary to be stored as a MongoDB document
        post = {
            'title': title,
            'price': price,
            'url': link
        }

        # Upsert keyed on the listing URL so that re-running this cell
        # refreshes existing documents instead of inserting duplicates.
        collection.update_one({'url': link}, {'$set': post}, upsert=True)

-------------
Regal Acoustic Guitar
$100
https://newjersey.craigslist.org/msg/d/clifton-regal-acoustic-guitar/6858363732.html
-------------
Seagull S6 Made by Godin - Acoustic Electric Guitar
$350
https://newjersey.craigslist.org/msg/d/pearl-river-seagull-s6-made-by-godin/6858327228.html
-------------
Amada 1/4 size guitar acoustic
$50
https://newjersey.craigslist.org/msg/d/landing-amada-1-4-size-guitar-acoustic/6843254510.html
-------------
The largest Guitar Show in the Northeast
$10
https://newjersey.craigslist.org/msg/d/wallingford-the-largest-guitar-show-in/6851892090.html
-------------
Tune Technology TWB4 4 String Bass
$650
https://newjersey.craigslist.org/msg/d/orange-tune-technology-twb4-4-string/6846175581.html
-------------
Tascam GB-10 Guitar Trainer
$80
https://newjersey.craigslist.org/msg/d/hoboken-tascam-gb-10-guitar-trainer/6858284006.html
-------------
Gretsch Electromatic guitar
$65
https://newjersey.craigslist.org/msg/d/clifton-gretsch-electromatic-guitar/6858248271.

In [6]:
# Read every document back from the items collection and print each one
listings = db['items'].find()

for listing in listings:
    print(listing)

{'_id': ObjectId('5ca6a000a39bae7c3f8b2710'), 'title': 'Regal Acoustic Guitar', 'price': '$100', 'url': 'https://newjersey.craigslist.org/msg/d/clifton-regal-acoustic-guitar/6858363732.html'}
{'_id': ObjectId('5ca6a000a39bae7c3f8b2711'), 'title': 'Seagull S6 Made by Godin - Acoustic Electric Guitar', 'price': '$350', 'url': 'https://newjersey.craigslist.org/msg/d/pearl-river-seagull-s6-made-by-godin/6858327228.html'}
{'_id': ObjectId('5ca6a000a39bae7c3f8b2712'), 'title': 'Amada 1/4 size guitar acoustic', 'price': '$50', 'url': 'https://newjersey.craigslist.org/msg/d/landing-amada-1-4-size-guitar-acoustic/6843254510.html'}
{'_id': ObjectId('5ca6a000a39bae7c3f8b2713'), 'title': 'The largest Guitar Show in the Northeast', 'price': '$10', 'url': 'https://newjersey.craigslist.org/msg/d/wallingford-the-largest-guitar-show-in/6851892090.html'}
{'_id': ObjectId('5ca6a000a39bae7c3f8b2714'), 'title': 'Tune Technology TWB4 4 String Bass', 'price': '$650', 'url': 'https://newjersey.craigslist.org/