In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [2]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [3]:
# Define database and collection
db = client.craigslist_db
collection = db.items

In [4]:
# URL of page to be scraped
url = 'https://newjersey.craigslist.org/search/sss?sort=rel&query=guitar'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [5]:
# Examine the results, then determine element that contains sought info
# results are returned as an iterable list
results = soup.find_all('li', class_='result-row')

# Loop through returned results
for result in results:
    # Error handling
    try:
        # Identify and return title of listing
        title = result.find('a', class_='result-title').text
        # Identify and return price of listing
        price = result.a.span.text
        # Identify and return link to listing
        link = result.a['href']

        # Run only if title, price, and link are available
        if (title and price and link):
            # Print results
            print('-------------')
            print(title)
            print(price)
            print(link)

            # Dictionary to be inserted as a MongoDB document
            post = {
                'title': title,
                'price': price,
                'url': link
            }

            collection.insert_one(post)

    except Exception as e:
        print(e)

-------------
Guitar Effect Pedals For Sale
$1
https://newjersey.craigslist.org/msg/d/hackettstown-guitar-effect-pedals-for/6817300398.html
-------------
Guitar Charm w Necklace & Lunchbag
$5
https://newjersey.craigslist.org/jwl/d/bridgewater-guitar-charm-necklace/6807117184.html
-------------
Road Runner RRMBG ABS Molded Bass Guitar Hardshell Case
$105
https://newjersey.craigslist.org/msg/d/budd-lake-road-runner-rrmbg-abs-molded/6806381793.html
-------------
Acoustic Guitar Signed by Paul Simon
$2000
https://newjersey.craigslist.org/clt/d/oradell-acoustic-guitar-signed-by-paul/6817231527.html
-------------
Digitech GSP 7 Guitar Effects Processor
$100
https://newjersey.craigslist.org/msg/d/digitech-gsp-7-guitar-effects-processor/6807899722.html
'NoneType' object has no attribute 'text'
-------------
Guitar amp - Vox VT40+
$145
https://newjersey.craigslist.org/msg/d/guitar-amp-vox-vt40/6804510865.html
-------------
Fender Gemini II E electric acoustic guitar
$60
https://newjersey.craigs

'NoneType' object has no attribute 'text'
-------------
Roland GA-112 100watt Pro Guitar Amp
$300
https://newjersey.craigslist.org/msg/d/nutley-roland-gawatt-pro-guitar-amp/6805506236.html
-------------
***** ELVIS GUITAR CD RACK BY ATLANTIC *****
$20
https://newjersey.craigslist.org/for/d/elvis-guitar-cd-rack-by-atlantic/6803376262.html
-------------
Guitar Ovation Celebrity Acoustic/Electric CC28 - like new w/hard case
$425
https://newjersey.craigslist.org/msg/d/hewitt-guitar-ovation-celebrity/6805679624.html
-------------
Rack Processors + Guitar Effects Pedals
$50
https://newjersey.craigslist.org/msg/d/rack-processors-guitar-effects-pedals/6801775708.html
-------------
Yamaha Guitar APX 600 w/ bag, capo & picks
$180
https://newjersey.craigslist.org/msg/d/montclair-yamaha-guitar-apx-600-bag/6813930847.html
-------------
Guild Diablo Guitar Strings
$20
https://newjersey.craigslist.org/msg/d/pine-brook-guild-diablo-guitar-strings/6813627847.html
-------------
Marshall Slash SL5 Guitar

In [6]:
# Display items in MongoDB collection
listings = db.items.find()

for listing in listings:
    print(listing)

{'_id': ObjectId('5c623d2b6e10d92b0b7dd56a'), 'title': 'Guitar Effect Pedals For Sale', 'price': '$1', 'url': 'https://newjersey.craigslist.org/msg/d/hackettstown-guitar-effect-pedals-for/6817300398.html'}
{'_id': ObjectId('5c623d2b6e10d92b0b7dd56b'), 'title': 'Guitar Charm w Necklace & Lunchbag', 'price': '$5', 'url': 'https://newjersey.craigslist.org/jwl/d/bridgewater-guitar-charm-necklace/6807117184.html'}
{'_id': ObjectId('5c623d2b6e10d92b0b7dd56c'), 'title': 'Road Runner RRMBG ABS Molded Bass Guitar Hardshell Case', 'price': '$105', 'url': 'https://newjersey.craigslist.org/msg/d/budd-lake-road-runner-rrmbg-abs-molded/6806381793.html'}
{'_id': ObjectId('5c623d2b6e10d92b0b7dd56d'), 'title': 'Acoustic Guitar Signed by Paul Simon', 'price': '$2000', 'url': 'https://newjersey.craigslist.org/clt/d/oradell-acoustic-guitar-signed-by-paul/6817231527.html'}
{'_id': ObjectId('5c623d2b6e10d92b0b7dd56e'), 'title': 'Digitech GSP 7 Guitar Effects Processor', 'price': '$100', 'url': 'https://newj