In [180]:
import pymongo

In [181]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import os

In [182]:
# Create the PyMongo client for the MongoDB server at localhost:27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [183]:
# Handles for the `quotes_db` database and its `quotes` collection
db = client['quotes_db']
collection = db['quotes']

In [184]:
# Address of the page whose quotes are scraped below
url = "http://quotes.toscrape.com/"

In [185]:
# Retrieve page with the requests module.
# The timeout keeps the cell from hanging indefinitely when the server never
# answers, and raise_for_status() stops here on an HTTP error status instead
# of letting an error page be parsed as if it were the quotes page.
response = requests.get(url, timeout=10)
response.raise_for_status()

In [186]:
# Build the BeautifulSoup tree from the response body, parsed with 'lxml'
soup = BeautifulSoup(response.text, features='lxml')

In [187]:
# Show the indented markup so the elements holding the sought info can be identified
page_markup = soup.prettify()
print(page_markup)

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Quotes to Scrape
  </title>
  <link href="/static/bootstrap.min.css" rel="stylesheet"/>
  <link href="/static/main.css" rel="stylesheet"/>
 </head>
 <body>
  <div class="container">
   <div class="row header-box">
    <div class="col-md-8">
     <h1>
      <a href="/" style="text-decoration: none">
       Quotes to Scrape
      </a>
     </h1>
    </div>
    <div class="col-md-4">
     <p>
      <a href="/login">
       Login
      </a>
     </p>
    </div>
   </div>
   <div class="row">
    <div class="col-md-8">
     <div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork">
      <span class="text" itemprop="text">
       “The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”
      </span>
      <span>
       by
       <small class="author" itemprop="author">
        Albert Einstein
       </small>
       <a href="/author/Albert

In [188]:
# Collect every <div class="quote"> element; the bare name on the last line displays them
results = soup.find_all(name='div', attrs={'class': 'quote'})
results

[<div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork">
 <span class="text" itemprop="text">“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”</span>
 <span>by <small class="author" itemprop="author">Albert Einstein</small>
 <a href="/author/Albert-Einstein">(about)</a>
 </span>
 <div class="tags">
             Tags:
             <meta class="keywords" content="change,deep-thoughts,thinking,world" itemprop="keywords"/>
 <a class="tag" href="/tag/change/page/1/">change</a>
 <a class="tag" href="/tag/deep-thoughts/page/1/">deep-thoughts</a>
 <a class="tag" href="/tag/thinking/page/1/">thinking</a>
 <a class="tag" href="/tag/world/page/1/">world</a>
 </div>
 </div>,
 <div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork">
 <span class="text" itemprop="text">“It is our choices, Harry, that show what we truly are, far more than our abilities.”</span>
 <span>by <small class="author" itempr

In [189]:
# Loop through returned results and print the quote, tags, author and
# author-page URL of each quote block
for result in results:
    # Error handling: find() returns None for a missing element, so the
    # chained attribute access raises AttributeError; report it and move on
    # to the next quote.
    try:
        quote = result.find('span', class_="text").text
        tags = result.find('div', class_="tags").text
        author = result.find('small', class_="author").text
        # .get() yields None instead of raising an uncaught KeyError when the
        # first anchor carries no href attribute
        details = result.find('a').get('href')
        # Only build the URL when a link exists; a formatted string would be
        # non-empty (and therefore always truthy) even without an href
        details_url = f'http://quotes.toscrape.com{details}' if details else None

        # Print results only if quote, tags, author and details_url are available
        if quote and author and tags and details_url:
            print('-------------')
            print(quote)
            print(tags)
            print(author)
            print(details_url)

    except AttributeError as e:
        print(e)

-------------
“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”

            Tags:
            
change
deep-thoughts
thinking
world

Albert Einstein
http://quotes.toscrape.com/author/Albert-Einstein
-------------
“It is our choices, Harry, that show what we truly are, far more than our abilities.”

            Tags:
            
abilities
choices

J.K. Rowling
http://quotes.toscrape.com/author/J-K-Rowling
-------------
“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”

            Tags:
            
inspirational
life
live
miracle
miracles

Albert Einstein
http://quotes.toscrape.com/author/Albert-Einstein
-------------
“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”

            Tags:
            
aliteracy
books
classic
humor

Jane Austen
http://quotes.toscrape.com/author/Jane-Austen

In [190]:
# Display items in MongoDB collection
# Query the `quotes` collection bound to `collection` above; `db.quotes_db`
# addresses a different collection that happens to share the database's name.
quotes = collection.find()

# A separate loop name keeps the scraped `quote` string from being overwritten
for stored_quote in quotes:
    print(stored_quote)

In [172]:
# Absolute URL of the author page linked from `result`.
# NOTE: `result` is not assigned in this cell; it is whatever the quote loop
# (`for result in results`) left bound, so that loop must have run first.
details = result.find('a')['href']
details_url = f'http://quotes.toscrape.com{details}'

In [173]:
# Retrieve the author page.
# The timeout keeps the cell from hanging indefinitely when the server never
# answers, and raise_for_status() stops here on an HTTP error status instead
# of letting an error page be parsed as if it were the author page.
response = requests.get(details_url, timeout=10)
response.raise_for_status()

In [174]:
# Parse the author page with 'lxml'; this rebinds `soup`, replacing the quotes-page tree
soup = BeautifulSoup(response.text, features='lxml')

In [175]:
# Show the indented markup of the author page for inspection
author_page_markup = soup.prettify()
print(author_page_markup)

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Quotes to Scrape
  </title>
  <link href="/static/bootstrap.min.css" rel="stylesheet"/>
  <link href="/static/main.css" rel="stylesheet"/>
 </head>
 <body>
  <div class="container">
   <div class="row header-box">
    <div class="col-md-8">
     <h1>
      <a href="/" style="text-decoration: none">
       Quotes to Scrape
      </a>
     </h1>
    </div>
    <div class="col-md-4">
     <p>
      <a href="/login">
       Login
      </a>
     </p>
    </div>
   </div>
   <div class="author-details">
    <h3 class="author-title">
     Steve Martin
    </h3>
    <p>
     <strong>
      Born:
     </strong>
     <span class="author-born-date">
      August 14, 1945
     </span>
     <span class="author-born-location">
      in Waco, Texas, The United States
     </span>
    </p>
    <p>
     <strong>
      Description:
     </strong>
    </p>
    <div class="author-description">
     Stephen Glenn "Steve" Marti

In [176]:
# Collect every <div class="author-details"> element; the bare name on the last line displays them
author_info = soup.find_all(name='div', attrs={'class': 'author-details'})
author_info

[<div class="author-details">
 <h3 class="author-title">Steve Martin
     </h3><p><strong>Born:</strong> <span class="author-born-date">August 14, 1945</span> <span class="author-born-location">in Waco, Texas, The United States</span></p>
 <p><strong>Description:</strong></p>
 <div class="author-description">
         Stephen Glenn "Steve" Martin is an American actor, comedian, writer, playwright, producer, musician, and composer. He was raised in Southern California in a Baptist family, where his early influences were working at Disneyland and Knott's Berry Farm and working magic and comedy acts at these and other smaller venues in the area. His ascent to fame picked up when he became a writer for the Smothers Brothers Comedy Hour, and later became a frequent guest on the Tonight Show.In the 1970s, Martin performed his offbeat, absurdist comedy routines before packed houses on national tours. In the 1980s, having branched away from stand-up comedy, he became a successful actor, playwr

In [177]:
# Print the name, birth date and description found in each author-details block
for detail in author_info:
    try:
        # find() returns None for a missing element, so .text raises
        # AttributeError, which is reported below instead of stopping the loop
        born = detail.find('span', attrs={'class': 'author-born-date'}).text
        description = detail.find('div', attrs={'class': 'author-description'}).text
        title = detail.find('h3', attrs={'class': 'author-title'}).text
    except AttributeError as e:
        print(e)
    else:
        # Nothing is printed unless both the birth date and the description are non-empty
        if not (born and description):
            continue
        print('-------------', title, born, description, sep='\n')

    

-------------
Steve Martin
    
August 14, 1945

        Stephen Glenn "Steve" Martin is an American actor, comedian, writer, playwright, producer, musician, and composer. He was raised in Southern California in a Baptist family, where his early influences were working at Disneyland and Knott's Berry Farm and working magic and comedy acts at these and other smaller venues in the area. His ascent to fame picked up when he became a writer for the Smothers Brothers Comedy Hour, and later became a frequent guest on the Tonight Show.In the 1970s, Martin performed his offbeat, absurdist comedy routines before packed houses on national tours. In the 1980s, having branched away from stand-up comedy, he became a successful actor, playwright, and juggler, and eventually earned Emmy, Grammy, and American Comedy awards.    
    


In [179]:
# Display items in MongoDB collection
# Query the `quotes` collection bound to `collection` earlier in the notebook;
# `db.quotes_db` addresses a different collection that happens to share the
# database's name.
# NOTE(review): no other collection is defined in the visible cells - confirm
# this is where author details are meant to be read from.
author_details = collection.find()

for detail in author_details:
    print(detail)