# A program that scrapes every quote (text and author) from the website http://quotes.toscrape.com

In [1]:
from bs4 import BeautifulSoup as bs
import requests as req
import pandas as pd
import traceback

# Scraper state: the page number to fetch next and the loop-continue flag.
currentPageNo = 1
proceed = True
# Collects one {'Quote': ..., 'Author': ...} dict per scraped quote.
quoteList = []

# Walk the paginated listing until a page comes back without any quote blocks
# or a request/parse error occurs.
while proceed:
    url = "http://quotes.toscrape.com/page/" + str(currentPageNo)
    print(f"Currently scraping {url}")
    try:
        # Without a timeout a stalled connection would hang the loop forever;
        # raise_for_status() reports HTTP errors instead of parsing an error page.
        response = req.get(url, timeout=10)
        response.raise_for_status()
        soup = bs(response.text, "lxml")
    except Exception as e:
        print(f"An error occoured reading from:[{url}]\n[{e}]")
        traceback.print_exc()
        break
    else:
        quoteDivs = soup.find_all("div", attrs={"class": "quote"})
        if not quoteDivs:
            # A page with no quote blocks is treated as the end of the listing.
            print("No quotes found for scraping, exiting...")
            proceed = False
        else:
            print("Quotes found, lets scrape...")
            for quoteDiv in quoteDivs:
                # Look up text and author inside the same quote block so a
                # missing element cannot shift the pairing of later quotes.
                quote = quoteDiv.find("span", attrs={"class": "text"})
                author = quoteDiv.find("small", attrs={"class": "author"})
                if quote is None or author is None:
                    continue
                quoteList.append({'Quote': quote.text, 'Author': author.text})
            # Move to next page
            currentPageNo += 1

# Persist the scraped quotes: build a DataFrame from quoteList and write it to
# both a CSV file and an Excel workbook in the current working directory.
# Any failure is reported together with its full traceback.
try:
    if quoteList:
        df = pd.DataFrame(quoteList)
        print("Persisting scraped data to file")
        df.to_csv('scraped_quotes.csv')
        df.to_excel('scraped_quotes.xlsx')
    else:
        print('Nothing to persist')
except Exception as e:
    print(f"An error has occoured:[{e}]")
    # Must be the module attribute traceback.print_exc; the misspelled name
    # `traceback_print_exc` raised NameError here and hid the real error.
    traceback.print_exc()
else:
    print("Completed...")

Currently scraping http://quotes.toscrape.com/page/1
Quotes found, lets scrape...
Currently scraping http://quotes.toscrape.com/page/2
Quotes found, lets scrape...
Currently scraping http://quotes.toscrape.com/page/3
Quotes found, lets scrape...
Currently scraping http://quotes.toscrape.com/page/4
Quotes found, lets scrape...
Currently scraping http://quotes.toscrape.com/page/5
Quotes found, lets scrape...
Currently scraping http://quotes.toscrape.com/page/6
Quotes found, lets scrape...
Currently scraping http://quotes.toscrape.com/page/7
Quotes found, lets scrape...
Currently scraping http://quotes.toscrape.com/page/8
Quotes found, lets scrape...
Currently scraping http://quotes.toscrape.com/page/9
Quotes found, lets scrape...
Currently scraping http://quotes.toscrape.com/page/10
Quotes found, lets scrape...
Currently scraping http://quotes.toscrape.com/page/11
No quotes found for scraping, exiting...
Persisting scraped data to file
Completed...
