# Setup

In [5]:
# Import dependencies
%matplotlib inline
from matplotlib import style
style.use('fivethirtyeight')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime as dt
from splinter import Browser
from bs4 import BeautifulSoup
from flask import Flask, jsonify

In [6]:
# Configure browser for scraping
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [7]:
# Define URL variables
coffeeBeanCorralURL = 'https://www.coffeebeancorral.com/categories/Green-Coffee-Beans/All-Coffees.aspx'
freshRoastedCoffeeURL = 'https://www.freshroastedcoffee.com/collections/green-coffee'
amazonURL = 'https://www.amazon.com/Best-Sellers-Grocery-Gourmet-Food-Unroasted-Coffee-Beans/zgbs/grocery/979887011'

Scrape Coffee Bean Corral for:
- Name (listing title)
- Price (listing price)
- Star Rating (Number of stars given, out of five.)
- Source (Coffee Bean Corral)
- URL (link to the bean's individual page)

In [8]:
# Launch the browser
browser.visit(coffeeBeanCorralURL)

In [9]:
# Pull in browser content to form the soup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [14]:
# Narrow the search scope to the product container
results = soup.find_all('div', class_='product-info')
print(results)

[<div class="product-info">
<div class="SingleProductDisplayName recordname">
<a href="/product/Bali-Hai-Organic-Blend-SWP-Decaf.aspx" id="ctl00_MainContentHolder_ucDevelisysFacetedSearchProductGrid_ucProductGridDisplay_rpProductGrid_ctl01_SingleProductDisplay_NameHyperLink">Bali Hai Organic Decaf Indonesia Blend SWP</a>
</div>
<div id="ctl00_MainContentHolder_ucDevelisysFacetedSearchProductGrid_ucProductGridDisplay_rpProductGrid_ctl01_SingleProductDisplay_SingleProductDisplayRating">
<img alt="5 stars" class="recordrating" id="ctl00_MainContentHolder_ucDevelisysFacetedSearchProductGrid_ucProductGridDisplay_rpProductGrid_ctl01_SingleProductDisplay_RatingStars" src="/BVModules/Themes/CoffeeBean/images/buttons/Stars5.svg" style="border-width:0px;"/>
<a class="recordreviewcount" href="/product/Bali-Hai-Organic-Blend-SWP-Decaf.aspx#Write" id="ctl00_MainContentHolder_ucDevelisysFacetedSearchProductGrid_ucProductGridDisplay_rpProductGrid_ctl01_SingleProductDisplay_ReviewCount">20 reviews</a>

In [46]:
# Loop through all of the results to find the correct data
names = []
prices = []
ratings = []
URLs = []
sources = []


for result in results:
    # Name
    try:
        result_name = result.find('a').text
        print(result_name)
        names.append(result_name)
    except:
        print("Name failed")
        names.append("Failed")
        
    # Prices
    try:
        result_price = result.find('span', class_='PriceLabel').text
        print(result_price)
        prices.append(result_price)
        
    except:
        print("Price failed")
        prices.append("Failed")
        
    # Ratings
    try:
        result_rating = result.find('img_id', class_='recordrating')
        print(result_rating)
        ratings.append(result_rating)
    
    except:
        print("Rating failed")
        ratings.append("Failed")
        
    # URL
    try:
        baseURL = 'https://www.coffeebeancorral.com'
        result_URL = result.find('a')
        completeURL = baseURL + result_URL["href"]
        print(completeURL)
        URLs.append(completeURL)
    
    except:
        print("URL failed")
        URLs.append("Failed")
        
    # Source
    try:
        sources.append("Coffee Bean Corral")
    
    except:
        print("Source failed")
        sources.append("Failed")

Bali Hai Organic Decaf Indonesia Blend SWP
$6.30
None
https://www.coffeebeancorral.com/product/Bali-Hai-Organic-Blend-SWP-Decaf.aspx
Bali Organic Blue Moon
$7.75
None
https://www.coffeebeancorral.com/product/Bali-Organic-Blue-Moon__BALIBEANS.aspx
Big Black Organic CBC Blend
$6.50
None
https://www.coffeebeancorral.com/product/Big-Black-Organic-CBC-Blend__BIGBLACKORG.aspx
Bolivian Organic Caranavi FTO
$7.75
None
https://www.coffeebeancorral.com/www/Products/Bolivian-Organic---Caranavi-FT__BOLOCOL.aspx
Brazil Cerrado Natural 17/18
$4.00
None
https://www.coffeebeancorral.com/product/Brazil-Cerrado-Natural-1718__BRACERRADO.aspx
Brazil Daterra CHC Reserve Espresso
$7.25
None
https://www.coffeebeancorral.com/product/Daterra-Espresso-Blend.aspx
Brazil Daterra Pearl Bourbon Peaberry RFA
$8.50
None
https://www.coffeebeancorral.com/product/Brazil-Bourbon-Peaberry-Daterra-Estate-Pearl-Bourbon__DATPEARLBBN.aspx
Brazil Daterra Sweet Blue RFA
$8.50
None
https://www.coffeebeancorral.com/product/Brazil

In [47]:
# Pull lists into a data frame
coffeeCorralData = {'Name': names, 
                   'Price': prices,
                   'Rating': ratings,
                   'URL': URLs,
                   'Source': sources}
coffeeCorral = pd.DataFrame(data=coffeeCorralData)
coffeeCorral.head()

Unnamed: 0,Name,Price,Rating,URL,Source
0,Bali Hai Organic Decaf Indonesia Blend SWP,$6.30,,https://www.coffeebeancorral.com/product/Bali-...,Coffee Bean Corral
1,Bali Organic Blue Moon,$7.75,,https://www.coffeebeancorral.com/product/Bali-...,Coffee Bean Corral
2,Big Black Organic CBC Blend,$6.50,,https://www.coffeebeancorral.com/product/Big-B...,Coffee Bean Corral
3,Bolivian Organic Caranavi FTO,$7.75,,https://www.coffeebeancorral.com/www/Products/...,Coffee Bean Corral
4,Brazil Cerrado Natural 17/18,$4.00,,https://www.coffeebeancorral.com/product/Brazi...,Coffee Bean Corral


In [48]:
# Remove dollar sign from Price
coffeeCorral['Price'] = coffeeCorral['Price'].str.replace('$', '')
coffeeCorral.head()

Unnamed: 0,Name,Price,Rating,URL,Source
0,Bali Hai Organic Decaf Indonesia Blend SWP,6.3,,https://www.coffeebeancorral.com/product/Bali-...,Coffee Bean Corral
1,Bali Organic Blue Moon,7.75,,https://www.coffeebeancorral.com/product/Bali-...,Coffee Bean Corral
2,Big Black Organic CBC Blend,6.5,,https://www.coffeebeancorral.com/product/Big-B...,Coffee Bean Corral
3,Bolivian Organic Caranavi FTO,7.75,,https://www.coffeebeancorral.com/www/Products/...,Coffee Bean Corral
4,Brazil Cerrado Natural 17/18,4.0,,https://www.coffeebeancorral.com/product/Brazi...,Coffee Bean Corral


Scrape Amazon for:
- Name (listing title)
- Price (listing price)
- Star Rating (Number of stars given, out of five.)
- Source (Amazon)
- URL (link to the bean's individual page)