In [5]:
import random
import csv
import logging

from shop_crawler import *
import common_actors

### Prepare the URL lists to analyze

In [6]:
# All urls: the first column of every non-empty row in the sample CSV.
# newline='' is the mode the csv module documentation asks for, so that
# newlines embedded in quoted fields are parsed correctly.
all_urls = []
with open('../resources/pvio_vio_us_ca_uk_sample1.csv', 'r', newline='') as f:
    for row in csv.reader(f):
        # csv.reader yields an empty list for blank lines; indexing row[0]
        # on those would raise IndexError, so skip them together with rows
        # whose first cell is empty.
        if row and row[0]:
            all_urls.append(row[0])

# Random sample urls (fixed seed so the same sample is drawn on every run).
random.seed(1)
# random.sample raises ValueError when asked for more items than the
# population holds, so clamp the size for short input files. With 100 or
# more urls the result is identical to sampling exactly 100.
sample_size = min(100, len(all_urls))
sample_urls = random.sample(all_urls, sample_size)

# Hand-picked shops that are convenient to inspect manually.
good_urls = [
    "curlebotanicals.com",
    "theglamourshop.com",
    "vape-fuel.com",
    "firstfitness.com",
    "sandlakedermatology.com",
    "getwaave.com",
    "dixieems.com",
    "jonessurgical.com",
    "srandd.com",
    "ambarygardens.com",
    "anabolicwarfare.com",
]


### Mock user and payment information

In [7]:
# Mock buyer identity and shipping address handed to the crawler.
# NOTE(review): presumably these values are typed into / selected on the
# shop's checkout forms -- confirm against shop_crawler.UserInfo.
user_info = UserInfo(
    first_name='John',
    last_name='Smith',
    country='United States',
    home=34,
    street='Ocean drive',
    city='Miami',
    zip='33125',
    # Was 'FLorida' (typo); a mis-capitalised state name may fail to match
    # a case-sensitive option in a state drop-down.
    state='Florida',

    phone='1231232',
    email='john@service.com'
)

# Mock payment card handed to the crawler (not a real card).
billing_info = PaymentInfo(
    card_number='1413232312312321',
    expire_date_year=2020,
    expire_date_month=12,
    cvc='123',
)

### Set up logging level

In [8]:
# Show INFO-and-above records of the 'shop_crawler' logger in the notebook.
logger = logging.getLogger('shop_crawler')
logger.setLevel(logging.INFO)

# Loggers are process-wide, so re-running this cell used to stack one more
# StreamHandler on each run and every record was printed several times.
# Handlers installed by this cell are tagged by name and removed first,
# which makes the cell safe to re-run.
_NOTEBOOK_HANDLER_NAME = 'shop_crawler_notebook'
for stale_handler in list(logger.handlers):
    if stale_handler.get_name() == _NOTEBOOK_HANDLER_NAME:
        logger.removeHandler(stale_handler)

handler = logging.StreamHandler()
handler.set_name(_NOTEBOOK_HANDLER_NAME)
formatter = logging.Formatter(
        '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)

### Start crawling for every url

In [None]:
# Build a single headless crawler from the mock buyer/payment data and
# register the actors from common_actors on it.
crawler = ShopCrawler(
    user_info,
    billing_info,
    '/usr/bin/chromedriver',
    headless=True,
)
common_actors.add_crawler_extensions(crawler)

# The crawler is entered as a context manager once for the whole batch.
# NOTE(review): presumably this starts/stops the browser -- confirm in
# shop_crawler.
with crawler:
    for url in good_urls:
        # Two blank lines and the shop name separate each shop's log output.
        print('\n\n{}'.format(url))
        crawler.crawl(url)




curlebotanicals.com


2018-07-10 10:23:32,350 shop_crawler INFO     processing state: new
2018-07-10 10:23:32,353 shop_crawler INFO     handler <common_actors.AddToCart object at 0x7ff59d26d860>
2018-07-10 10:24:36,936 shop_crawler INFO     new_state product_in_cart, url http://www.curlebotanicals.com/kratom/
2018-07-10 10:24:36,990 shop_crawler INFO     processing state: product_in_cart
2018-07-10 10:24:36,992 shop_crawler INFO     handler <common_actors.ToCheckout object at 0x7ff59d26d9e8>
2018-07-10 10:25:39,031 shop_crawler INFO     new_state product_in_cart, url http://www.curlebotanicals.com/kratom/
2018-07-10 10:25:39,032 shop_crawler INFO     handler <common_actors.ToCartLink object at 0x7ff59d26da20>
2018-07-10 10:25:40,711 shop_crawler INFO     new_state product_in_cart, url http://www.curlebotanicals.com/kratom/
2018-07-10 10:25:40,770 shop_crawler INFO     processing state: product_in_cart
2018-07-10 10:25:40,771 shop_crawler INFO     handler <common_actors.ToCheckout object at 0x7ff59d26d9e8>
2



theglamourshop.com


2018-07-10 10:26:48,279 shop_crawler INFO     processing state: new
2018-07-10 10:26:48,281 shop_crawler INFO     handler <common_actors.AddToCart object at 0x7ff59d26d860>
2018-07-10 10:27:51,810 shop_crawler INFO     new_state product_in_cart, url https://store-bb7f1.mybigcommerce.com/cart.php?suggest=0b86938f-6738-47ad-b4b6-8629738c9e65
2018-07-10 10:28:11,843 shop_crawler INFO     processing state: product_in_cart
2018-07-10 10:28:11,844 shop_crawler INFO     handler <common_actors.ToCheckout object at 0x7ff59d26d9e8>


 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "html5lib")

  markup_type=markup_type))
2018-07-10 10:28:21,632 shop_crawler INFO     new_state checkout_page, url https://store-bb7f1.mybigcommerce.com/checkout.php
2018-07-10 10:28:41,691 shop_crawler INFO     processing state: checkout_page
2018-07-10 10:28:41,698 shop_crawler INFO     Can't purchase from shop: http://theglamourshop.com stopped at state: checkout_page, current url: https://store



vape-fuel.com


2018-07-10 10:28:50,163 shop_crawler INFO     processing state: new
2018-07-10 10:28:50,164 shop_crawler INFO     handler <common_actors.AddToCart object at 0x7ff59d26d860>
2018-07-10 10:30:23,552 shop_crawler INFO     new_state new, url http://vape-fuel.com/
2018-07-10 10:30:23,553 shop_crawler INFO     handler <common_actors.GoogleForProductPage object at 0x7ff59d26d940>


### Test empty cart processing

In [None]:
# Make it True if you want to test empty cart detection
test_empty_cart = False


# Cart / checkout pages that are opened one by one and passed to
# common_actors.is_empty_cart.
checkout_pages = [
    'http://vape-fuel.com/checkout/cart/',
    'https://store-bb7f1.mybigcommerce.com/cart.php',
    'https://www.firstfitness.com/index.php?pg=signup',
    'https://store.sandlakedermatology.com/checkout/cart/',
    'https://www.dixieems.com/cart.asp',
    'http://www.jonessurgical.com/quick-order'
]

if test_empty_cart:
    from selenium_helper import *
    # `time` was never imported explicitly; the cell only worked if one of
    # the star imports happened to leak it. Import it here, after the star
    # import, so time.sleep below always resolves to the stdlib module.
    import time

    # NOTE(review): machine-specific absolute path; the crawling cell uses
    # '/usr/bin/chromedriver' -- confirm which one is intended.
    driver = create_chrome_driver('/home/aleksei/dist/selenium/chromedriver')
    try:
        for url in checkout_pages:
            driver.get(url)
            # Fixed wait before inspecting the page (presumably to let it
            # finish loading/rendering).
            time.sleep(30)

            is_empty_cart = common_actors.is_empty_cart(driver)
            print('url: {}, is empty cart: {}'.format(url, is_empty_cart))
    finally:
        # Always shut the browser down, even when a page load or the check
        # raises; otherwise the Chrome process is leaked.
        driver.quit()