In [19]:
import datetime as dt
from collections import OrderedDict

import grequests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np


In [46]:
def price_to_float(price_string):
    return float(price_string.replace('$', ''))
              
def parse_product_info(response):
    """
    Given a response from the Countdown API, parse it, and return in as a dictionary.
    """
    r = response
    if r.status_code != 200:
        return {}
    
    # Parse response
    d = OrderedDict()
    
    soup = BeautifulSoup(r.text, 'lxml')
    
    s = soup.select('div.product-title > h1')
    d['name'] = list(s[0].stripped_strings)[0]
    
    s = soup.select('p.product-description-text')
    ss = list(s[0].stripped_strings)
    if ss:
        d['description'] = ss[0]
    else:
        d['description'] = None
    

    s = soup.select('span.volume-size')
    ss = list(s[0].stripped_strings)
    if ss:
        d['size'] = ss[0]
    else:
        d['size'] = None

    s = soup.select('span.special-price')
    if s:
        d['on_sale'] = True
        ss = list(s[0].stripped_strings)
        d['sale_price'] = price_to_float(ss[0])
        t = soup.select('span.was-price')
        tt = list(t[0].stripped_strings)
        d['price'] = price_to_float(tt[0].replace('was', ''))
    else:
        d['on_sale'] = False
        d['sale_price'] = None    
        t = soup.select('span.price')
        tt = list(t[0].stripped_strings)
        d['price'] = price_to_float(tt[0])
    
    s = soup.select('div.cup-price')
    ss = list(s[0].stripped_strings)
    if ss:
        d['cup_price'] = ss[0]
    else:
        d['cup_price'] = None
        
    d['datetime'] = dt.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

    return d

def collect_product_info(stock_codes, as_df=True):
    """
    For each product stock code (string) in the list ``stock_codes``,
    issue a GET request to Countdown at https://shop.countdown.co.nz/Shop/ProductDetails?
    to retrieve product information.
    Send all the requests at once and return the result as a generator of responses.
    """
    url = 'https://shop.countdown.co.nz/Shop/ProductDetails?'
    rs = [grequests.get(url, params={'stockcode': stock_code})
      for stock_code in stock_codes]    
    result = [parse_product_info(r) for r in grequests.imap(rs)]
    
    if as_df:
        result = pd.DataFrame(result)
        result['datetime'] = pd.to_datetime(result['datetime'])
        
    return result



In [47]:
# Test some
codes = [
    '281739',  # cheese
    '260803',  # chocolate
    '701829',  # olive oil
    '381895',  # toilet paper
    '700630',  # Dijon mustard 
    '700631',  # Whole grain mustard
    '360257',  # Pic's peanut butter 380g
    '887052',  # Pic's peanut butter 1000g
]

%time f = collect_product_info(codes)
f

CPU times: user 384 ms, sys: 12 ms, total: 396 ms
Wall time: 1.86 s


Unnamed: 0,name,description,size,on_sale,sale_price,price,cup_price,datetime
0,Mainland Cheese Block Organic Cheddar,Mainland organic cheddar is a mild cheddar che...,500g,True,7.3,11.0,$14.60/1KG,2017-05-24 20:11:31
1,Green & Blacks Chocolate Block Organic Dark Ch...,,100g,True,3.49,3.89,$3.49/100G,2017-05-24 20:11:31
2,Lupi Olive Oil Organic Extra Virgin,,750ml,False,,15.0,$2.00/100ML,2017-05-24 20:11:31
3,Earthcare Toilet Paper 6pk Double Lenght Sky ...,,6pk,False,,5.0,$0.19/100SS,2017-05-24 20:11:31
4,Macro Organic Mustard Dijon,,200g,True,2.99,3.49,,2017-05-24 20:11:32
5,Pics Peanut Butter Crunchy,,380g,True,6.0,6.5,$1.58/100G,2017-05-24 20:11:32
6,Macro Organic Mustard Wholegrain,,200g,True,2.99,3.49,,2017-05-24 20:11:32
7,Pics Peanut Butter Crunchy,,1kg,False,,18.49,$18.49/1KG,2017-05-24 20:11:32
