In [88]:
import datetime as dt
from collections import OrderedDict

import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np


In [91]:
def price_to_float(price_string):
    """
    Convert a price string such as ``'$7.30'`` into a float.

    Dollar signs and thousands-separator commas are removed before the
    conversion, so ``'$1,234.50'`` yields ``1234.5``.
    Leading and trailing whitespace is tolerated by ``float`` itself.

    Raises ``ValueError`` if the remaining text is not a valid number.
    """
    cleaned = price_string.replace('$', '').replace(',', '')
    return float(cleaned)
          
def _first_text(soup, selector):
    """Return the first stripped string of the first element matching ``selector``, or None."""
    matches = soup.select(selector)
    if not matches:
        return None
    return next(iter(matches[0].stripped_strings), None)


def _required_text(soup, selector):
    """Like ``_first_text`` but raise ``IndexError`` when no text is found."""
    text = _first_text(soup, selector)
    if text is None:
        raise IndexError('no text found for selector {!r}'.format(selector))
    return text


def get_product_info(stock_code, timeout=30):
    """
    Fetch the Countdown product details page for the given stock code
    and scrape the product's basic information from it.

    Parameters
    ----------
    stock_code : str
        Passed as the ``stockcode`` query parameter.
    timeout : float
        Seconds to wait for the server before giving up; passed to
        ``requests.get`` so that a stalled connection cannot hang forever.

    Returns
    -------
    OrderedDict with the keys, in order,

    - ``'stock_code'``: the given stock code
    - ``'name'``: product title text
    - ``'description'``: product description text, or None if absent
    - ``'on_sale'``: True if a special price is shown on the page
    - ``'sale_price'``: special price as a float, or None if not on sale
    - ``'price'``: regular price as a float (the "was" price when on sale)
    - ``'cup_price'``: unit price text, e.g. ``'$14.60/1KG'``, or None if absent
    - ``'datetime'``: local retrieval time formatted as ``%Y-%m-%dT%H:%M:%S``

    Raises
    ------
    An exception from ``requests`` if the request fails, times out, or
    returns an error status; ``IndexError`` if the product name or price
    cannot be found in the page; ``ValueError`` if a price cannot be parsed.
    """
    # Call Countdown
    url = 'https://shop.countdown.co.nz/Shop/ProductDetails?'
    params = {
        'stockcode': stock_code,
    }
    r = requests.get(url, params=params, timeout=timeout)
    r.raise_for_status()

    # Parse response
    soup = BeautifulSoup(r.text, 'lxml')

    d = OrderedDict()
    d['stock_code'] = stock_code
    d['name'] = _required_text(soup, 'div.product-title > h1')

    # The description is optional: a missing or empty element gives None
    # rather than discarding the whole product.
    d['description'] = _first_text(soup, 'p.product-description-text')

    if soup.select('span.special-price'):
        # On sale: the special price is the current price and the
        # regular price appears as text of the form "was $X".
        d['on_sale'] = True
        d['sale_price'] = price_to_float(
            _required_text(soup, 'span.special-price'))
        was_text = _required_text(soup, 'span.was-price')
        d['price'] = price_to_float(was_text.replace('was', ''))
    else:
        d['on_sale'] = False
        d['sale_price'] = None
        d['price'] = price_to_float(_required_text(soup, 'span.price'))

    # The unit price is optional too; it is kept as raw text.
    d['cup_price'] = _first_text(soup, 'div.cup-price')

    d['datetime'] = dt.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

    return d

def collect_product_info(stock_codes, as_df=True):
    """
    Call ``get_product_info`` on each of the given stock codes and
    collect the results.

    A stock code whose lookup fails contributes an empty dictionary
    (an all-missing row in the data frame) and triggers a warning, so
    one bad product does not abort the whole collection.
    ``KeyboardInterrupt`` and ``SystemExit`` are not suppressed.

    Parameters
    ----------
    stock_codes : iterable of stock codes
    as_df : bool
        If True, return a pandas DataFrame with the ``'datetime'``
        column (when present) converted to datetimes; otherwise return
        the list of dictionaries.

    Returns
    -------
    DataFrame or list, according to ``as_df``. If ``stock_codes`` is
    empty the result is an empty DataFrame or an empty list.
    """
    import warnings

    result = []
    for stock_code in stock_codes:
        try:
            x = get_product_info(stock_code)
        except Exception as e:
            # Best effort: record the failure and keep going.
            warnings.warn(
                'Could not get product info for stock code {!r}: {!r}'.format(
                    stock_code, e))
            x = {}
        result.append(x)

    if as_df:
        result = pd.DataFrame(result)
        # The column is missing when there were no stock codes or when
        # every lookup failed; indexing it then would raise KeyError.
        if 'datetime' in result.columns:
            result['datetime'] = pd.to_datetime(result['datetime'])

    return result



In [93]:
# Smoke-test the scraper on a handful of stock codes
codes = [
    '281739',  # cheese
    '260803',  # chocolate
    '701829',  # olive oil
    '381895',  # toilet paper
]

# To check a single product instead: get_product_info(codes[0])
f = collect_product_info(codes)
f

Unnamed: 0,stock_code,name,description,on_sale,sale_price,price,cup_price,datetime
0,281739,Mainland Cheese Block Organic Cheddar,Mainland organic cheddar is a mild cheddar che...,True,7.3,11.0,$14.60/1KG,2017-05-23 19:38:41
1,260803,Green & Blacks Chocolate Block Organic Dark Ch...,,True,3.49,3.89,$3.49/100G,2017-05-23 19:38:42
2,701829,Lupi Olive Oil Organic Extra Virgin,,False,,15.0,$2.00/100ML,2017-05-23 19:38:42
3,381895,Earthcare Toilet Paper 6pk Double Lenght Sky ...,,False,,5.0,$0.19/100SS,2017-05-23 19:38:42
