In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Carwyn Cellars collection page that lists the stout products to scrape.
stout_url = 'https://carwyncellars.com.au/collections/stout'

In [3]:
# Start from the default requests headers and swap in a desktop-browser
# User-Agent string for the shop request.
headers = requests.utils.default_headers()
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'

In [4]:
# Fetch the collection page. The timeout keeps a stalled connection from
# hanging the notebook, and raise_for_status() stops the run on an HTTP error
# instead of letting an error page be parsed as if it were the product list.
result = requests.get(stout_url, headers=headers, timeout=30)
result.raise_for_status()

In [5]:
# Parse the fetched page body using Python's built-in 'html.parser' backend.
soup = BeautifulSoup(result.content, 'html.parser')

In [7]:
# Collect every <li> element carrying the "product" class, then show how many
# were found.
products = soup.find_all(name="li", attrs={"class": "product"})
len(products)

16

In [8]:
# Untappd search endpoint; find_matching_beers sends the query as the 'q' parameter.
untapped_search_url = 'https://untappd.com/search'

In [42]:
def find_matching_beers(product_name, timeout=10):
    """
    Searches untappd with the given product name and returns results

    Any '&' characters are removed from the product name before it is sent
    as the search query. The final request URL is printed so each lookup can
    be traced in the cell output.

    Args:
        product_name (string): The product name to search untappd for
        timeout (float): Seconds to wait for untappd before giving up, so a
            stalled connection cannot hang the whole scrape (default 10)

    Returns:
        Array of matching html elements for untappd search results
        (the 'div' elements with class 'beer-item')

    Raises:
        requests.exceptions.Timeout: if untappd does not answer within
            ``timeout`` seconds
    """
    result = requests.get(
        untapped_search_url,
        params={'q': product_name.replace('&', '')},
        timeout=timeout,
    )
    print(result.url)
    soup = BeautifulSoup(result.content, 'html.parser')
    return soup.find_all('div', {'class': 'beer-item'})

def strip_stop_words(product_name, stop_words):
    """
    Removes every occurrence of each stop word from a product name

    The stop words are applied one after another, in list order, to the
    result of the previous removal. Surrounding whitespace is left as-is.

    Args:
        product_name (string): The product name to clean up
        stop_words (list): Substrings to delete from the product name

    Returns:
        (string) the product name with all stop words removed
    """
    remaining = product_name
    for stop_word in stop_words:
        # Only rebuild the string when the stop word is actually present.
        if stop_word in remaining:
            remaining = remaining.replace(stop_word, '')
    return remaining

def print_matches(product_name, stop_words, price, min_rating=0.1):
    """
    Searches untappd for a product and prints/returns the first rated match

    The stop words are stripped from the product name, untappd is searched
    with the result, and the search results are scanned in order. The first
    result whose rating is above ``min_rating`` is printed on a line starting
    with '*' and returned. A blank line is always printed afterwards to
    separate products in the cell output.

    Args:
        product_name (string): The product name to look up
        stop_words (list): The stop words to strip from the name before searching
        price (string): The shop price, echoed in the printed line and the result
        min_rating (float): A result is accepted only when its rating is
            strictly greater than this value (default 0.1)

    Returns:
        (list) [brewery, name, rating, price] for the accepted result, where
        rating is the text scraped from the page, or None when no search
        result qualifies
    """
    match = None
    for beer in find_matching_beers(strip_stop_words(product_name, stop_words)):
        try:
            # Attribute filters must be dicts; a set literal here would be
            # treated as a list of alternative class names instead.
            name = beer.find('p', {'class': 'name'}).a.text.strip()
            rating = beer.find('span', {'class': 'num'}).text.replace('(', '').replace(')', '').strip()
            brewery = beer.find('p', {'class': 'brewery'}).a.text.strip()
            is_rated = float(rating) > min_rating
        except (AttributeError, ValueError):
            # Result is missing the expected markup or has a non-numeric
            # rating: skip it rather than abort the whole scrape.
            continue
        if is_rated:
            print('*', brewery, name, rating, price)
            # Only a result that passed the rating check is ever returned.
            match = [brewery, name, rating, price]
            break
    print()
    return match

In [47]:
# Phrases removed from shop titles before searching untappd. The list is the
# same for every product, so it is built once rather than per iteration.
stop_words = ['&','Whiskey Barrel Aged', '2017 Release 13.6%',
              'Rye Barrel Aged Imperial Stout']

# One [brewery, name, rating, price] row per product part that found a match.
data = []
for product in products:
    # Skip sold-out products up front, before reading any other field.
    if product.find('span', {'class': 'badge-soldout'}):
        continue

    brand = product.find('h3', {'class': 'brand'}).text
    product_name = product.find('h2', {'class': 'title'}).text
    price = product.find('span', {'class': 'money'}).text

    # Prefix the shop's own brand onto its product titles for the search.
    if brand == 'Carwyn Cellars':
        product_name = brand + ' ' + product_name
    # Hand-written rewrites of specific shop titles / brewery names
    # (presumably to match how untappd lists them).
    product_name = product_name.replace('Boatrocker Ramjet Whisky Barrel Aged Imperial Stout with Coffee', 'Boatrocker Coffee Ramjet')
    product_name = product_name.replace('De Molen','Brouwerij De Molen')
    print(brand, product_name)

    # A '+' in the title is treated as a separator between several beers.
    # The first part is searched as-is; the brand is prepended to each later part.
    if '+' in product_name:
        first_part, *other_parts = product_name.split('+')
        search_names = [first_part] + [brand + " " + part for part in other_parts]
    else:
        search_names = [product_name]

    for search_name in search_names:
        match = print_matches(product_name=search_name, stop_words=stop_words, price=price)
        if match:
            data.append(match)

Boatrocker Boatrocker Coffee Ramjet
https://untappd.com/search?q=Boatrocker+Coffee+Ramjet
* Boatrocker Brewing Company Coffee Ramjet (2016) 4.492 $17.00

Carwyn Cellars Carwyn Cellars M Doulton
https://untappd.com/search?q=Carwyn+Cellars+M+Doulton

Hawkers Hawkers Imperial Stout 2016
https://untappd.com/search?q=Hawkers+Imperial+Stout+2016
* Hawkers Beer Imperial Stout (2016) 4.138 $10.00

De Molen Brouwerij De Molen Rasputin
https://untappd.com/search?q=Brouwerij+De+Molen+Rasputin
* Brouwerij De Molen Rasputin 3.951 $15.00

BrewDog BrewDog Dog D
https://untappd.com/search?q=BrewDog+Dog+D
* BrewDog Dog D 4.374 $30.00

De Molen Brouwerij De Molen Hel & Verdoemenis (Bruichladdich BA - Brett)
https://untappd.com/search?q=Brouwerij+De+Molen+Hel++Verdoemenis+%28Bruichladdich+BA+-+Brett%29
* Brouwerij De Molen Hel & Verdoemenis Bruichladdich BA - Brett 4.132 $20.00

Deschutes Brewery Deschutes 'The Abyss' Rye Barrel Aged Imperial Stout
https://untappd.com/search?q=Deschutes+%27The+Abyss%27+


In [48]:
# Show the collected [brewery, name, rating, price] rows.
data

[['Boatrocker Brewing Company', 'Coffee Ramjet (2016)', '4.492', '$17.00'],
 ['Hawkers Beer', 'Imperial Stout (2016)', '4.138', '$10.00'],
 ['Brouwerij De Molen', 'Rasputin', '3.951', '$15.00'],
 ['BrewDog', 'Dog D', '4.374', '$30.00'],
 ['Brouwerij De Molen',
  'Hel & Verdoemenis Bruichladdich BA - Brett',
  '4.132',
  '$20.00'],
 ['Deschutes Brewery', 'The Abyss', '4.201', '$65.00'],
 ['BrewDog', 'Dog E', '4.251', '$32.00'],
 ['BrewDog', 'Dog C', '4.347', '$30.00'],
 ['Brouwerij De Molen', 'Spanning & Sensatie', '4.013', '$15.00'],
 ['Brouwerij De Molen',
  'Hel & Verdoemenis Bruichladdich (peated) BA',
  '4.198',
  '$19.00'],
 ['Brouwerij De Molen', 'Hel & Verdoemenis', '4.036', '$15.00']]

In [49]:
# Tabulate the collected matches; each row of `data` is
# [brewery, name, rating, price]. pandas is imported in the notebook's
# first cell along with the other dependencies.
df = pd.DataFrame(data=data, columns=['Brewery','Name','Rating','Price'])
df

Unnamed: 0,Brewery,Name,Rating,Price
0,Boatrocker Brewing Company,Coffee Ramjet (2016),4.492,$17.00
1,Hawkers Beer,Imperial Stout (2016),4.138,$10.00
2,Brouwerij De Molen,Rasputin,3.951,$15.00
3,BrewDog,Dog D,4.374,$30.00
4,Brouwerij De Molen,Hel & Verdoemenis Bruichladdich BA - Brett,4.132,$20.00
5,Deschutes Brewery,The Abyss,4.201,$65.00
6,BrewDog,Dog E,4.251,$32.00
7,BrewDog,Dog C,4.347,$30.00
8,Brouwerij De Molen,Spanning & Sensatie,4.013,$15.00
9,Brouwerij De Molen,Hel & Verdoemenis Bruichladdich (peated) BA,4.198,$19.00


In [50]:
# Save the table as Carwyn_Cellars.csv (path is relative to the working
# directory). to_csv writes the row index as the first column by default.
df.to_csv('Carwyn_Cellars.csv')