In [160]:
import certifi
import urllib3
from bs4 import BeautifulSoup as bs
import pprint as pp
import os
from multiprocessing import Pool
import json
from numpy.random import choice
from random import randint
from num2words import num2words
import random

# Web scraping

In [161]:
http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED',ca_certs=certifi.where())

In [162]:
# define paths for data caching
dataDir = 'data'
if not os.path.exists(dataDir):
    os.makedirs(dataDir)
urlListFName = '%s/urls.txt' % dataDir

## wineoneline.com.au

In [163]:
# initialise web fetching stuff
domain = "http://www.wineonline.com.au"
dataDirThisDomain = dataDir + '/wineonline'
woAllTextFname = '%s/all.txt' % dataDirThisDomain

In [164]:
# scrape list of wines to get a list of urls to individual wines
if not os.path.exists(urlListFName):
    individualPages = []

    # get list of wine pages
    for wine_type in ['sparkling','white-wine','red-wine','imported']:
        print("Start of wine_type %s" % wine_type)
        page = 0
        while True:
            suffix = '?sort=alphaasc&page=%d' % page
            url = '/'.join([domain,wine_type,suffix])
            r = http.request('GET',url)
            if r.status == 200:
                print("Hit: " + url)
                html = r.data.decode('utf-8')
                soup = bs(html, 'html.parser')
                individualPages += [a.get('href') for a in soup.select('.ProdHeading a')]
                page = page + 1

            else:
                print("end of %s at %d" % (wine_type, page))
                break
    individualPages = list(set(individualPages))
    print("Writing list of urls to %s" % urlListFName)
    with open(urlListFName,'w') as f:
        f.write('\n'.join(individualPages))
else:
    print("Skipping fetching from the search results pages")
print("Reading in from %s" % urlListFName)
with open(urlListFName,'r') as f:
    individualPages = [line.strip() for line in f.read().split('\n') if line.strip() != '']
#individualPages = individualPages[:10] # only some for now
print("done")

Skipping fetching from the search results pages
Reading in from data/urls.txt
done


In [165]:
paths = [url.rstrip('/').split('/')[-1] for url in individualPages]
assert(len(paths) == len(individualPages))
print("done")

done


In [166]:

# scrape all the wines in the list

def fetchPath(path):
#     print("fetching %s" % url)
    url = domain.rstrip('/') + '/' + path.lstrip('/')
    dirName = '%s/bottles/%s' % (dataDirThisDomain,path)
    if not os.path.exists(dirName):
        os.makedirs(dirName)
    htmlFName = '%s/%s' % (dirName,'html.html')
    if not os.path.exists(htmlFName):
        print("Fetching %s" % url)
        r = http.request('GET',url)
        if r.status == 200:
    #         print("Parsing %s" % url)
            html = r.data.decode('utf-8').replace(u'\xa0', u' ')
            print("Saving to %s" % htmlFName)
            with open(htmlFName,'w') as f:
                f.write(html)
        else:
            print("Error, status %d for url %s" % (r.status,url))
            return([])
    else:
        print("Hit cache for %s" % htmlFName)
    with open(htmlFName,'r') as f:
        html = f.read()
    return(html)
    

if not os.path.exists(woAllTextFname):
    with Pool(20) as p:
        htmls = p.map(fetchPath,paths)
    print("done")
else:
    print("Skipping, because I'll read from cache")

Hit cache for data/wineonline/bottles/bass-phillip-estate-pinot-noir-2011/html.html
Hit cache for data/wineonline/bottles/lindemans-st-george-cabernet-sauvignon-2010/html.html
Hit cache for data/wineonline/bottles/zema-estate-coonawarra-cabernet-sauvignon-2012/html.html
Hit cache for data/wineonline/bottles/seresin-estate-organic-sauvignon-blanc-2016/html.html
Hit cache for data/wineonline/bottles/ninth-island-chardonnay-2013/html.html
Hit cache for data/wineonline/bottles/terre-a-terre-cabernet-sauvignon-2012/html.html
Hit cache for data/wineonline/bottles/yabby-lake-block-1-pinot-noir-2015/html.html
Hit cache for data/wineonline/bottles/paxton-tempranillo-2016/html.html
Hit cache for data/wineonline/bottles/de-bortoli-yarra-valley-villages-chardonnay-2015/html.html
Hit cache for data/wineonline/bottles/seville-estate-the-barber-chardonnay-2016/html.html
Hit cache for data/wineonline/bottles/krug-vintage-2003/html.html
Hit cache for data/wineonline/bottles/by-farr-viognier-2012/html.h

Hit cache for data/wineonline/bottles/vasse-felix-chardonnay-2014/html.html
Hit cache for data/wineonline/bottles/babich-marlborough-sauvignon-blanc-2015/html.html
Hit cache for data/wineonline/bottles/little-yering-chardonnay-2013/html.html
Hit cache for data/wineonline/bottles/katnook-prodigy-shiraz-2008/html.html
Hit cache for data/wineonline/bottles/yalumba-paradox-shiraz-2012/html.html
Hit cache for data/wineonline/bottles/yering-station-village-rose-2016/html.html
Hit cache for data/wineonline/bottles/kilikanoon-killermans-run-cabernet-sauvignon-2015/html.html
Hit cache for data/wineonline/bottles/paringa-estate-pe-pinot-noir-2017/html.html
Hit cache for data/wineonline/bottles/cullen-mangan-vineyard-semillon-sauvignon-blanc-2011/html.html
Hit cache for data/wineonline/bottles/mollydooker-the-scooter-merlot-2015/html.html
Hit cache for data/wineonline/bottles/lambrook-adelaide-hills-chardonnay-2013/html.html
Hit cache for data/wineonline/bottles/paxton-the-guesser-white-2016/html

Hit cache for data/wineonline/bottles/mitolo-savitar-shiraz-2013/html.html
Hit cache for data/wineonline/bottles/tapanappa-fleurieu-peninsula-pinot-noir-2015/html.html
Hit cache for data/wineonline/bottles/barossa-valley-estate-grenache-shiraz-mourvedre-2015/html.html
Hit cache for data/wineonline/bottles/angove-wild-olive-organic-shiraz-2013/html.html
Hit cache for data/wineonline/bottles/penfolds-bin-311-tumbarumba-chardonnay-2015/html.html
Hit cache for data/wineonline/bottles/barossa-valley-estate-cabernet-sauvignon-2015/html.html
Hit cache for data/wineonline/bottles/penfolds-maxs-cabernet-sauvignon-2014/html.html
Hit cache for data/wineonline/bottles/wynns-coonawarra-estate-michael-shiraz-2008/html.html
Hit cache for data/wineonline/bottles/kilikanoon-killermans-run-shiraz-2015/html.html
Hit cache for data/wineonline/bottles/mitchelton-chardonnay-2015/html.html
Hit cache for data/wineonline/bottles/tokar-estate-carafe-tumbler-estate-blend-2015/html.html
Hit cache for data/wineonl

In [167]:
if not os.path.exists(woAllTextFname):
    paras = []
    soups = [bs(html) for html in htmls]
    for soup in soups:
        print('next soup')
        paras += soup.select('#ProductDescription p')

    print("done")
else:
    print("Skipping, because I'll read from cache")

next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup
next soup


In [168]:
if not os.path.exists(woAllTextFname):
    for para in paras:
        for tag in ['span','strong','p']:
            for el in para.find_all(tag):
                if el.string:
                    num_words = len(el.string.replace('\n',' ').split(' '))
                    if num_words < 15:
                        el.string.replace_with('')
else:
    print("Skipping, because I'll read from cache")

In [169]:
if not os.path.exists(woAllTextFname):
    text = '\n'.join([para.get_text() for para in paras]).replace(u'\xa0',' ')
    for c in '.?!':
        text = text.replace(c,c + ' ').replace(c + '  ',c + ' ')
        
        # hard code fix for their typo
        text = text.replace("tinges and aroma's of stone fruits","tinges and aromas of stone fruits")
    with open(woAllTextFname,'w') as f:
        f.write(text)

with open(woAllTextFname,'r') as f:
    allTextWO = f.read()
    


## nakedwines.com.au

In [170]:
# found the number 210 through trial and error
# bigger number means more results per page
# but their crappy site fails when you enter a number too big (e.g. 210)
domain = 'https://www.nakedwines.com.au'

dataDirThisDomain = dataDir + '/nakedwines'

In [171]:
def getSearchResults(searchPage):
    url = domain + "/wines/index?searchText=&sortWines=descprice&pageSize=20&view=Wines&layoutType=card&allWines=true&pageNum=%d" % searchPage
    print("Fetching " + url)
    r = http.request('GET',url)
    assert(r.status == 200)
    print("Hit: " + url)
    html = r.data.decode('utf-8')
    soup = bs(html, 'html.parser')
    links = [a.get('href') for a in soup.select('a.card__header')]
    print("Found %d results for searchPage %d" % (len(links),searchPage))
    return(links)



if not os.path.exists(dataDirThisDomain):
    os.makedirs(dataDirThisDomain)
fname = '%s/paths.txt' % dataDirThisDomain

if not os.path.exists(fname):
    print("No cache found, fetching from search results on " + domain)
    searchPage = 0
    results = getSearchResults(searchPage)
    individualPages = results
    while len(results) > 0:
        searchPage += 1
        results = getSearchResults(searchPage)
        individualPages += results
    with open(fname,'w') as f:
        f.write('\n'.join(individualPages))
    # write and then read back from file
    # to make sure we wrote correctly

print("Reading urls from cache")
with open(fname,'r') as f:
    individualPages = [x.strip() for x in f.read().split('\n') if x.strip() != '']
    

print("Using urls:")
print('\n'.join(individualPages[:3] + ['...'])) 
print("Discovered %d individual pages" % len(individualPages))

    


Reading urls from cache
Using urls:
/products/karriview-pinot-noir-2016
/products/sorby-adams-small-barrel-barossa-shiraz-2016
/products/sam-plunkett-single-vineyard-series-gentle-annie-shiraz-2017
...
Discovered 301 individual pages


In [172]:

def fetchPath(path):
    url = domain + path
    print("Fetching " + url)
    r = http.request('GET',url)
    assert(r.status == 200)
    print("Hit: " + url)
    html = r.data.decode('utf-8')
    fname = dataDirThisDomain.rstrip('/') + '/' + path.lstrip('/') + '.html'
    directory = '/'.join(fname.split('/')[:-1])
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open(fname,'w') as f:
        f.write(html)
    soup = bs(html, 'html.parser')
    textEls = soup.find(id="tab-tick")
    assert(textEls)
    text = textEls.get_text().replace(u'\xa0',' ').strip()
    for c in '.?!':
        text = text.replace(c,c + ' ').replace(c + '  ',c + ' ')
        
        # hard code fix for a typo
        text = text.replace("punches will above it","punches well above it")
    return(text)

fname = '%s/all.txt' % dataDirThisDomain 
if not os.path.exists(fname):
    print("No cache, fetching")
    with Pool(20) as p:
        texts = p.map(fetchPath,individualPages)
    with open(fname,'w') as f:
        f.write('\n'.join(texts))
else:
    print("hit cache")
    
with open(fname,'r') as f:
    allTextNW = f.read()

    
print("done")

hit cache
done


## combining web scrapes

In [173]:
allText = allTextWO + '\n' + allTextNW

In [174]:
# takes in a string, returns an array of words
# where punctuation is a 'word'
def toWords(s):
    s = s.replace('\n',' ').replace('\t',' ')
    for c in '!?.;$':
        s = s.replace(c,c+' ').replace(c + '  ',c + ' ')
    s = s.replace('%',' %') # I want to split off the percent, so I can treat numbers differently
    words = []
    for chunk in s.split(' '):
        chunk = chunk.lstrip('([{\'“').rstrip(')]}“').strip('`‘""”').replace('—','-').replace('–','-').replace('’',"'")
        if chunk.strip() != '':
            punctuation = ',.?!;'
            for p in punctuation:
                if chunk.endswith(p):
                    words.append(chunk[:-1])
                    words.append(p)
            if not(any([chunk.endswith(p) for p in punctuation])):
                words.append(chunk)
              
    # if a word appears uncapitalised once, uncapitalise it always
    wordsS = set(words)
    func = lambda x: x.lower() if x.lower() in wordsS else x
    words = [func(w) for w in words]
    
    # remove quotes like 'this quote'
    func = lambda x: x[:-1] if ((len(x) > 1) and (x[-1] == "'") and (x[-2] != 's')) else x
    assert(func("hello'") == "hello")
    assert(func("Yesterday's") == "Yesterday's")
    assert(func("Shores'") == "Shores'")
    assert(func("!") == "!")
    words = [func(w) for w in words]
    
    words = [w for w in words if w != '']
    
    return(words)

def testToWords():
    s = 'At the dawn'
    expected = ['At','the','dawn']
    actual = toWords(s)
    if expected != actual:
        print("expected: ")
        pp.pprint(expected)
        print("actual: ")
        pp.pprint(actual)
    assert(expected==actual)
    
    s = 'Of a  new  age\n'
    words = ['Of','a','new','age']
    assert(words==toWords(s))
    
    s = 'At the dawn, of  a\nnew \n age!'
    expected = ['At','the','dawn',',','of','a','new','age','!']
    actual = toWords(s)
    if expected != actual:
        print("expected: ")
        pp.pprint(expected)
        print("actual: ")
        pp.pprint(actual)
    assert(expected==actual)
    
    s = 'Here is here'
    words = ['here','is','here']
    assert(words==toWords(s))
    
    s = 'Here is (bracket stuff)'
    words = ['Here','is','bracket','stuff']
    if words != toWords(s):
        print(toWords(s))
    assert(words==toWords(s))
    
    
    s = 'Here is (bracket stuff!)'
    words = ['Here','is','bracket','stuff','!']
    if words != toWords(s):
        print(toWords(s))
    assert(words==toWords(s))
    
    s = 'Here is "a quote"'
    words = ['Here','is','a','quote']
    assert(words==toWords(s))
    
    s = "Ha! That's funny"
    words = ['Ha','!',"That's",'funny']
    assert(words==toWords(s))
    
    s = "For only $10"
    words = ['For','only','$','10']
    assert(words==toWords(s))
    
testToWords()
print("test passed")

test passed


## utilities

In [175]:

def capitalise(word):
    if len(word) == 1:
        return(word.upper())
    else:
        return(word[0].upper() + word[1:])

assert(capitalise('a') == 'A')
assert(capitalise('hello') == 'Hello')
assert(capitalise('A') == 'A')
assert(capitalise('Hello') == 'Hello')

def filter(word):
    names = ['Ben','James','Tyson','Steve','Andrew',
             'Campbell','Jen','Margaret','Nigel', 
             'Kim','James-Paul','Gary','Kym','Rob','Stu',
             'Stuart'
            ]
    for name in names:
        if word in [name,name + "'s",name + "’s"]:
            return(True)
    if word.lower() in ['angel',"angel's","angels",'naked']:
        # Naked Wines mentions these a lot
        return(True)
    elif word.lower() in ['points']:
        # wineonline mentions these
        return(True)
    else:
        return(False)
    
def applyFilter(k):
    if type(k) == '':
        return(filter(k))
    elif type(k) == type(('','')):
        return any([filter(w) for w in words])
    else:
        assert(False)
    
# data is a dictionary
# the keys are what we are choosing
# the values are the weights
def weightedRandom(dist):
    
    entries = [k for k in dist.keys()]
    if len(entries) == 0:
        return(None) # all filtered out
    probabilities = [dist[e] for e in entries]
    scale = sum(probabilities)
    probabilities = [float(p)/scale for p in probabilities]
    draw = choice(range(len(entries)), 1, p=probabilities)[0]
    return(entries[draw])

# takes in a word (string with no whitespace)
# If that is a positive integer, returns a generalised string
# so that '1' and '3' map to the same value
# which makes my markov chain more generalised
def generaliseNum(word):
    word
    # return non-number strings as they are
    if word in ['','.','0',',']:
        return(word)
    elif not all([c in '1234567890,' for c in word]):
        return(word)
    
    try:
        x = int(word.replace(',',''))
    except ValueError as e:
        print("Word: %s" % word)
        raise(e)
    assert(x > 0)
    
    if x < 10:
        return('MAGIC_INT_LESS_THAN_TEN')
    elif x < 100:
        return('MAGIC_INT_LESS_THAN_HUNDRED')
    elif x < 1900:
        return("MAGIC_INT_THOUSANDS")
    elif 1900 < x <= 2020:
        return('MAGIC_INT_YEAR')
    elif x < 10000:
        return("MAGIC_INT_TEN_THOUSANDS")
    else:
        return(word)
       
# undo generaliseNum
# not exactly, since it will come out with a different number, but same range
def ungeneraliseNum(word):
    word = str(word) 
    if not word.startswith('MAGIC'):
        return(word)
    elif word == 'MAGIC_INT_LESS_THAN_TEN':
        return(str(randint(1,9)))
    elif word == 'MAGIC_INT_LESS_THAN_HUNDRED':
        return(str(randint(10,99)))
    elif word == "MAGIC_INT_THOUSANDS":
        return(str(randint(100,1900)))
    elif word == 'MAGIC_INT_YEAR':
        return(str(randint(1900,2020)))
    elif word == "MAGIC_INT_TEN_THOUSANDS":
        return(str(randint(2020,10000)))
    else:
        return(word)
        
for x in [0,1,3,10,12,50,99,100,1300,1901,2010,10000,'asd',-1]:
    start = str(x)
    middle = generaliseNum(start)
    end = ungeneraliseNum(middle)
    print('%s -> %s -> %s' % (start,middle,end))

    
assert(generaliseNum("1,001") == 'MAGIC_INT_THOUSANDS')

0 -> 0 -> 0
1 -> MAGIC_INT_LESS_THAN_TEN -> 4
3 -> MAGIC_INT_LESS_THAN_TEN -> 3
10 -> MAGIC_INT_LESS_THAN_HUNDRED -> 25
12 -> MAGIC_INT_LESS_THAN_HUNDRED -> 56
50 -> MAGIC_INT_LESS_THAN_HUNDRED -> 32
99 -> MAGIC_INT_LESS_THAN_HUNDRED -> 36
100 -> MAGIC_INT_THOUSANDS -> 105
1300 -> MAGIC_INT_THOUSANDS -> 1240
1901 -> MAGIC_INT_YEAR -> 1942
2010 -> MAGIC_INT_YEAR -> 1999
10000 -> 10000 -> 10000
asd -> asd -> asd
-1 -> -1 -> -1


# N Layer chain

In [176]:
numLayers = 3
assert(numLayers > 0)

In [177]:
for x in range(100):
    for endChar in [' .!?;)`\'"']:
        for startChar in [' \n(`"\'']:
            for toCapitalise in [True,False]:
                if toCapitalise:
                    searchFor = startChar + capitalise(num2words(x)) + endChar
                else:
                    searchFor = startChar + num2words(x) + endChar
                replace = startChar + str(x) + endChar
                allText = allText.replace(searchFor,replace)
print("done")

done


In [178]:
allTextSplit = toWords(allText)
#print(allTextSplit)

In [179]:
words = [generaliseNum(w) for w in allTextSplit]
words = [('and' if w  == '&' else w) for w in words]
words = [('.' if w  == '.' else w) for w in words]

# split allTextSplit into a list of sentences, where each sentence is a list of words
sentences = [[]]
for word in words:
    sentences[-1].append(word)
    if word in '?!.':
        sentences.append([])


In [180]:
dataStart = {}
data = {N:{} for N in range(1,numLayers+1)}

solo = 0
many = 0

for sentence in sentences:
    if len(sentence) > numLayers:
        # get the start of the sentence
        firstN = tuple(sentence[0:numLayers])
        if not any([filter(w) for w in firstN]): # filter start tuple
            if firstN in dataStart:
                dataStart[firstN] += 1
            else:
                dataStart[firstN] = 1

        for N in range(1,numLayers+1):
            # now the rest
            for i in range(N,len(sentence)):
                thisWord = sentence[i]
                prevWords = tuple(sentence[i-N:i])
                if not filter(thisWord):
                    if prevWords not in data[N]:
                        data[N][prevWords] = {thisWord:1}
                    elif thisWord not in data[N][prevWords]:
                        data[N][prevWords][thisWord] = 1
                        solo += 1
                    else:
                        data[N][prevWords][thisWord] += 1
                        many += 1

    
print("solo: %d" % solo)
print("many: %d" % many)


solo: 45139
many: 59168


In [181]:
# returns True p of the time
# e.g. p=0.5 is a coin flip
# p == 1 is always true
def coinFlip(p):
    return(random.random() < p)


In [182]:
def generateSentenceMany():
#     print("generating sentence")
    words = list(weightedRandom(dataStart))
#     print("First %d words are: %s" % (numLayers,str(words)))
    stats = {x:0 for x in range(1,numLayers+1)}
    numOnlyChoice = 0 # number of words where that is the only valid next choice
    while words[-1] not in '!?.':
        
        # try to find the next work
        nextWord = None
        for N in range(numLayers,1-1,-1): # from numLayers to 1
#             print("N=%d words=%s" % (N,str(words)))
            prevWords = tuple(words[-N:])
            assert(type(prevWords) == type((1,2)))
            if prevWords in data[N]:
                weights = data[N][prevWords]
                nextWord = weightedRandom(weights) # returns None if all possibilities are filtered
                if nextWord != None:
                    if (N != 1) and (len(weights) == 1):
                        # if there's only one option, flip a coin
                        # this is because I don't want to copy and paste whole phrases from the dataset
                        if coinFlip(0.5):
                            numOnlyChoice += 1
                            break
                        else:
                            nextWord = None
                    else:
                        if len(weights) == 1:
                            numOnlyChoice += 1
                        break
        try:
            assert(nextWord)
        except AssertionError as e:
            print("words: ")
            print(words)
            print("%s in data[%d] = %s" % (words[-1],1,(words[-1],) in data[1]))
            raise(e)
        try:
            assert(type(nextWord) == type(''))
        except AssertionError as e:
            print("nextWord: " + str(nextWord))
            raise(e)
        stats[N] += 1
#         print("appending word")
        words.append(nextWord)
    
    if numOnlyChoice > 0.8 * len(words):
        print("Discarding because too many only choice: " + str(words))
        return(generateSentenceMany())
    
    stats['numOnly'] = numOnlyChoice
    stats['numMany'] = len(words) - numOnlyChoice
    
    words = [ungeneraliseNum(w) for w in words]
        
    sentence = capitalise(words[0])
    for word in words[1:]:
        if (word not in ',.!?;') and (word != '...') and (sentence[-1] != '$'):
            sentence += ' '
        sentence += word
        
    if not (5 < len(words) < 25):
        # bad length, try again
        return(generateSentenceMany())
    else:
        return({'sentence':sentence,'stats':stats})

for _ in range(20):
    result = generateSentenceMany()
    print(result['sentence'])
    print(result['stats'])
    print('')

A highly approachable riesling thanks to maturation on yeast lees to enhance palate texture.
{1: 2, 2: 4, 3: 6, 'numOnly': 6, 'numMany': 9}

Vintage began a week earlier than 1910.
{1: 0, 2: 2, 3: 3, 'numOnly': 3, 'numMany': 5}

Parcels were judiciously blended to perfection.
{1: 0, 2: 2, 3: 2, 'numOnly': 2, 'numMany': 5}

Once the cat is out.
{1: 1, 2: 1, 3: 1, 'numOnly': 2, 'numMany': 4}

Think juicy black fruits, spice and nuts will be long.
{1: 2, 2: 2, 3: 5, 'numOnly': 2, 'numMany': 10}

The Paxon the Guesser red!
{1: 1, 2: 1, 3: 1, 'numOnly': 1, 'numMany': 5}

And formic Grange 1960 shiraz is sure to be the perfect go-to summer thirst quencher.
{1: 3, 2: 5, 3: 5, 'numOnly': 4, 'numMany': 12}

Aromas of citrus, chalk and nectarine.
{1: 0, 2: 0, 3: 5, 'numOnly': 2, 'numMany': 6}

Hamish has packed in pristine quality McLaren fruit was fermented separately and then blended together before bottling.
{1: 1, 2: 4, 3: 10, 'numOnly': 8, 'numMany': 10}

Rory has carefully blended Viognier